Skip to content

Commit b0a8c14

Browse files
authored
[GPU] Updated GPU cache size retrieval and refined closest_pow_of_2 (openvinotoolkit#28059)
Details: The existing method for cache size calculation was static and needed continuous updates to the SKU table, which were already being missed for the latest SKUs, e.g. DG2. This update introduces a new member variable, max_global_cache_size, to store the GPU's global cache size, obtained via the OpenCL property CL_DEVICE_GLOBAL_MEM_CACHE_SIZE. The existing hard-coded cache calculations are removed. Additionally, the closest_pow_of_2 function has been enhanced to return the nearest power of 2, favoring the upper value if the input falls within the top 30% of the range between the lower and upper powers of 2. These changes improve memory management and ensure better utilization of GPU resources in bottleneck situations. Tickets: CVS-159076 Signed-off-by: Arshad Mehmood <arshad.mehmood@intel.com>
1 parent adf097b commit b0a8c14

File tree

3 files changed

+22
-29
lines changed

3 files changed

+22
-29
lines changed

src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct device_info {
5656
uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes.
5757
uint64_t max_global_mem_size; ///< Maximum size of global device memory in bytes.
5858
uint64_t max_alloc_mem_size; ///< Maximum size of memory object allocation in bytes.
59+
uint64_t max_global_cache_size; ///< Maximum size of cache memory bytes.
5960

6061
uint64_t max_image2d_width; ///< Maximum image 2d width supported by the device.
6162
uint64_t max_image2d_height; ///< Maximum image 2d height supported by the device.

src/plugins/intel_gpu/src/plugin/plugin.cpp

+20-29
Original file line numberDiff line numberDiff line change
@@ -797,12 +797,24 @@ uint32_t Plugin::get_optimal_batch_size(const ov::AnyMap& options) const {
797797
auto device_id = get_property(ov::device::id.name(), options).as<std::string>();
798798
auto context = get_default_contexts().at(device_id);
799799
const auto& device_info = context->get_engine().get_device_info();
800-
auto next_pow_of_2 = [] (float x) {
801-
return pow(2, ceil(std::log(x)/std::log(2)));
802-
};
800+
803801
auto closest_pow_of_2 = [] (float x) {
804-
return pow(2, floor(std::log(x)/std::log(2)));
802+
int lower_power = static_cast<int>(floor(std::log(x) / std::log(2)));
803+
double lower_value = pow(2, lower_power); // Current power of 2
804+
double upper_value = pow(2, lower_power + 1); // Next power of 2
805+
806+
// Determine the threshold (70% of the range between lower and upper values)
807+
// If x is within the upper 30% of the range, return the upper power of 2.
808+
double threshold = 0.7 * (upper_value - lower_value);
809+
810+
// Compare x with the threshold and return the appropriate power of 2
811+
if (x - lower_value > threshold) {
812+
return upper_value; // Return the next power of 2
813+
} else {
814+
return lower_value; // Return the current power of 2
815+
}
805816
};
817+
806818
auto model_param = options.find(ov::hint::model.name());
807819
if (model_param == options.end()) {
808820
GPU_DEBUG_INFO << "[OPTIMAL_BATCH_SIZE] ov::hint::model is not set: return 1" << std::endl;
@@ -816,31 +828,10 @@ uint32_t Plugin::get_optimal_batch_size(const ov::AnyMap& options) const {
816828
}
817829
GPU_DEBUG_INFO << "DEVICE_INFO:"
818830
<< "gfx_version.major, " << device_info.gfx_ver.major
819-
<< "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor) << std::endl;
820-
static std::map<cldnn::gfx_version, size_t> gen_kbytes_per_bank = {
821-
{{12, 0, 0}, 480}, // TGL
822-
{{12, 1, 0}, 2048}, // DG1
823-
{{12, 5, 0}, 320},
824-
{{12, 7, 0}, 512},
825-
};
826-
size_t L3_cache_size = device_info.gfx_ver.major && (device_info.gfx_ver.major <= 9)
827-
? 768 * 1024 // Gen9
828-
: 2 * 768 * 1024; //reasonable default when no arch has been detected (e.g. due to old driver ver)
829-
cldnn::gfx_version gen = {device_info.gfx_ver.major, device_info.gfx_ver.minor, 0 /*ignore the revision*/};
830-
auto val = gen_kbytes_per_bank.find(gen);
831-
if (gen_kbytes_per_bank.end() != val) {
832-
auto kbytes_per_bank = val->second;
833-
auto num_banks_per_slice = device_info.num_sub_slices_per_slice > 4
834-
? next_pow_of_2(device_info.num_sub_slices_per_slice)
835-
: 2 * device_info.num_sub_slices_per_slice;
836-
L3_cache_size = kbytes_per_bank * 1024 * num_banks_per_slice * device_info.num_slices;
837-
GPU_DEBUG_INFO << "DEVICE_INFO:"
838-
<< "num_slices " << device_info.num_slices
839-
<< ", num_sub_slices_per_slice " << device_info.num_sub_slices_per_slice
840-
<< ", num_banks_per_slice " << num_banks_per_slice
841-
<< ", gen_kbytes_per_bank : " << kbytes_per_bank
842-
<< ", L3_cache_size is (MB): " << float(L3_cache_size) / 1024 / 1024 << std::endl;
843-
}
831+
<< "gfx_version.minor " << std::to_string(device_info.gfx_ver.minor)
832+
<< "Cache size " << std::to_string(device_info.max_global_cache_size) << std::endl;
833+
834+
size_t L3_cache_size = device_info.max_global_cache_size;
844835
auto config = m_configs_map.at(device_id);
845836
auto cloned_model = clone_and_transform_model(model, config, context);
846837
ov::MemBandwidthPressure memPressure = ov::mem_bandwidth_pressure_tolerance(cloned_model, L3_cache_size);

src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ device_info init_device_info(const cl::Device& device, const cl::Context& contex
224224
info.max_local_mem_size = static_cast<uint64_t>(device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>());
225225
info.max_global_mem_size = static_cast<uint64_t>(device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>());
226226
info.max_alloc_mem_size = static_cast<uint64_t>(device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>());
227+
info.max_global_cache_size = static_cast<uint64_t>(device.getInfo<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE>());
227228

228229
info.supports_image = static_cast<uint8_t>(device.getInfo<CL_DEVICE_IMAGE_SUPPORT>());
229230
info.max_image2d_width = static_cast<uint64_t>(device.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>());

0 commit comments

Comments
 (0)