We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 317638e, commit 1fbd98f (Copy full SHA for 1fbd98f)
src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_kv_cache.cl
@@ -89,7 +89,7 @@ KERNEL(dynamic_quantize_gpu_kv_cache)(
89
max_value = work_group_reduce_max(max_value);
90
91
// If the range of input data is zero, it is adjusted to the minimum value(0.001).
92
- half diff_value = max_value == min_value ? (grp_max) : (max_value - min_value);
+ ACCUMULATOR_TYPE diff_value = max_value == min_value ? (grp_max) : (max_value - min_value);
93
ACCUMULATOR_TYPE scale_tmp = (ACCUMULATOR_TYPE)((CHAR_MAX - CHAR_MIN) / diff_value);
94
ACCUMULATOR_TYPE zp_tmp = (ACCUMULATOR_TYPE)(-min_value * scale_tmp) + CHAR_MIN;
95
OUTPUT1_TYPE scale = (OUTPUT1_TYPE)(scale_tmp);
0 commit comments