Skip to content

Commit 3fbf0b7

Browse files
Use regular division inside Scale Estimation
Parent: f3f232f · Commit: 3fbf0b7

File tree

3 files changed

+3
-24
lines changed

3 files changed

+3
-24
lines changed

nncf/openvino/optimized_functions/functions.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ def do_int_quantization(
3232
reduction_axes: Optional[ReductionAxes] = None,
3333
precomputed_scale: Tensor = None,
3434
precomputed_zero_point: Tensor = None,
35-
**kwargs,
3635
) -> Tuple[Tensor, Tensor, Tensor]:
3736
"""
3837
Quantizes the given weight tensor.
@@ -49,10 +48,7 @@ def do_int_quantization(
4948
scale_shape = None if precomputed_scale is None else precomputed_scale.shape
5049
zero_point_shape = None if precomputed_zero_point is None else precomputed_zero_point.shape
5150

52-
ov_model_params = OVModelParameters(
53-
dynamic_shapes=kwargs.get("dynamic_shapes") is True,
54-
convertable_division=kwargs.get("convertable_division") is True,
55-
)
51+
ov_model_params = OVModelParameters()
5652
ov_model_params.input_dtypes["weight"] = weight.dtype
5753
if precomputed_scale is not None:
5854
ov_model_params.input_dtypes["scale"] = precomputed_scale.dtype
@@ -107,7 +103,6 @@ def quantize_dequantize_weight(
107103
precomputed_scale: Optional[Tensor] = None,
108104
precomputed_zero_point: Optional[Tensor] = None,
109105
return_compressed_weight: Optional[bool] = False,
110-
**kwargs,
111106
) -> Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]]:
112107
"""
113108
Quantizes the given weight tensor and then dequantizes it back to obtain float32 values.
@@ -132,10 +127,7 @@ def quantize_dequantize_weight(
132127
scale_shape = precomputed_scale.shape if precomputed_scale is not None else None
133128
zero_point_shape = precomputed_zero_point.shape if precomputed_zero_point is not None else None
134129

135-
ov_model_params = OVModelParameters(
136-
dynamic_shapes=kwargs.get("dynamic_shapes") is True,
137-
convertable_division=kwargs.get("convertable_division") is True,
138-
)
130+
ov_model_params = OVModelParameters()
139131
ov_model_params.input_dtypes["weight"] = weight.dtype
140132
if precomputed_scale is not None:
141133
ov_model_params.input_dtypes["scale"] = precomputed_scale.dtype

nncf/quantization/algorithms/weight_compression/scale_estimation.py

-8
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,6 @@ def calculate_quantization_params(
255255
zero_scale = 0.001
256256
zero_mask = zero_scale * zero_mask.astype(original_weight.dtype)
257257

258-
# This is required for alignment with a previous OpenVINO models implementation
259-
# TODO(Nikita Savelyev): remove this
260-
opt_fns_kwargs = dict(dynamic_shapes=False, convertable_division=True)
261-
262258
# iterative rectification of initial scale
263259
for i in range(initial_steps):
264260
near_to_ideal_scale = estimate_scales(original_weight, target, zero_mask, importance)
@@ -273,7 +269,6 @@ def calculate_quantization_params(
273269
config,
274270
precomputed_scale=near_to_ideal_scale,
275271
precomputed_zero_point=zp,
276-
**opt_fns_kwargs,
277272
)
278273

279274
q_weights_ = fns.zeros_like(original_weight) + out
@@ -308,7 +303,6 @@ def calculate_quantization_params(
308303
config,
309304
precomputed_scale=near_to_ideal_scale,
310305
precomputed_zero_point=zp,
311-
**opt_fns_kwargs,
312306
)
313307
compressed_weights = fns.zeros_like(original_weight) + out
314308
target, zero_mask = get_target_zero_mask(compressed_weights, zp)
@@ -327,7 +321,6 @@ def calculate_quantization_params(
327321
config,
328322
precomputed_scale=scaled_scale,
329323
precomputed_zero_point=zp,
330-
**opt_fns_kwargs,
331324
)
332325
compressed_weights = fns.zeros_like(original_weight) + out
333326

@@ -345,7 +338,6 @@ def calculate_quantization_params(
345338
config,
346339
precomputed_scale=near_to_ideal_scale,
347340
precomputed_zero_point=zp,
348-
**opt_fns_kwargs,
349341
)
350342
q_weights_ = fns.zeros_like(original_weight) + out
351343

nncf/quantization/algorithms/weight_compression/weight_lowering.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,6 @@ def do_int_quantization(
431431
reduction_axes: Optional[ReductionAxes] = None,
432432
precomputed_scale: Tensor = None,
433433
precomputed_zero_point: Tensor = None,
434-
**kwargs,
435434
) -> Tuple[Tensor, Tensor, Tensor]:
436435
"""
437436
Performs integer quantization on the given weight tensor.
@@ -461,9 +460,7 @@ def do_int_quantization(
461460
if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
462461
from nncf.openvino.optimized_functions import do_int_quantization as do_int_quantization_ov
463462

464-
return do_int_quantization_ov(
465-
weight, config, reduction_axes, precomputed_scale, precomputed_zero_point, **kwargs
466-
)
463+
return do_int_quantization_ov(weight, config, reduction_axes, precomputed_scale, precomputed_zero_point)
467464
if not is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
468465
nncf_logger.info_once(
469466
"OpenVINO optimizations are disabled. Install OpenVINO to enable them and improve the performance."
@@ -496,7 +493,6 @@ def quantize_dequantize_weight(
496493
precomputed_scale: Optional[Tensor] = None,
497494
precomputed_zero_point: Optional[Tensor] = None,
498495
return_compressed_weight: Optional[bool] = False,
499-
**kwargs,
500496
) -> Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]]:
501497
"""
502498
First quantizes the given weight tensor and then dequantizes it back to obtain float32 values.
@@ -522,7 +518,6 @@ def quantize_dequantize_weight(
522518
precomputed_scale,
523519
precomputed_zero_point,
524520
return_compressed_weight,
525-
**kwargs,
526521
)
527522
if not is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
528523
nncf_logger.info_once(

0 commit comments

Comments (0)