Use regular division inside Scale Estimation #3210

Merged
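This PR removes the temporary `dynamic_shapes` / `convertable_division` kwargs that Scale Estimation threaded into the OpenVINO-optimized quantization functions, so the generated OV models now divide by the scale directly instead of using the "convertable" mode that may be lowered to a reciprocal multiply. A minimal numpy sketch (illustrative only, not NNCF code) of why the two modes are not bit-identical, which is what nudges the reference metrics below:

```python
import numpy as np

# Illustrative only: a "convertable" division may be lowered to a multiply by
# the reciprocal, while regular division divides directly. In float32 the two
# can differ by a few ULPs per element, enough to shift an end-to-end accuracy
# metric slightly (0.81389 -> 0.80873 in the reference data below).
rng = np.random.default_rng(0)
w = rng.standard_normal((4, 64)).astype(np.float32)    # weight block
s = rng.uniform(0.01, 1.0, (4, 1)).astype(np.float32)  # per-channel scale

regular = w / s                        # regular division (after this PR)
convertable = w * (np.float32(1) / s)  # reciprocal-multiply lowering

print(np.max(np.abs(regular - convertable)))  # small but typically non-zero
```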
12 changes: 2 additions & 10 deletions nncf/openvino/optimized_functions/functions.py
```diff
@@ -32,7 +32,6 @@ def do_int_quantization(
     reduction_axes: Optional[ReductionAxes] = None,
     precomputed_scale: Tensor = None,
     precomputed_zero_point: Tensor = None,
-    **kwargs,
 ) -> Tuple[Tensor, Tensor, Tensor]:
     """
     Quantizes the given weight tensor.
@@ -49,10 +48,7 @@ def do_int_quantization(
     scale_shape = None if precomputed_scale is None else precomputed_scale.shape
     zero_point_shape = None if precomputed_zero_point is None else precomputed_zero_point.shape
 
-    ov_model_params = OVModelParameters(
-        dynamic_shapes=kwargs.get("dynamic_shapes") is True,
-        convertable_division=kwargs.get("convertable_division") is True,
-    )
+    ov_model_params = OVModelParameters()
     ov_model_params.input_dtypes["weight"] = weight.dtype
     if precomputed_scale is not None:
         ov_model_params.input_dtypes["scale"] = precomputed_scale.dtype
@@ -107,7 +103,6 @@ def quantize_dequantize_weight(
     precomputed_scale: Optional[Tensor] = None,
     precomputed_zero_point: Optional[Tensor] = None,
     return_compressed_weight: Optional[bool] = False,
-    **kwargs,
 ) -> Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]]:
     """
     Quantizes the given weight tensor and then dequantizes it back to obtain float32 values.
@@ -132,10 +127,7 @@ def quantize_dequantize_weight(
     scale_shape = precomputed_scale.shape if precomputed_scale is not None else None
     zero_point_shape = precomputed_zero_point.shape if precomputed_zero_point is not None else None
 
-    ov_model_params = OVModelParameters(
-        dynamic_shapes=kwargs.get("dynamic_shapes") is True,
-        convertable_division=kwargs.get("convertable_division") is True,
-    )
+    ov_model_params = OVModelParameters()
     ov_model_params.input_dtypes["weight"] = weight.dtype
     if precomputed_scale is not None:
         ov_model_params.input_dtypes["scale"] = precomputed_scale.dtype
```
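For orientation, a runnable numpy-only sketch of the contract these helpers implement (illustrative; the real `do_int_quantization` operates on NNCF tensors and also accepts precomputed scales and zero points, per the signature above):

```python
import numpy as np

def int4_sym_quantize(weight: np.ndarray, reduction_axes=(1,)):
    """Illustrative INT4 symmetric quantization, not the NNCF implementation:
    returns (compressed_weight, scale, zero_point) like do_int_quantization."""
    level_low, level_high = -8, 7  # signed INT4 range
    scale = np.max(np.abs(weight), axis=reduction_axes, keepdims=True) / level_high
    compressed = np.clip(np.round(weight / scale), level_low, level_high)
    zero_point = np.zeros_like(scale)  # symmetric scheme: zero point is zero
    return compressed.astype(np.int8), scale, zero_point

w = np.random.default_rng(0).standard_normal((4, 64)).astype(np.float32)
q, scale, zp = int4_sym_quantize(w)
print(q.min(), q.max(), scale.shape)  # codes in [-8, 7], per-channel scale
```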
8 changes: 0 additions & 8 deletions nncf/quantization/algorithms/weight_compression/scale_estimation.py
```diff
@@ -245,10 +245,6 @@ def calculate_quantization_params(
     zero_scale = 0.001
     zero_mask = zero_scale * zero_mask.astype(original_weight.dtype)
 
-    # This is required for alignment with a previous OpenVINO models implementation
-    # TODO(Nikita Savelyev): remove this
-    opt_fns_kwargs = dict(dynamic_shapes=False, convertable_division=True)
-
     # iterative rectification of initial scale
     for i in range(initial_steps):
         near_to_ideal_scale = estimate_scales(original_weight, target, zero_mask, importance)
@@ -263,7 +259,6 @@ def calculate_quantization_params(
             config,
             precomputed_scale=near_to_ideal_scale,
             precomputed_zero_point=zp,
-            **opt_fns_kwargs,
         )
 
         q_weights_ = fns.zeros_like(original_weight) + out
@@ -298,7 +293,6 @@ def calculate_quantization_params(
             config,
             precomputed_scale=near_to_ideal_scale,
             precomputed_zero_point=zp,
-            **opt_fns_kwargs,
         )
         compressed_weights = fns.zeros_like(original_weight) + out
         target, zero_mask = get_target_zero_mask(compressed_weights, zp)
@@ -317,7 +311,6 @@ def calculate_quantization_params(
             config,
             precomputed_scale=scaled_scale,
             precomputed_zero_point=zp,
-            **opt_fns_kwargs,
         )
         compressed_weights = fns.zeros_like(original_weight) + out
 
@@ -335,7 +328,6 @@ def calculate_quantization_params(
             config,
             precomputed_scale=near_to_ideal_scale,
             precomputed_zero_point=zp,
-            **opt_fns_kwargs,
         )
         q_weights_ = fns.zeros_like(original_weight) + out
```
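The hunks above all sit inside the iterative scale-rectification loop of `calculate_quantization_params`. A simplified, runnable numpy sketch of that loop's shape, with `estimate_scales_np` as a stand-in for NNCF's `estimate_scales` (the real helper additionally handles group-wise shapes, zero points, and activation-derived importance):

```python
import numpy as np

def estimate_scales_np(w, q, importance):
    """Stand-in for estimate_scales: the importance-weighted least-squares
    scale s minimizing sum(importance * (w - s * q) ** 2) per channel."""
    num = np.sum(importance * w * q, axis=-1, keepdims=True)
    den = np.sum(importance * q * q, axis=-1, keepdims=True)
    return num / np.maximum(den, 1e-12)

rng = np.random.default_rng(0)
w = rng.standard_normal((4, 64)).astype(np.float32)
importance = np.ones_like(w)
scale = np.max(np.abs(w), axis=-1, keepdims=True) / 7  # initial INT4 scale

# Iterative rectification of the initial scale, mirroring the loop above:
# re-quantize with the current scale, then refit the scale to the codes.
for _ in range(5):
    q = np.clip(np.round(w / scale), -8, 7)  # regular division, per this PR
    scale = estimate_scales_np(w, q, importance)

print(float(np.mean((w - scale * q) ** 2)))  # reconstruction error after refits
```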
7 changes: 1 addition & 6 deletions nncf/quantization/algorithms/weight_compression/weight_lowering.py
```diff
@@ -430,7 +430,6 @@ def do_int_quantization(
     reduction_axes: Optional[ReductionAxes] = None,
     precomputed_scale: Tensor = None,
     precomputed_zero_point: Tensor = None,
-    **kwargs,
 ) -> Tuple[Tensor, Tensor, Tensor]:
     """
     Performs integer quantization on the given weight tensor.
@@ -462,9 +461,7 @@ def do_int_quantization(
     if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
         from nncf.openvino.optimized_functions import do_int_quantization as do_int_quantization_ov
 
-        return do_int_quantization_ov(
-            weight, config, reduction_axes, precomputed_scale, precomputed_zero_point, **kwargs
-        )
+        return do_int_quantization_ov(weight, config, reduction_axes, precomputed_scale, precomputed_zero_point)
     if not is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
         nncf_logger.info_once(
             "OpenVINO optimizations are disabled. Install OpenVINO to enable them and improve the performance."
@@ -498,7 +495,6 @@ def quantize_dequantize_weight(
     precomputed_scale: Optional[Tensor] = None,
     precomputed_zero_point: Optional[Tensor] = None,
     return_compressed_weight: Optional[bool] = False,
-    **kwargs,
 ) -> Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]]:
     """
     First quantizes the given weight tensor and then dequantizes it back to obtain float32 values.
@@ -524,7 +520,6 @@ def quantize_dequantize_weight(
             precomputed_scale,
             precomputed_zero_point,
             return_compressed_weight,
-            **kwargs,
         )
     if not is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]:
         nncf_logger.info_once(
```
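Similarly, a runnable numpy sketch of the quantize-then-dequantize round trip this entry point performs (illustrative, not the NNCF implementation; the real function dispatches to the OV-optimized path shown above when available):

```python
import numpy as np

def quantize_dequantize(weight, reduction_axes=(1,), return_compressed_weight=False):
    """Illustrative INT4 round trip: mirrors the
    Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]] return shape above."""
    scale = np.max(np.abs(weight), axis=reduction_axes, keepdims=True) / 7
    compressed = np.clip(np.round(weight / scale), -8, 7)  # regular division
    qdq = compressed * scale  # back to the float values the caller consumes
    if return_compressed_weight:
        return qdq, compressed.astype(np.int8), scale, np.zeros_like(scale)
    return qdq

w = np.random.default_rng(1).standard_normal((4, 64)).astype(np.float32)
qdq, q, scale, zp = quantize_dequantize(w, return_compressed_weight=True)
print(float(np.max(np.abs(w - qdq))))  # worst-case round-trip error
```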
4 changes: 2 additions & 2 deletions tests/post_training/data/wc_reference_data.yaml
```diff
@@ -32,11 +32,11 @@ tinyllama_data_aware_gptq_scale_estimation_stateful_backend_OV:
   num_int8: 124
   metrics_xfail_reason: "Issue-148819"
 tinyllama_scale_estimation_per_channel_backend_OV:
-  metric_value: 0.81389
+  metric_value: 0.80873
   num_int4: 188
   num_int8: 124
 tinyllama_scale_estimation_per_channel_backend_TORCH:
-  metric_value: 0.81389
+  metric_value: 0.80873
   num_int4: 188
   num_int8: 124
   atol: 0.006 # difference across devices: 0.80873 vs 0.81389
```
Comment on lines 42 to 46 (Collaborator, Author):
The reference value for PT is changed to stay aligned with OV. Interestingly, the new value for OV (0.80873) is also the value that the PT backend gets on some devices.

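For reference, 0.81389 - 0.80873 = 0.00516, so the `atol: 0.006` in these entries already absorbs the cross-device spread described above.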
2 changes: 1 addition & 1 deletion tests/post_training/data/wc_reference_data_2025.0.yaml
```diff
@@ -19,7 +19,7 @@ tinyllama_scale_estimation_group_size_64_backend_TORCH:
   num_int8: 124
   num_compressed_xfail_reason: "Issue-160006"
 tinyllama_scale_estimation_per_channel_backend_TORCH:
-  metric_value: 0.81389
+  metric_value: 0.80873
   num_int4: 188
   num_int8: 124
   atol: 0.006 # difference across devices: 0.80873 vs 0.81389
```