
Commit 1762c5c

Refactoring of minmax algorithm parameter setup and checking (#3309)
### Changes

- Fixed the `_override_device` function, which used `==` (comparison) where `=` (assignment) was intended and therefore never actually switched the target device from NPU to CPU:
  https://github.com/openvinotoolkit/nncf/blob/5c75e22c2888ebde2a87534c8cb204497899b0b7/nncf/quantization/algorithms/min_max/algorithm.py#L253
- Apply the overflow fix only for 8-bit quantization by default:
  https://github.com/openvinotoolkit/nncf/blob/5c75e22c2888ebde2a87534c8cb204497899b0b7/nncf/quantization/advanced_parameters.py#L36

### Reason for changes

The overflow fix should only be applied for 8-bit quantization.

### Related tickets

None

### Tests

- `test_target_device`
- `test_npu_target_device`
- `test_overflow_fix`
1 parent c40f73e commit 1762c5c
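To make the first fix concrete: with an 8-bit activation/weight (A8W8) scheme, an NPU target is now remapped to the CPU quantization scheme, while sub-8-bit schemes keep the NPU device. A minimal sketch of that behavior, mirroring the new `test_npu_target_device` test added below (import paths are assumed to match those used elsewhere in the repository, and the check pokes the private `_target_device` attribute exactly as the tests do):

```python
from nncf.parameters import TargetDevice
from nncf.quantization.advanced_parameters import QuantizationParameters
from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization

# A8W8 on NPU: the target is remapped to the CPU quantization scheme.
# Before this fix, `==` (comparison) was used where `=` (assignment) was
# intended, so the remap silently never happened.
algo = MinMaxQuantization(
    target_device=TargetDevice.NPU,
    activations_quantization_params=QuantizationParameters(num_bits=8),
    weights_quantization_params=QuantizationParameters(num_bits=8),
)
assert algo._target_device == TargetDevice.CPU

# A sub-8-bit scheme keeps the NPU target device as-is.
algo = MinMaxQuantization(
    target_device=TargetDevice.NPU,
    activations_quantization_params=QuantizationParameters(num_bits=4),
    weights_quantization_params=QuantizationParameters(num_bits=4),
)
assert algo._target_device == TargetDevice.NPU
```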

6 files changed: +80, -86


#### `nncf/quantization/algorithms/min_max/algorithm.py` (+46, -53)
```diff
@@ -208,16 +208,12 @@ def __init__(
         self._ignored_scope = IgnoredScope() if ignored_scope is None else ignored_scope
         self.quantizer_propagation_rule = quantizer_propagation_rule
 
-        # preset definition
-        if self._preset is None:
-            if model_type == ModelType.TRANSFORMER:
-                self._preset = QuantizationPreset.MIXED
-            else:
-                self._preset = QuantizationPreset.PERFORMANCE
+        # validate input parameter types
+        self._validate_param_types()
 
-        self._override_device()
-        self._set_mode_based_defaults()
-        self._review_mode_based_defaults()
+        # set and validate mode based parameters
+        self._set_mode_based_params()
+        self._review_mode_based_params()
 
         self._quantization_params = {
             QuantizerGroup.WEIGHTS: self._weights_quantization_params,
@@ -238,35 +234,64 @@ def __init__(
         self._reset_cache()
         self._algorithm_key = f"MMQ_{hash(self)}"
 
-    def _override_device(self) -> None:
+    def _validate_param_types(self) -> None:
+        """
+        Validates the types of the provided quantization parameters.
+
+        Raises:
+            nncf.ParameterNotSupportedError: If the parameter types do not match the expected quantization mode.
+        """
+        expected_cls = QuantizationParameters
+        if self._mode in (QuantizationMode.FP8_E4M3, QuantizationMode.FP8_E5M2):
+            expected_cls = FP8QuantizationParameters
+
+        for param, name in [
+            (self._weights_quantization_params, "weights"),
+            (self._activations_quantization_params, "activations"),
+        ]:
+            if param and not isinstance(param, expected_cls):
+                msg = f"Quantization parameters for {name} ({param}) are not supported with the selected mode!"
+                raise nncf.ParameterNotSupportedError(msg)
+
+    def _set_mode_based_params(self) -> None:
         """
-        Overrides NPU device to use CPU quantization scheme.
+        Sets parameters for the algorithms based on the provided mode.
         """
-        if self._target_device == TargetDevice.NPU:
-            act_bits, weight_bits = 8, 8
+        if self._mode is None:
+            if self._preset is None:
+                if self._model_type == ModelType.TRANSFORMER:
+                    self._preset = QuantizationPreset.MIXED
+                else:
+                    self._preset = QuantizationPreset.PERFORMANCE
+
+            act_bits = DEFAULT_QCONFIG.num_bits
+            weight_bits = DEFAULT_QCONFIG.num_bits
             if self._activations_quantization_params and self._activations_quantization_params.num_bits:
                 act_bits = self._activations_quantization_params.num_bits
             if self._weights_quantization_params and self._weights_quantization_params.num_bits:
                 weight_bits = self._weights_quantization_params.num_bits
 
-            if act_bits == 8 and weight_bits == 8:
-                self._target_device == TargetDevice.CPU
+            quant_scheme_a8w8 = act_bits == 8 and weight_bits == 8
+            if self._target_device == TargetDevice.NPU and quant_scheme_a8w8:
+                self._target_device = TargetDevice.CPU
                 nncf_logger.debug("Target device NPU was changed to CPU!")
 
-    def _set_mode_based_defaults(self) -> None:
-        """
-        Sets defaults for the algorithms based on the provided mode.
-        """
+            if self._overflow_fix is None and not quant_scheme_a8w8:
+                self._overflow_fix = OverflowFix.DISABLE
+                nncf_logger.debug("Overflow fix was disabled because quantization scheme is not A8W8.")
+        elif self._preset is None:
+            self._preset = QuantizationPreset.PERFORMANCE
+
         mode_based_defaults = MODE_BASED_DEFAULTS[self._mode]
         for field in dataclasses.fields(mode_based_defaults):
             self_name = "_" + field.name
             default_value = getattr(mode_based_defaults, field.name)
             if getattr(self, self_name) is None:
                 setattr(self, self_name, default_value)
 
-    def _review_mode_based_defaults(self):
+    def _review_mode_based_params(self):
         """
-        Reviews default values because mode option doesn't support them.
+        Reviews parameter values because mode option doesn't support them.
         """
         if self._mode in (QuantizationMode.FP8_E4M3, QuantizationMode.FP8_E5M2):
             nncf_logger.warning(f"You're using experimental option mode with {self._mode} value.")
@@ -287,38 +312,6 @@ def _review_mode_based_defaults(self):
                 msg = "quantize_outputs option is not supported with the mode option!"
                 raise nncf.ParameterNotSupportedError(msg)
 
-            if isinstance(self._weights_quantization_params, QuantizationParameters):
-                msg = (
-                    "quantization_params option for weights with "
-                    f"{self._weights_quantization_params} "
-                    "value is not supported with the mode option!"
-                )
-                raise nncf.ParameterNotSupportedError(msg)
-
-            if isinstance(self._activations_quantization_params, QuantizationParameters):
-                msg = (
-                    "quantization_params option for activations with "
-                    f"{self._activations_quantization_params} "
-                    "value is not supported with the mode option!"
-                )
-                raise nncf.ParameterNotSupportedError(msg)
-        elif self._mode is None:
-            if isinstance(self._weights_quantization_params, FP8QuantizationParameters):
-                msg = (
-                    "quantization_params option for weights with "
-                    f"{self._weights_quantization_params} "
-                    "value is not supported with the mode: None option!"
-                )
-                raise nncf.ParameterNotSupportedError(msg)
-
-            if isinstance(self._activations_quantization_params, FP8QuantizationParameters):
-                msg = (
-                    "quantization_params option for activations with "
-                    f"{self._activations_quantization_params} "
-                    "value is not supported with the mode: None option!"
-                )
-                raise nncf.ParameterNotSupportedError(msg)
-
     def _reset_cache(self) -> None:
         """
         Marks cache by noninitialized values. Needs to be called when the new quantizer setup is needed.
```
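The net effect of the rewritten setup above: when `overflow_fix` is not set explicitly, it keeps its usual default only for the A8W8 scheme and falls back to `OverflowFix.DISABLE` for any other bit width. A minimal sketch of that behavior, mirroring the new `test_overflow_fix` below (import paths are assumed to match those used elsewhere in the repository):

```python
from nncf.quantization.advanced_parameters import OverflowFix, QuantizationParameters
from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization

# A8W8: the default overflow fix (FIRST_LAYER) is applied.
algo_a8w8 = MinMaxQuantization(
    activations_quantization_params=QuantizationParameters(num_bits=8),
    weights_quantization_params=QuantizationParameters(num_bits=8),
)
assert algo_a8w8._overflow_fix == OverflowFix.FIRST_LAYER

# Any other scheme (here A8W4): the overflow fix is disabled by default.
algo_a8w4 = MinMaxQuantization(
    activations_quantization_params=QuantizationParameters(num_bits=8),
    weights_quantization_params=QuantizationParameters(num_bits=4),
)
assert algo_a8w4._overflow_fix == OverflowFix.DISABLE
```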

#### `tests/common/quantization/test_minmax.py` (+34)
```diff
@@ -245,3 +245,37 @@ def fill_qsetup_mock(self, *args):
         for _ in range(run_nums):
             algo._get_quantization_target_points(None, None)
         assert find_called == fill_called == 2
+
+
+@pytest.mark.parametrize(
+    "target_device", [target_device for target_device in TargetDevice if target_device != TargetDevice.NPU]
+)
+def test_target_device(target_device):
+    min_max_algo = MinMaxQuantization(target_device=target_device)
+    assert min_max_algo._target_device == target_device
+
+
+@pytest.mark.parametrize("num_bits, ref_hw_target_device", zip([8, 4], [TargetDevice.CPU, TargetDevice.NPU]))
+def test_npu_target_device(num_bits, ref_hw_target_device):
+    min_max_algo = MinMaxQuantization(
+        target_device=TargetDevice.NPU,
+        activations_quantization_params=QuantizationParameters(num_bits=num_bits),
+        weights_quantization_params=QuantizationParameters(num_bits=num_bits),
+    )
+    assert min_max_algo._target_device == ref_hw_target_device
+
+
+@pytest.mark.parametrize("activation_bits", [8, 4])
+@pytest.mark.parametrize("weight_bits", [8, 4])
+def test_overflow_fix(activation_bits, weight_bits):
+    quant_scheme_a8w8 = activation_bits == 8 and weight_bits == 8
+
+    min_max_algo = MinMaxQuantization(
+        activations_quantization_params=QuantizationParameters(num_bits=activation_bits),
+        weights_quantization_params=QuantizationParameters(num_bits=weight_bits),
+    )
+
+    if quant_scheme_a8w8:
+        assert min_max_algo._overflow_fix == OverflowFix.FIRST_LAYER
+    else:
+        assert min_max_algo._overflow_fix == OverflowFix.DISABLE
```
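For verification, the three new tests can be run directly with pytest, for example `pytest tests/common/quantization/test_minmax.py -k "target_device or overflow_fix"` (the `-k` selection expression here is just an illustrative way to pick out `test_target_device`, `test_npu_target_device`, and `test_overflow_fix`).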

#### `tests/onnx/quantization/test_ptq_params.py` (-8)
```diff
@@ -27,7 +27,6 @@
 from nncf.onnx.graph.transformations.commands import ONNXQuantizerInsertionCommand
 from nncf.onnx.graph.transformations.commands import ONNXTargetPoint
 from nncf.parameters import TargetDevice
-from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
 from nncf.quantization.algorithms.min_max.onnx_backend import ONNXMinMaxAlgoBackend
 from nncf.scopes import IgnoredScope
 from tests.common.quantization.metatypes import CatTestMetatype
@@ -49,13 +48,6 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter
     return PatternsManager.get_full_ignored_pattern_graph(backend=BackendType.ONNX, device=device)
 
 
-@pytest.mark.parametrize("target_device", TargetDevice)
-def test_target_device(target_device):
-    min_max_algo = MinMaxQuantization(target_device=target_device)
-    min_max_algo._backend_entity = ONNXMinMaxAlgoBackend()
-    assert min_max_algo._target_device == target_device
-
-
 class TestPTQParams(TemplateTestPTQParams):
     def get_algo_backend(self):
         return ONNXMinMaxAlgoBackend()
```

#### `tests/openvino/native/quantization/test_ptq_params.py` (-9)
```diff
@@ -17,7 +17,6 @@
 from nncf.common.graph.patterns.manager import PatternsManager
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.commands import TransformationType
-from nncf.common.hardware.config import HW_CONFIG_TYPE_TARGET_DEVICE_MAP
 from nncf.common.utils.backend import BackendType
 from nncf.openvino.graph.metatypes.openvino_metatypes import OVConcatMetatype
 from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype
@@ -27,7 +26,6 @@
 from nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand
 from nncf.openvino.graph.transformations.commands import OVTargetPoint
 from nncf.parameters import TargetDevice
-from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
 from nncf.quantization.algorithms.min_max.openvino_backend import OVMinMaxAlgoBackend
 from nncf.scopes import IgnoredScope
 from tests.common.quantization.metatypes import CatTestMetatype
@@ -48,13 +46,6 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter
     return PatternsManager.get_full_ignored_pattern_graph(backend=BackendType.OPENVINO, device=device)
 
 
-@pytest.mark.parametrize("target_device", [TargetDevice.CPU, TargetDevice.GPU, TargetDevice.NPU])
-def test_target_device(target_device):
-    min_max_algo = MinMaxQuantization(target_device=target_device)
-    min_max_algo._backend_entity = OVMinMaxAlgoBackend()
-    assert min_max_algo._target_device.value == HW_CONFIG_TYPE_TARGET_DEVICE_MAP[target_device.value]
-
-
 class TestPTQParams(TemplateTestPTQParams):
     def get_algo_backend(self):
         return OVMinMaxAlgoBackend()
```

#### `tests/torch/fx/test_ptq_params.py` (-8)
```diff
@@ -20,7 +20,6 @@
 from nncf.experimental.torch.fx.commands import FXApplyTransformationCommand
 from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
 from nncf.parameters import TargetDevice
-from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
 from nncf.quantization.algorithms.min_max.torch_fx_backend import FXMinMaxAlgoBackend
 from nncf.scopes import IgnoredScope
 from nncf.torch.graph.graph import PTNNCFGraph
@@ -48,13 +47,6 @@ def get_ignored_patterns(device: TargetDevice = TargetDevice.ANY) -> GraphPatter
     return PatternsManager.get_full_ignored_pattern_graph(backend=BackendType.TORCH_FX, device=device)
 
 
-@pytest.mark.parametrize("target_device", TargetDevice)
-def test_target_device(target_device):
-    min_max_algo = MinMaxQuantization(target_device=target_device)
-    min_max_algo._backend_entity = FXMinMaxAlgoBackend()
-    assert min_max_algo._target_device == target_device
-
-
 class TestPTQParams(TemplateTestPTQParams):
     def get_algo_backend(self):
         return FXMinMaxAlgoBackend()
```

#### `tests/torch/ptq/test_ptq_params.py` (-8)
```diff
@@ -19,7 +19,6 @@
 from nncf.common.graph.transformations.commands import TransformationType
 from nncf.common.utils.backend import BackendType
 from nncf.parameters import TargetDevice
-from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
 from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend
 from nncf.scopes import IgnoredScope
 from nncf.torch.graph.graph import PTNNCFGraph
@@ -68,13 +67,6 @@ def forward(self, x):
         return self.depthwise_conv(x)
 
 
-@pytest.mark.parametrize("target_device", TargetDevice)
-def test_target_device(target_device):
-    min_max_algo = MinMaxQuantization(target_device=target_device)
-    min_max_algo._backend_entity = PTMinMaxAlgoBackend()
-    assert min_max_algo._target_device == target_device
-
-
 class TestPTQParams(TemplateTestPTQParams):
     def get_algo_backend(self):
         return PTMinMaxAlgoBackend()
```
