Initial commit

nikita-savelyevv · nikita-savelyevv · commit f600f5f166f4 · 2025-02-27T15:17:17.000+01:00
diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
@@ -294,7 +294,8 @@ def __init__(
         dataset: Optional[Union[str, List[str]]] = None,
         tokenizer: Optional[str] = None,
         processor: Optional[str] = None,
-        trust_remote_code: bool = False,
+        trust_remote_code: Optional[bool] = False,
+        init_kwargs: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -314,6 +315,8 @@ def __init__(
                 Allows to use custom code for the modeling hosted in the model repository. This option should only be
                 set for repositories you trust and in which you have read the code, as it will execute on your local
                 machine arbitrary code present in the model repository.
+            init_kwargs (`dict`, *optional*):
+                Additional parameters for NNCF calls. This explicit argument is needed for deserialization from dict.
         """
         self.num_samples = num_samples
         self.dataset = dataset
@@ -323,6 +326,7 @@ def __init__(
         if isinstance(ignored_scope, nncf.IgnoredScope):
             ignored_scope = ignored_scope.__dict__
         self.ignored_scope = ignored_scope
+        self.init_kwargs = (init_kwargs or {}) | kwargs
 
     def post_init(self):
         try:
@@ -427,6 +431,9 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
                 retained in their original precision without any quantization.
             - "int8_sym" stands for 8-bit integer symmetric quantization without zero point.
             - "int8_asym" stands for 8-bit integer asymmetric quantization with zero points per each quantization group.
+        init_kwargs (`dict`, *optional*):
+            Additional parameters for nncf.compress_weights() call. This explicit argument is needed for deserialization from dict.
+        kwargs: Additional parameters for nncf.compress_weights() call.
     """
 
     def __init__(
@@ -449,15 +456,25 @@ def __init__(
         processor: Optional[str] = None,
         lora_correction: bool = None,
         backup_precision: Optional[str] = None,
+        init_kwargs: Optional[dict] = None,
         **kwargs,
     ):
+        weight_format = kwargs.pop("weight_format", None)
+        if weight_format is not None:
+            logger.warning(
+                "The `weight_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
+                "Please use `dtype` instead."
+            )
+            dtype = weight_format
         super().__init__(
             ignored_scope=ignored_scope,
             num_samples=num_samples,
             dataset=dataset,
             tokenizer=tokenizer,
             processor=processor,
             trust_remote_code=trust_remote_code,
+            init_kwargs=init_kwargs,
+            **kwargs,
         )
         self.bits = bits
         self.sym = sym
@@ -470,12 +487,6 @@ def __init__(
         self.gptq = gptq
         self.lora_correction = lora_correction
         self.backup_precision = backup_precision
-        if kwargs.get("weight_format") is not None:
-            logger.warning(
-                "The `weight_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
-                "Please use `dtype` instead."
-            )
-            dtype = kwargs.get("weight_format")
         self.dtype = dtype
         self.post_init()
 
@@ -624,6 +635,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
             "gptq": self.gptq,
             "lora_correction": self.lora_correction,
             "backup_mode": backup_mode,
+            **self.init_kwargs,
         }
         return result
 
@@ -666,6 +678,7 @@ def __init__(
         trust_remote_code: bool = False,
         smooth_quant_alpha: Optional[float] = None,
         dtype: Optional[str] = "int8",
+        init_kwargs: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -712,27 +725,33 @@ def __init__(
                 reduces quantization error.
             dtype (`str`, defaults to "int8"):
                 Data type activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
+            init_kwargs (`dict`, *optional*):
+                Additional parameters for nncf.quantize() call. This explicit argument is needed for deserialization from dict.
+            kwargs: Additional parameters for nncf.quantize() call.
         """
+        activation_format = kwargs.pop("activation_format", None)
+        if activation_format is not None:
+            logger.warning(
+                "The `activation_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
+                "Please use `dtype` instead."
+            )
+            dtype = activation_format
         super().__init__(
             ignored_scope=ignored_scope,
             num_samples=num_samples,
             dataset=dataset,
             tokenizer=tokenizer,
             processor=processor,
             trust_remote_code=trust_remote_code,
+            init_kwargs=init_kwargs,
+            **kwargs,
         )
         self.bits = bits
         self.sym = sym
         self.model_type = model_type
         self.fast_bias_correction = fast_bias_correction
         self.overflow_fix = overflow_fix
         self.smooth_quant_alpha = smooth_quant_alpha
-        if kwargs.get("activation_format") is not None:
-            logger.warning(
-                "The `activation_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
-                "Please use `dtype` instead."
-            )
-            dtype = kwargs.get("activation_format")
         self.dtype = dtype
 
         f8_dtypes = ["f8e4m3", "f8e5m2"]
@@ -769,23 +788,19 @@ def to_nncf_dict(self) -> Dict[str, Any]:
         Returns a dictionary with the variables that are ready to use for nncf.compress_weights() call.
         """
 
-        preset = "performance" if self.sym else "mixed"
-        advanced_parameters_dict = {"overflow_fix": self.overflow_fix}
+        # Merge advanced parameters from init_kwargs if they were provided
+        init_kwargs_copy = copy.deepcopy(self.init_kwargs)
+        advanced_parameters = init_kwargs_copy.pop("advanced_parameters", nncf.AdvancedQuantizationParameters())
+        advanced_parameters.overflow_fix = nncf.OverflowFix(self.overflow_fix)
         if self.smooth_quant_alpha:
-            advanced_parameters_dict["smooth_quant_alphas"] = {"matmul": self.smooth_quant_alpha}
+            advanced_parameters.smooth_quant_alphas.matmul = self.smooth_quant_alpha
 
         mode_map = {"f8e4m3": "fp8_e4m3", "f8e5m2": "fp8_e5m2"}
         mode = mode_map.get(self.dtype)
 
+        preset = "performance" if self.sym else "mixed"
         preset = nncf.QuantizationPreset(preset)
         model_type = nncf.ModelType(self.model_type)
-        advanced_parameters = nncf.AdvancedQuantizationParameters(
-            overflow_fix=advanced_parameters_dict["overflow_fix"],
-        )
-        if "smooth_quant_alphas" in advanced_parameters_dict:
-            advanced_parameters.smooth_quant_alphas = nncf.AdvancedSmoothQuantParameters(
-                **advanced_parameters_dict["smooth_quant_alphas"]
-            )
 
         return {
             "mode": mode,
@@ -795,6 +810,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
             "model_type": model_type,
             "ignored_scope": self.get_ignored_scope_instance(),
             "advanced_parameters": advanced_parameters,
+            **init_kwargs_copy,
         }
 
 
@@ -930,7 +946,6 @@ def __init__(
                 Allows to use custom code for the modeling hosted in the model repository. This option should only be
                 set for repositories you trust and in which you have read the code, as it will execute on your local
                 machine arbitrary code present in the model repository.
-            **kwargs:
         """
         self.weight_quantization_config = self._initialize_quantization_config(
             weight_quantization_config, OVWeightQuantizationConfig
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
@@ -1034,8 +1034,17 @@ def _weight_only_quantization(
         else:
             dataset = nncf.Dataset(calibration_dataset)
 
-    wc_kwargs = copy.deepcopy(kwargs)
-    wc_kwargs.update(config.to_nncf_dict())
+    wc_kwargs = config.to_nncf_dict()
+
+    # Arguments provided in kwargs override the ones from the config
+    kwargs_intersection = set(wc_kwargs.keys()) & set(kwargs.keys())
+    if kwargs_intersection:
+        logger.warning(
+            f"The following nncf.compress_weights() arguments from the OVWeightQuantizationConfig will be overridden "
+            f"by the ones given in _weight_only_quantization call kwargs: {kwargs_intersection}."
+        )
+    wc_kwargs.update(kwargs)
+
     compressed_model = nncf.compress_weights(
         model,
         dataset=dataset,
@@ -1056,8 +1065,18 @@ def _full_quantization(
 ):
     if verify_not_optimized:
         _verify_not_optimized(model)
-    q_kwargs = copy.deepcopy(kwargs)
-    q_kwargs.update(quantization_config.to_nncf_dict())
+
+    q_kwargs = quantization_config.to_nncf_dict()
+
+    # Arguments provided in kwargs override the ones from the config
+    kwargs_intersection = set(q_kwargs.keys()) & set(kwargs.keys())
+    if kwargs_intersection:
+        logger.warning(
+            f"The following nncf.quantize() arguments from the OVQuantizationConfig will be overridden "
+            f"by the ones given in _full_quantization call kwargs: {kwargs_intersection}."
+        )
+    q_kwargs.update(kwargs)
+
     quantized_model = nncf.quantize(model, calibration_dataset=calibration_dataset, **q_kwargs)
 
     _remove_f16_kv_cache_precision_flag(quantized_model)
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -11,17 +11,18 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
+import dataclasses
 import inspect
 
 # ruff: noqa
 
 import itertools
 import logging
 import unittest
 from collections import defaultdict
+from collections.abc import Iterable
 from enum import Enum
 from functools import partial
-from typing import Union
+from typing import Union, Type
 
 import openvino as ov
 import pytest
@@ -77,7 +78,7 @@
 from optimum.intel.openvino.utils import TemporaryDirectory
 from copy import deepcopy
 
-from optimum.intel.openvino.quantization import InferRequestWrapper
+from optimum.intel.openvino.quantization import InferRequestWrapper, _weight_only_quantization, _full_quantization
 from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
 from utils_tests import (
     MODEL_NAMES,
@@ -1241,7 +1242,6 @@ class OVQuantizationConfigTest(unittest.TestCase):
             ),
         ),
         (OVQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])),),
-        (OVDynamicQuantizationConfig(bits=8, sym=True),),
     )
 
     QUANTIZATION_CONFIG_DICTS = (
@@ -1306,6 +1306,60 @@ class OVQuantizationConfigTest(unittest.TestCase):
         (dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
     )
 
+    QUANTIZATION_CONFIGS_WITH_KWARGS = (
+        (
+            OVWeightQuantizationConfig,
+            {
+                "advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path"),
+                "some_arg": "some_value",
+            },
+            {
+                "advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path"),
+                "some_arg": "some_value",
+            },
+        ),
+        (
+            OVQuantizationConfig,
+            {
+                "advanced_parameters": nncf.AdvancedQuantizationParameters(disable_channel_alignment=True),
+                "some_arg": "some_value",
+            },
+            {
+                "advanced_parameters": nncf.AdvancedQuantizationParameters(
+                    overflow_fix=nncf.OverflowFix.DISABLE,
+                    disable_channel_alignment=True,
+                ),
+                "some_arg": "some_value",
+            },
+        ),
+        (
+            OVQuantizationConfig,
+            {
+                "advanced_parameters": nncf.AdvancedQuantizationParameters(overflow_fix=nncf.OverflowFix.ENABLE),
+            },
+            {
+                "advanced_parameters": nncf.AdvancedQuantizationParameters(
+                    overflow_fix=nncf.OverflowFix.DISABLE,
+                ),
+            },
+        ),
+        (
+            OVQuantizationConfig,
+            {
+                "smooth_quant_alpha": 0.5,
+                "advanced_parameters": nncf.AdvancedQuantizationParameters(
+                    smooth_quant_alphas=nncf.AdvancedSmoothQuantParameters(matmul=0.7, convolution=0.7),
+                ),
+            },
+            {
+                "advanced_parameters": nncf.AdvancedQuantizationParameters(
+                    overflow_fix=nncf.OverflowFix.DISABLE,
+                    smooth_quant_alphas=nncf.AdvancedSmoothQuantParameters(matmul=0.5, convolution=0.7),
+                ),
+            },
+        ),
+    )
+
     def get_default_configurations() -> dict:
         default_configurations = deepcopy(_DEFAULT_4BIT_CONFIGS)
         default_configurations.update({"default": _DEFAULT_4BIT_CONFIG})
@@ -1357,6 +1411,57 @@ def test_for_no_short_id_duplicates(self):
             assert short_id not in short_ids
             short_ids.add(short_id)
 
+    @parameterized.expand(QUANTIZATION_CONFIGS_WITH_KWARGS)
+    def test_config_init_kwargs(
+        self,
+        config_type: Type[Union[OVWeightQuantizationConfig, OVQuantizationConfig]],
+        config_kwargs: dict,
+        ref_nncf_dict: dict,
+    ):
+        """Check `to_nncf_dict()` of a config built with extra kwargs against the defaults overlaid with the reference."""
+        actual, defaults = config_type(**config_kwargs).to_nncf_dict(), config_type().to_nncf_dict()
+        self.assertTrue(self.compare_objects(actual, {**defaults, **ref_nncf_dict}))
+
+    @parameterized.expand(
+        [
+            ("nncf.compress_weights", "_weight_only_quantization", "dataset"),
+            ("nncf.quantize", "_full_quantization", "calibration_dataset"),
+        ]
+    )
+    def test_quantization_kwargs_override(self, mock_method_name, quantization_function, dataset_key):
+        """Call-site kwargs must take precedence over the values returned by `to_nncf_dict()`."""
+        # `import unittest` alone does not load the `unittest.mock` submodule, so import it explicitly
+        from unittest import mock
+
+        with mock.patch(mock_method_name) as mock_method:
+            mock_model = mock.Mock([])
+            mock_model.get_rt_info = mock.Mock(return_value={})
+            mock_quantization_config = mock.Mock()
+            mock_quantization_config.to_nncf_dict.return_value = {"param1": "value1", "param2": "value2"}
+            additional_kwargs = {"param2": "new_value2", "param3": "value3"}
+
+            # The parameter carries the function *name*; look the callable up in this module's globals
+            globals()[quantization_function](mock_model, mock_quantization_config, None, **additional_kwargs)
+
+            expected_kwargs = {"param1": "value1", "param2": "new_value2", "param3": "value3", dataset_key: None}
+            mock_method.assert_called_once_with(mock_model, **expected_kwargs)
+
+    @staticmethod
+    def compare_objects(o1, o2) -> bool:
+        """Recursively compare two objects, descending into dataclasses, dicts and non-string iterables."""
+        if dataclasses.is_dataclass(o1) and dataclasses.is_dataclass(o2):
+            o1, o2 = o1.__dict__, o2.__dict__
+        if isinstance(o1, dict) and isinstance(o2, dict):
+            # A key present on only one side is a mismatch, not a KeyError
+            if o1.keys() != o2.keys():
+                return False
+            return all(OVQuantizationConfigTest.compare_objects(o1[k], o2[k]) for k in o1)
+        if isinstance(o1, Iterable) and isinstance(o2, Iterable) and not (isinstance(o1, str) or isinstance(o2, str)):
+            # zip() stops at the shorter input, so lengths have to be compared explicitly
+            o1, o2 = list(o1), list(o2)
+            same_len = len(o1) == len(o2)
+            return same_len and all(OVQuantizationConfigTest.compare_objects(a, b) for a, b in zip(o1, o2))
+        return o1 == o2
+
 
 class InferRequestWrapperTest(unittest.TestCase):
     MODEL_NAME = ("whisper",)