Remove nncf dependency from openvino configs

echarlaix · echarlaix · commit f332377baa98 · 2024-04-17T11:43:27.000+02:00
diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
@@ -35,7 +35,12 @@ jobs:
         pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
     - name: Test with Pytest
       run: |
-        pytest tests/openvino/ --ignore test_modeling_basic --durations=0
+        pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
+
+    - name: Test basic
+      run: |
+        pip uninstall -y nncf
+        pytest tests/openvino/test_modeling_basic.py
     - name: Test openvino-nightly
       run: |
         pip uninstall -y openvino
diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py
@@ -16,17 +16,18 @@
 import logging
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
-import nncf
 import torch
-from nncf.quantization.advanced_parameters import OverflowFix
 from transformers import PretrainedConfig
 from transformers.utils.quantization_config import QuantizationConfigMixin, QuantizationMethod
 
 from optimum.configuration_utils import BaseConfig
 
 
+if TYPE_CHECKING:
+    import nncf
+
 logger = logging.getLogger(__name__)
 
 _DEFAULT_4BIT_CONFIGS = {
@@ -75,7 +76,7 @@ def __init__(
             weight_only (`bool`, *optional*):
-                Used to explicitly specify type of quantization (weight-only of full) to apply.
+                Used to explicitly specify type of quantization (weight-only or full) to apply.
         """
-        if isinstance(ignored_scope, nncf.IgnoredScope):
+        if not isinstance(ignored_scope, dict):
             ignored_scope = ignored_scope.__dict__
         self.ignored_scope = ignored_scope
         self.num_samples = num_samples
@@ -91,7 +92,9 @@ def post_init(self):
         if not (self.num_samples is None or isinstance(self.num_samples, int) and self.num_samples > 0):
             raise ValueError(f"`num_samples` is expected to be a positive integer, but found: {self.num_samples}")
 
-    def get_ignored_scope_instance(self) -> nncf.IgnoredScope:
+    def get_ignored_scope_instance(self) -> "nncf.IgnoredScope":
+        import nncf
+
         if self.ignored_scope is None:
             return nncf.IgnoredScope()
         return nncf.IgnoredScope(**copy.deepcopy(self.ignored_scope))
@@ -309,12 +312,12 @@ def post_init(self):
 class OVQuantizationConfig(OVQuantizationConfigBase):
     def __init__(
         self,
+        sym: bool = False,
         ignored_scope: Optional[dict] = None,
         num_samples: Optional[int] = 300,
-        preset: nncf.QuantizationPreset = None,
-        model_type: nncf.ModelType = nncf.ModelType.TRANSFORMER,
+        model_type: "nncf.ModelType" = None,
         fast_bias_correction: bool = True,
-        overflow_fix: OverflowFix = OverflowFix.DISABLE,
+        overflow_fix: str = "disable",
         weight_only: Optional[bool] = False,
         **kwargs,
     ):
@@ -323,23 +326,18 @@ def __init__(
         compression, during quantization both weights and activations are converted to lower precision.
         For weight-only model quantization please see OVWeightQuantizationConfig.
         Args:
+            sym (`bool`, defaults to `False`):
+                Whether to use symmetric quantization on the activations. Symmetric quantization will be applied on the weights in any case.
             ignored_scope (`dict`, *optional*):
                 An ignored scope that defines the list of model nodes to be ignored during quantization. Dictionary
                 entries provided via this argument are used to create an instance of `nncf.IgnoredScope` class.
             num_samples (`int`, *optional*):
                 The maximum number of samples composing the calibration dataset.
-            preset (`nncf.QuantizationPreset`, *optional*):
-                A preset controls the quantization mode (symmetric and asymmetric).
-                It can take the following values:
-                - `performance`: Symmetric quantization of weights and activations.
-                - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
-                Default value is None. In this case, `mixed` preset is used for `transformer`
-                model type otherwise `performance`.
             model_type (`nncf.ModelType`, defaults to nncf.ModelType.TRANSFORMER):
                 Model type is needed to specify additional patterns in the model. Supported only `transformer` now.
             fast_bias_correction (`bool`, defaults to True):
                 Whether to apply fast or full bias correction algorithm.
-            overflow_fix (`nncf.OverflowFix`, default to OverflowFix.DISABLE):
+            overflow_fix (`str`, defaults to "disable"):
                 Parameter for controlling overflow fix setting.
             weight_only (`bool`, *optional*):
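-                Used to explicitly specify type of quantization (weight-only of full) to apply. Useful when building
+                Used to explicitly specify type of quantization (weight-only or full) to apply. Useful when building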
@@ -352,33 +350,24 @@ def __init__(
             )
         super().__init__(ignored_scope, num_samples, False)
         # TODO: remove checks below once NNCF is updated to 2.10
-        if isinstance(overflow_fix, str):
-            overflow_fix = OverflowFix(overflow_fix)
-        if isinstance(preset, str):
-            preset = nncf.QuantizationPreset(preset)
-
-        self.preset = preset
+        self.sym = sym
         self.model_type = model_type
         self.fast_bias_correction = fast_bias_correction
         self.overflow_fix = overflow_fix
         self.post_init()
 
     def to_dict(self) -> Dict[str, Any]:
         # TODO: remove code below once NNCF is updated to 2.10
-        if isinstance(self.overflow_fix, Enum) or isinstance(self.preset, Enum):
+        if isinstance(self.overflow_fix, Enum):
             overflow_fix_value = (
                 None
                 if self.overflow_fix is None
                 else self.overflow_fix
                 if isinstance(self.overflow_fix, str)
                 else self.overflow_fix.value
             )
-            preset_value = (
-                None if self.preset is None else self.preset if isinstance(self.preset, str) else self.preset.value
-            )
             self_copy = copy.deepcopy(self)
             self_copy.overflow_fix = overflow_fix_value
-            self_copy.preset = preset_value
             return self_copy.to_dict()
         return super().to_dict()
 
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
@@ -26,7 +26,7 @@
 import torch
 import transformers
 from nncf import CompressWeightsMode, SensitivityMetric
-from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
+from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters, OverflowFix
 from nncf.torch import register_module
 from nncf.torch.initialization import PTInitializingDataLoader
 from openvino._offline_transformations import compress_quantize_weights_transformation
@@ -378,10 +378,12 @@ def _quantize_ovbasemodel(
             quantization_dataset,
             subset_size=quantization_config.num_samples,
             ignored_scope=quantization_config.get_ignored_scope_instance(),
-            model_type=quantization_config.model_type,
-            preset=quantization_config.preset,
+            model_type=quantization_config.model_type or nncf.ModelType.TRANSFORMER,
+            preset=nncf.QuantizationPreset.PERFORMANCE if quantization_config.sym else nncf.QuantizationPreset.MIXED,
             fast_bias_correction=quantization_config.fast_bias_correction,
-            advanced_parameters=nncf.AdvancedQuantizationParameters(overflow_fix=quantization_config.overflow_fix),
+            advanced_parameters=nncf.AdvancedQuantizationParameters(
+                overflow_fix=OverflowFix(quantization_config.overflow_fix)
+            ),
             **kwargs,
         )
         self.model.model = quantized_model
@@ -476,10 +478,14 @@ def _quantize_torchmodel(
                 quantization_dataset,
                 subset_size=quantization_config.num_samples,
                 ignored_scope=quantization_config.get_ignored_scope_instance(),
-                model_type=quantization_config.model_type,
-                preset=quantization_config.preset,
+                model_type=quantization_config.model_type or nncf.ModelType.TRANSFORMER,
+                preset=nncf.QuantizationPreset.PERFORMANCE
+                if quantization_config.sym
+                else nncf.QuantizationPreset.MIXED,
                 fast_bias_correction=quantization_config.fast_bias_correction,
-                advanced_parameters=nncf.AdvancedQuantizationParameters(overflow_fix=quantization_config.overflow_fix),
+                advanced_parameters=nncf.AdvancedQuantizationParameters(
+                    overflow_fix=OverflowFix(quantization_config.overflow_fix)
+                ),
                 **kwargs,
             )
 
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
@@ -748,7 +748,7 @@ class OVQuantizationConfigTest(unittest.TestCase):
             OVQuantizationConfig(
                 ignored_scope={"names": ["op_name"]},
                 num_samples=100,
-                preset=nncf.QuantizationPreset.MIXED,
+                sym=False,
                 model_type=nncf.ModelType.TRANSFORMER,
                 fast_bias_correction=True,
                 overflow_fix=OverflowFix.DISABLE,
@@ -794,7 +794,7 @@ class OVQuantizationConfigTest(unittest.TestCase):
             dict(
                 ignored_scope={"names": ["op_name"]},
                 num_samples=100,
-                preset=nncf.QuantizationPreset.MIXED,
+                sym=False,
                 model_type=nncf.ModelType.TRANSFORMER,
                 fast_bias_correction=True,
                 overflow_fix=OverflowFix.DISABLE,
@@ -834,14 +834,11 @@ def str_to_enum(enum_cls, value):
                 return
             for key, value in loaded_ov_config.quantization_config.to_dict().items():
                 initial_value = getattr(ov_config.quantization_config, key)
-                if key == "preset" or key == "overflow_fix":
+                if key == "overflow_fix":
                     # TODO: remove once NNCF is updated to 2.10
                     if getattr(quantization_config, key) is not None:
                         self.assertTrue(isinstance(value, str))
-                        if key == "preset":
-                            value = str_to_enum(nncf.QuantizationPreset, value)
-                        else:
-                            value = str_to_enum(OverflowFix, value)
+                        value = str_to_enum(OverflowFix, value)
                 self.assertEqual(value, initial_value)
 
     @parameterized.expand(QUANTIZATION_CONFIG_DICTS)