
Commit ff1d94b

Remove nncf dependency from openvino configs (#668)
* Remove nncf dependency from openvino configs
* format
* fix
* fix format
* Add quant_method attribute
* format
* set default value to quant_method attribute
1 parent 0d943f8 commit ff1d94b
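
The net effect of the patch is that the OpenVINO quantization configs are built from plain Python values, with nncf pulled in only when quantization actually runs. A minimal usage sketch (illustrative, not part of the patch; assumes optimum-intel with the openvino extra installed):

# Minimal sketch of the new import surface; assumes optimum-intel with openvino installed.
# When nncf is missing, these names resolve to dummy objects that raise on use,
# so importing optimum.intel itself no longer requires nncf.
from optimum.intel import OVQuantizationConfig, OVWeightQuantizationConfig  # noqa: F401

weight_config = OVWeightQuantizationConfig(bits=8)  # weight-only quantization, plain values only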

File tree

7 files changed: +82 -99 lines changed

.github/workflows/test_openvino.yml
optimum/intel/__init__.py
optimum/intel/openvino/configuration.py
optimum/intel/openvino/quantization.py
optimum/intel/utils/dummy_openvino_and_nncf_objects.py
optimum/intel/utils/dummy_openvino_objects.py
tests/openvino/test_quantization.py

.github/workflows/test_openvino.yml (+5 -1)

@@ -35,7 +35,11 @@ jobs:
         pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
     - name: Test with Pytest
       run: |
-        pytest tests/openvino/ --ignore test_modeling_basic --durations=0
+        pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
+    - name: Test basic
+      run: |
+        pip uninstall -y nncf
+        pytest tests/openvino/test_modeling_basic.py
     - name: Test openvino-nightly
       run: |
         pip uninstall -y openvino
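
The new `Test basic` step deliberately uninstalls nncf before running `tests/openvino/test_modeling_basic.py`, so it exercises the import path where the quantization configs fall back to dummy objects. A rough, hypothetical illustration of that environment (not part of the test suite):

# Hypothetical check of the nncf-free environment the "Test basic" step runs in:
# the package and the config names must still import cleanly without nncf.
import importlib.util

from optimum.intel import OVModelForCausalLM, OVQuantizationConfig  # noqa: F401

print("nncf installed:", importlib.util.find_spec("nncf") is not None)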

optimum/intel/__init__.py (+13 -8)

@@ -59,9 +59,13 @@
     if not (is_openvino_available() and is_nncf_available()):
         raise OptionalDependencyNotAvailable()
 except OptionalDependencyNotAvailable:
-    _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVQuantizer", "OVTrainingArguments"])
+    _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(
+        ["OVQuantizer", "OVTrainingArguments", "OVQuantizationConfig", "OVWeightQuantizationConfig"]
+    )
 else:
-    _import_structure["openvino"].extend(["OVQuantizer", "OVTrainingArguments"])
+    _import_structure["openvino"].extend(
+        ["OVQuantizer", "OVTrainingArguments", "OVQuantizationConfig", "OVWeightQuantizationConfig"]
+    )


 try:
@@ -124,8 +128,6 @@
             "OVModelForVision2Seq",
             "OVModelForSequenceClassification",
             "OVModelForTokenClassification",
-            "OVQuantizationConfig",
-            "OVWeightQuantizationConfig",
             "OVConfig",
         ]
     )
@@ -188,9 +190,14 @@
         if not (is_openvino_available() and is_nncf_available()):
             raise OptionalDependencyNotAvailable()
     except OptionalDependencyNotAvailable:
-        from .utils.dummy_openvino_and_nncf_objects import OVQuantizer, OVTrainingArguments
+        from .utils.dummy_openvino_and_nncf_objects import (
+            OVQuantizationConfig,
+            OVQuantizer,
+            OVTrainingArguments,
+            OVWeightQuantizationConfig,
+        )
     else:
-        from .openvino import OVQuantizer, OVTrainingArguments
+        from .openvino import OVQuantizationConfig, OVQuantizer, OVTrainingArguments, OVWeightQuantizationConfig

     try:
         if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()):
@@ -244,8 +251,6 @@
         OVModelForSpeechSeq2Seq,
         OVModelForTokenClassification,
         OVModelForVision2Seq,
-        OVQuantizationConfig,
-        OVWeightQuantizationConfig,
     )

     try:
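
For downstream code, the practical pattern is to guard nncf-dependent paths with the same availability helper the package uses internally. A hedged sketch (the guard itself is illustrative; only `is_nncf_available` comes from the patch):

# Illustrative guard for user code; mirrors the availability check used in __init__.py.
from optimum.intel.utils.import_utils import is_nncf_available

if is_nncf_available():
    # The real classes are exported when both openvino and nncf are installed.
    from optimum.intel import OVQuantizationConfig, OVQuantizer
else:
    # Otherwise the same names are dummy objects that raise when instantiated,
    # so skip quantization-specific code paths entirely.
    OVQuantizationConfig = OVQuantizer = None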

optimum/intel/openvino/configuration.py (+21 -46)

@@ -18,14 +18,17 @@
 from enum import Enum
 from typing import Any, Dict, List, Optional, Union

-import nncf
 import torch
-from nncf.quantization.advanced_parameters import OverflowFix
 from transformers import PretrainedConfig
 from transformers.utils.quantization_config import QuantizationConfigMixin, QuantizationMethod

 from optimum.configuration_utils import BaseConfig

+from ..utils.import_utils import is_nncf_available
+
+
+if is_nncf_available():
+    import nncf

 logger = logging.getLogger(__name__)

@@ -52,12 +55,18 @@
 }


+class OVQuantizationMethod(str, Enum):
+    DEFAULT = "default"
+
+
 @dataclass
 class OVQuantizationConfigBase(QuantizationConfigMixin):
     """
     Base configuration class for quantization parameters
     """

+    quant_method = OVQuantizationMethod.DEFAULT
+
     def __init__(
         self,
         ignored_scope: Optional[dict] = None,
@@ -91,7 +100,7 @@ def post_init(self):
         if not (self.num_samples is None or isinstance(self.num_samples, int) and self.num_samples > 0):
             raise ValueError(f"`num_samples` is expected to be a positive integer, but found: {self.num_samples}")

-    def get_ignored_scope_instance(self) -> nncf.IgnoredScope:
+    def get_ignored_scope_instance(self) -> "nncf.IgnoredScope":
         if self.ignored_scope is None:
             return nncf.IgnoredScope()
         return nncf.IgnoredScope(**copy.deepcopy(self.ignored_scope))
@@ -178,10 +187,6 @@ def to_diff_dict(self) -> Dict[str, Any]:
         return self._to_dict_safe(to_diff_dict=True)


-class OVQuantizationMethod(str, Enum):
-    DEFAULT = "default"
-
-
 @dataclass
 class OVWeightQuantizationConfig(OVQuantizationConfigBase):
     """
@@ -240,7 +245,7 @@ def __init__(
         sensitivity_metric: Optional[str] = None,
         ignored_scope: Optional[dict] = None,
         num_samples: Optional[int] = None,
-        quant_method: Optional[Union[QuantizationMethod, OVQuantizationMethod]] = OVQuantizationMethod.DEFAULT,
+        quant_method: Union[QuantizationMethod, OVQuantizationMethod] = OVQuantizationMethod.DEFAULT,
         weight_only: Optional[bool] = True,
         **kwargs,
     ):
@@ -309,12 +314,12 @@ def post_init(self):
 class OVQuantizationConfig(OVQuantizationConfigBase):
     def __init__(
         self,
+        sym: bool = False,
         ignored_scope: Optional[dict] = None,
         num_samples: Optional[int] = 300,
-        preset: nncf.QuantizationPreset = None,
-        model_type: nncf.ModelType = nncf.ModelType.TRANSFORMER,
+        model_type: str = "transformer",
         fast_bias_correction: bool = True,
-        overflow_fix: OverflowFix = OverflowFix.DISABLE,
+        overflow_fix: str = "disable",
         weight_only: Optional[bool] = False,
         **kwargs,
     ):
@@ -323,23 +328,18 @@ def __init__(
         compression, during quantization both weights and activations are converted to lower precision.
         For weight-only model quantization please see OVWeightQuantizationConfig.
         Args:
+            sym (`bool`, defaults to `False`):
+                Whether to use symmetric quantization on the activations. Symmetric quantization will be applied on the weights in any case.
             ignored_scope (`dict`, *optional*):
                 An ignored scope that defines the list of model nodes to be ignored during quantization. Dictionary
                 entries provided via this argument are used to create an instance of `nncf.IgnoredScope` class.
             num_samples (`int`, *optional*):
                 The maximum number of samples composing the calibration dataset.
-            preset (`nncf.QuantizationPreset`, *optional*):
-                A preset controls the quantization mode (symmetric and asymmetric).
-                It can take the following values:
-                - `performance`: Symmetric quantization of weights and activations.
-                - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
-                Default value is None. In this case, `mixed` preset is used for `transformer`
-                model type otherwise `performance`.
-            model_type (`nncf.ModelType`, defaults to nncf.ModelType.TRANSFORMER):
+            model_type (`str`, defaults to "transformer"):
                 Model type is needed to specify additional patterns in the model. Supported only `transformer` now.
             fast_bias_correction (`bool`, defaults to True):
                 Whether to apply fast or full bias correction algorithm.
-            overflow_fix (`nncf.OverflowFix`, default to OverflowFix.DISABLE):
+            overflow_fix (`str`, default to "disable"):
                 Parameter for controlling overflow fix setting.
             weight_only (`bool`, *optional*):
                 Used to explicitly specify type of quantization (weight-only of full) to apply. Useful when building
@@ -351,37 +351,12 @@ def __init__(
                 "Please check your configuration."
             )
         super().__init__(ignored_scope, num_samples, False)
-        # TODO: remove checks below once NNCF is updated to 2.10
-        if isinstance(overflow_fix, str):
-            overflow_fix = OverflowFix(overflow_fix)
-        if isinstance(preset, str):
-            preset = nncf.QuantizationPreset(preset)
-
-        self.preset = preset
+        self.sym = sym
         self.model_type = model_type
         self.fast_bias_correction = fast_bias_correction
         self.overflow_fix = overflow_fix
         self.post_init()

-    def to_dict(self) -> Dict[str, Any]:
-        # TODO: remove code below once NNCF is updated to 2.10
-        if isinstance(self.overflow_fix, Enum) or isinstance(self.preset, Enum):
-            overflow_fix_value = (
-                None
-                if self.overflow_fix is None
-                else self.overflow_fix
-                if isinstance(self.overflow_fix, str)
-                else self.overflow_fix.value
-            )
-            preset_value = (
-                None if self.preset is None else self.preset if isinstance(self.preset, str) else self.preset.value
-            )
-            self_copy = copy.deepcopy(self)
-            self_copy.overflow_fix = overflow_fix_value
-            self_copy.preset = preset_value
-            return self_copy.to_dict()
-        return super().to_dict()
-

 def _check_default_4bit_configs(config: PretrainedConfig):
     return _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, None)
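
With the nncf enums gone from the signature, `OVQuantizationConfig` is built from strings and booleans. A short sketch mirroring the values used in the updated tests (assumes an environment where openvino and nncf are installed, so the real class rather than the dummy is exported):

from optimum.intel import OVQuantizationConfig

# Plain Python values instead of nncf enums; mirrors the updated test cases.
config = OVQuantizationConfig(
    sym=False,                     # asymmetric activations; weights stay symmetric
    ignored_scope={"names": ["op_name"]},
    num_samples=100,
    model_type="transformer",      # mapped to nncf.ModelType at quantization time
    fast_bias_correction=True,
    overflow_fix="disable",        # mapped to OverflowFix at quantization time
)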

optimum/intel/openvino/quantization.py (+13 -7)

@@ -26,7 +26,7 @@
 import torch
 import transformers
 from nncf import CompressWeightsMode, SensitivityMetric
-from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
+from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters, OverflowFix
 from nncf.torch import register_module
 from nncf.torch.initialization import PTInitializingDataLoader
 from openvino._offline_transformations import compress_quantize_weights_transformation
@@ -378,10 +378,12 @@ def _quantize_ovbasemodel(
             quantization_dataset,
             subset_size=quantization_config.num_samples,
             ignored_scope=quantization_config.get_ignored_scope_instance(),
-            model_type=quantization_config.model_type,
-            preset=quantization_config.preset,
+            model_type=nncf.ModelType(quantization_config.model_type),
+            preset=nncf.QuantizationPreset.PERFORMANCE if quantization_config.sym else nncf.QuantizationPreset.MIXED,
             fast_bias_correction=quantization_config.fast_bias_correction,
-            advanced_parameters=nncf.AdvancedQuantizationParameters(overflow_fix=quantization_config.overflow_fix),
+            advanced_parameters=nncf.AdvancedQuantizationParameters(
+                overflow_fix=OverflowFix(quantization_config.overflow_fix)
+            ),
             **kwargs,
         )
         self.model.model = quantized_model
@@ -476,10 +478,14 @@ def _quantize_torchmodel(
             quantization_dataset,
             subset_size=quantization_config.num_samples,
             ignored_scope=quantization_config.get_ignored_scope_instance(),
-            model_type=quantization_config.model_type,
-            preset=quantization_config.preset,
+            model_type=nncf.ModelType(quantization_config.model_type),
+            preset=nncf.QuantizationPreset.PERFORMANCE
+            if quantization_config.sym
+            else nncf.QuantizationPreset.MIXED,
             fast_bias_correction=quantization_config.fast_bias_correction,
-            advanced_parameters=nncf.AdvancedQuantizationParameters(overflow_fix=quantization_config.overflow_fix),
+            advanced_parameters=nncf.AdvancedQuantizationParameters(
+                overflow_fix=OverflowFix(quantization_config.overflow_fix)
+            ),
             **kwargs,
         )
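
The plain config values are only translated back into nncf types at the `nncf.quantize` call sites shown above. A condensed sketch of that mapping (the `to_nncf_kwargs` helper is hypothetical; the conversions themselves come from the hunks):

import nncf
from nncf.quantization.advanced_parameters import OverflowFix


def to_nncf_kwargs(quantization_config):
    # Hypothetical helper summarizing the conversions done in
    # _quantize_ovbasemodel / _quantize_torchmodel above.
    return dict(
        model_type=nncf.ModelType(quantization_config.model_type),
        preset=nncf.QuantizationPreset.PERFORMANCE
        if quantization_config.sym
        else nncf.QuantizationPreset.MIXED,
        advanced_parameters=nncf.AdvancedQuantizationParameters(
            overflow_fix=OverflowFix(quantization_config.overflow_fix)
        ),
    )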

optimum/intel/utils/dummy_openvino_and_nncf_objects.py

+22
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,25 @@ def __init__(self, *args, **kwargs):
4646
@classmethod
4747
def from_pretrained(cls, *args, **kwargs):
4848
requires_backends(cls, ["openvino", "nncf"])
49+
50+
51+
class OVWeightQuantizationConfig(metaclass=DummyObject):
52+
_backends = ["openvino", "nncf"]
53+
54+
def __init__(self, *args, **kwargs):
55+
requires_backends(self, ["openvino", "nncf"])
56+
57+
@classmethod
58+
def from_pretrained(cls, *args, **kwargs):
59+
requires_backends(cls, ["openvino", "nncf"])
60+
61+
62+
class OVQuantizationConfig(metaclass=DummyObject):
63+
_backends = ["openvino", "nncf"]
64+
65+
def __init__(self, *args, **kwargs):
66+
requires_backends(self, ["openvino", "nncf"])
67+
68+
@classmethod
69+
def from_pretrained(cls, *args, **kwargs):
70+
requires_backends(cls, ["openvino", "nncf"])

optimum/intel/utils/dummy_openvino_objects.py (-11)

@@ -189,14 +189,3 @@ def __init__(self, *args, **kwargs):
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["openvino"])
-
-
-class OVWeightQuantizationConfig(metaclass=DummyObject):
-    _backends = ["openvino"]
-
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["openvino"])
-
-    @classmethod
-    def from_pretrained(cls, *args, **kwargs):
-        requires_backends(cls, ["openvino"])

tests/openvino/test_quantization.py (+8 -26)

@@ -748,10 +748,10 @@ class OVQuantizationConfigTest(unittest.TestCase):
             OVQuantizationConfig(
                 ignored_scope={"names": ["op_name"]},
                 num_samples=100,
-                preset=nncf.QuantizationPreset.MIXED,
-                model_type=nncf.ModelType.TRANSFORMER,
+                sym=False,
+                model_type="transformer",
                 fast_bias_correction=True,
-                overflow_fix=OverflowFix.DISABLE,
+                overflow_fix="disable",
             ),
         ),
         (OVQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])),),
@@ -789,15 +789,15 @@ class OVQuantizationConfigTest(unittest.TestCase):
             OVWeightQuantizationConfig,
             "Can't determine type of OV quantization config",
         ),
-        (dict(model_type=nncf.ModelType.TRANSFORMER), OVQuantizationConfig, None),
+        (dict(model_type="transformer"), OVQuantizationConfig, None),
         (
             dict(
                 ignored_scope={"names": ["op_name"]},
                 num_samples=100,
-                preset=nncf.QuantizationPreset.MIXED,
-                model_type=nncf.ModelType.TRANSFORMER,
+                sym=False,
+                model_type="transformer",
                 fast_bias_correction=True,
-                overflow_fix=OverflowFix.DISABLE,
+                overflow_fix="disable",
             ),
             OVQuantizationConfig,
             None,
@@ -809,21 +809,11 @@ class OVQuantizationConfigTest(unittest.TestCase):
         (dict(bits=8, fast_bias_correction=True, weight_only=True), OVWeightQuantizationConfig, None),
        (dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
        (dict(bits=8, sym=True, weight_only=False), OVWeightQuantizationConfig, "Please check your configuration"),
-        (
-            dict(model_type=nncf.ModelType.TRANSFORMER, weight_only=True),
-            OVQuantizationConfig,
-            "Please check your configuration",
-        ),
+        (dict(model_type="transformer", weight_only=True), OVQuantizationConfig, "Please check your configuration"),
     )

     @parameterized.expand(QUANTIZATION_CONFIGS)
     def test_config_serialization(self, quantization_config: OVQuantizationConfigBase):
-        def str_to_enum(enum_cls, value):
-            for k, v in enum_cls.__members__.items():
-                if getattr(enum_cls, k).value == value:
-                    return v
-            raise ValueError(f"Could not convert string {value} to enum value of type {enum_cls}")
-
         ov_config = OVConfig(quantization_config=quantization_config)
         with tempfile.TemporaryDirectory() as tmp_dir:
             ov_config.save_pretrained(tmp_dir)
@@ -834,14 +824,6 @@ def str_to_enum(enum_cls, value):
                 return
             for key, value in loaded_ov_config.quantization_config.to_dict().items():
                 initial_value = getattr(ov_config.quantization_config, key)
-                if key == "preset" or key == "overflow_fix":
-                    # TODO: remove once NNCF is updated to 2.10
-                    if getattr(quantization_config, key) is not None:
-                        self.assertTrue(isinstance(value, str))
-                        if key == "preset":
-                            value = str_to_enum(nncf.QuantizationPreset, value)
-                        else:
-                            value = str_to_enum(OverflowFix, value)
                 self.assertEqual(value, initial_value)

     @parameterized.expand(QUANTIZATION_CONFIG_DICTS)
