
Commit f7fa3a1

Committed Apr 11, 2024
Make quantization config contain only serializable properties.
1 parent 20fd761 · commit f7fa3a1

6 files changed: +343 -255 lines

 

Makefile (+1 -1)

@@ -21,7 +21,7 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
 
 # Run code quality checks
 style_check:
-	black --check .
+	black .
 	ruff check .
 
 style:

optimum/intel/openvino/configuration.py (+119 -109)

@@ -11,13 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
+import copy
+import inspect
 import logging
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Union
 
-import datasets
 import nncf
 import torch
 from nncf.quantization.advanced_parameters import OverflowFix
@@ -52,36 +52,6 @@
 }
 
 
-class _replace_properties_values:
-    """
-    A context manager for temporarily overriding an object's properties
-    """
-
-    def __init__(self, obj, property_names, property_values):
-        self.obj = obj
-        self.property_names = property_names
-        self.new_property_values = property_values
-        self.old_property_values = [None] * len(property_names)
-        for i, property_name in enumerate(self.property_names):
-            self.old_property_values[i] = getattr(obj, property_name)
-
-    def __enter__(self):
-        for property_name, new_property_value in zip(self.property_names, self.new_property_values):
-            setattr(self.obj, property_name, new_property_value)
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        for property_name, old_property_value in zip(self.property_names, self.old_property_values):
-            setattr(self.obj, property_name, old_property_value)
-
-
-def _is_serializable(obj):
-    try:
-        json.dumps(obj)
-        return True
-    except Exception:
-        return False
-
-
 @dataclass
 class OVQuantizationConfigBase(QuantizationConfigMixin):
     """
@@ -90,53 +60,41 @@ class OVQuantizationConfigBase(QuantizationConfigMixin):
 
     def __init__(
        self,
-        dataset: Optional[Union[str, List[str], nncf.Dataset, datasets.Dataset]] = None,
-        ignored_scope: Optional[Union[dict, nncf.IgnoredScope]] = None,
+        ignored_scope: Optional[dict] = None,
        num_samples: Optional[int] = None,
+        weight_only: Optional[bool] = None,
+        **kwargs,
    ):
        """
        Args:
-            dataset (`str or List[str] or nncf.Dataset or datasets.Dataset`, *optional*):
-                The dataset used for data-aware weight compression or quantization with NNCF.
-            ignored_scope (`dict or nncf.IgnoredScope`, *optional*):
-                An ignored scope that defines the list of model nodes to be ignored during quantization.
+            ignored_scope (`dict`, *optional*):
+                An ignored scope that defines a list of model nodes to be ignored during quantization. Dictionary
+                entries provided via this argument are used to create an instance of the `nncf.IgnoredScope` class.
            num_samples (`int`, *optional*):
                The maximum number of samples composing the calibration dataset.
+            weight_only (`bool`, *optional*):
+                Used to explicitly specify the type of quantization (weight-only or full) to apply.
        """
-        self.dataset = dataset
-        if isinstance(ignored_scope, dict):
-            ignored_scope = nncf.IgnoredScope(**ignored_scope)
+        if isinstance(ignored_scope, nncf.IgnoredScope):
+            ignored_scope = ignored_scope.__dict__
        self.ignored_scope = ignored_scope
        self.num_samples = num_samples
+        self.weight_only = weight_only
 
    def post_init(self):
-        if not (self.dataset is None or isinstance(self.dataset, (str, list, nncf.Dataset, datasets.Dataset))):
+        try:
+            self.get_ignored_scope_instance()
+        except Exception as e:
            raise ValueError(
-                "Dataset must be a instance of either string, list of strings, nncf.Dataset or "
-                f"dataset.Dataset, but found {type(self.dataset)}"
-            )
-        if not (self.ignored_scope is None or isinstance(self.ignored_scope, nncf.IgnoredScope)):
-            raise ValueError(
-                "Ignored scope must be a instance of either dict, or nncf.IgnoredScope but found "
-                f"{type(self.dataset)}"
+                f"Can't create an `IgnoredScope` object from the provided ignored scope dict: {self.ignored_scope}.\n{e}"
            )
+        if not (self.num_samples is None or isinstance(self.num_samples, int) and self.num_samples > 0):
+            raise ValueError(f"`num_samples` is expected to be a positive integer, but found: {self.num_samples}")
 
-    def _to_dict_without_properties(self, property_names: Union[List[str], Tuple[str]]) -> Dict[str, Any]:
-        """
-        Calls to_dict() with given properties overwritten with None. Useful for hiding non-serializable properties.
-        """
-        if len(property_names) == 0:
-            return super().to_dict()
-        with _replace_properties_values(self, property_names, [None] * len(property_names)):
-            result = super().to_dict()
-        return result
-
-    def to_dict(self) -> Dict[str, Any]:
-        properties_to_omit = [] if _is_serializable(self.dataset) else ["dataset"]
-        if isinstance(self.ignored_scope, nncf.IgnoredScope):
-            with _replace_properties_values(self, ["ignored_scope"], [self.ignored_scope.__dict__]):
-                return self._to_dict_without_properties(properties_to_omit)
-        return self._to_dict_without_properties(properties_to_omit)
+    def get_ignored_scope_instance(self) -> nncf.IgnoredScope:
+        if self.ignored_scope is None:
+            return nncf.IgnoredScope()
+        return nncf.IgnoredScope(**copy.deepcopy(self.ignored_scope))
 
 
 class OVConfig(BaseConfig):
@@ -155,16 +113,11 @@ def __init__(
        self.input_info = input_info
        self.save_onnx_model = save_onnx_model
        self.optimum_version = kwargs.pop("optimum_version", None)
+        if isinstance(quantization_config, dict):
+            quantization_config = self._quantization_config_from_dict(quantization_config)
        self.quantization_config = quantization_config
        self.compression = None  # A backward-compatibility field for training-time compression parameters
 
-        if isinstance(self.quantization_config, dict):
-            # Config is loaded as dict during deserialization
-            logger.info(
-                "`quantization_config` was provided as a dict, in this form it can't be used for quantization. "
-                "Please provide config as an instance of OVWeightQuantizationConfig or OVQuantizationConfig"
-            )
-
        bits = (
            self.quantization_config.bits if isinstance(self.quantization_config, OVWeightQuantizationConfig) else None
        )
@@ -180,12 +133,40 @@ def add_input_info(self, model_inputs: Dict, force_batch_one: bool = False):
            for name, value in model_inputs.items()
        ]
 
+    @staticmethod
+    def _quantization_config_from_dict(quantization_config: dict) -> OVQuantizationConfigBase:
+        wq_args = inspect.getfullargspec(OVWeightQuantizationConfig.__init__).args
+        q_args = inspect.getfullargspec(OVQuantizationConfig.__init__).args
+        config_keys = quantization_config.keys()
+        matches_wq_config_signature = all(arg_name in wq_args for arg_name in config_keys)
+        matches_q_config_signature = all(arg_name in q_args for arg_name in config_keys)
+        if matches_wq_config_signature == matches_q_config_signature:
+            weight_only = quantization_config.get("weight_only", None)
+            if weight_only is None:
+                logger.warning(
+                    "Can't determine type of OV quantization config. Please specify explicitly whether you intend to "
+                    "run weight-only quantization or not with `weight_only` parameter. Creating an instance of "
+                    "OVWeightQuantizationConfig."
+                )
+                return OVWeightQuantizationConfig.from_dict(quantization_config)
+            matches_wq_config_signature = weight_only
+
+        config_type = OVWeightQuantizationConfig if matches_wq_config_signature else OVQuantizationConfig
+        return config_type.from_dict(quantization_config)
+
    def _to_dict_safe(self, to_diff_dict: bool = False) -> Dict[str, Any]:
+        class ConfigStub:
+            def to_dict(self):
+                return None
+
+            def to_diff_dict(self):
+                return None
+
        if self.quantization_config is None:
            # Parent to_dict() implementation does not support quantization_config being None
-            with _replace_properties_values(self, ("quantization_config",), (OVQuantizationConfigBase(),)):
-                result = super().to_diff_dict() if to_diff_dict else super().to_dict()
-            del result["quantization_config"]
+            self_copy = copy.deepcopy(self)
+            self_copy.quantization_config = ConfigStub()
+            result = self_copy.to_diff_dict() if to_diff_dict else self_copy.to_dict()
        else:
            result = super().to_diff_dict() if to_diff_dict else super().to_dict()
        return result
@@ -212,9 +193,8 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
            The number of bits to quantize to.
        sym (`bool`, defaults to `False`):
            Whether to use symmetric quantization.
-        tokenizer (`str` or `PreTrainedTokenizerBase`, *optional*):
+        tokenizer (`str`, *optional*):
            The tokenizer used to process the dataset. You can pass either:
-                - A custom tokenizer object.
                - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
                  Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
                  user or organization name, like `dbmdz/bert-base-german-cased`.
@@ -224,6 +204,8 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
            The dataset used for data-aware compression or quantization with NNCF. You can provide your own dataset
            in a list of strings or just use the one from the list ['wikitext','c4','c4-new','ptb','ptb-new'] for LLMs
            or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models.
+            Alternatively, you can provide data objects via the `calibration_dataset` argument
+            of the `OVQuantizer.quantize()` method.
        ratio (`float`, defaults to 1.0):
            The ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to INT4_ASYM
            and the rest to INT8_ASYM).
@@ -235,32 +217,44 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
            The sensitivity metric for assigning quantization precision to layers. In order to
            preserve the accuracy of the model, the more sensitive layers receive higher precision.
        ignored_scope (`dict`, *optional*):
-            An ignored scope that defined the list of model control flow graph nodes to be ignored during quantization.
+            An ignored scope that defines the list of model nodes to be ignored during quantization. Dictionary
+            entries provided via this argument are used to create an instance of the `nncf.IgnoredScope` class.
        num_samples (`int`, *optional*):
            The maximum number of samples composing the calibration dataset.
        quant_method (`str`, defaults to OVQuantizationMethod.DEFAULT):
            Weight compression method to apply.
+        weight_only (`bool`, *optional*):
+            Used to explicitly specify the type of quantization (weight-only or full) to apply.
    """
 
    def __init__(
        self,
        bits: int = 8,
        sym: bool = False,
-        tokenizer: Optional[Any] = None,
-        dataset: Optional[Union[str, List[str], nncf.Dataset, datasets.Dataset]] = None,
+        tokenizer: Optional[str] = None,
+        dataset: Optional[Union[str, List[str]]] = None,
        ratio: float = 1.0,
        group_size: Optional[int] = None,
        all_layers: Optional[bool] = None,
        sensitivity_metric: Optional[str] = None,
-        ignored_scope: Optional[Union[dict, nncf.IgnoredScope]] = None,
+        ignored_scope: Optional[dict] = None,
        num_samples: Optional[int] = None,
        quant_method: Optional[Union[QuantizationMethod, OVQuantizationMethod]] = OVQuantizationMethod.DEFAULT,
+        weight_only: Optional[bool] = True,
        **kwargs,
    ):
-        super().__init__(dataset, ignored_scope, num_samples)
+        if weight_only is False:
+            logger.warning(
+                "Trying to create an instance of `OVWeightQuantizationConfig` with `weight_only` being "
+                "False. Please check your configuration."
+            )
+        super().__init__(ignored_scope, num_samples, True)
        self.bits = bits
        self.sym = sym
        self.tokenizer = tokenizer
+        self.dataset = dataset
        self.group_size = group_size or (-1 if bits == 8 else 128)
        self.ratio = ratio
        self.all_layers = all_layers
@@ -277,6 +271,11 @@ def post_init(self):
            raise ValueError("`ratio` must be between 0 and 1.")
        if self.group_size is not None and self.group_size != -1 and self.group_size <= 0:
            raise ValueError("`group_size` must be greater than 0 or equal to -1")
+        if not (self.dataset is None or isinstance(self.dataset, (str, list))):
+            raise ValueError(
+                f"Dataset must be an instance of either a string or a list of strings, but found {type(self.dataset)}. "
+                f"If you wish to provide a custom dataset, please pass it via the `calibration_dataset` argument."
+            )
        if self.dataset is not None and isinstance(self.dataset, str):
            llm_datasets = ["wikitext", "c4", "c4-new", "ptb", "ptb-new"]
            stable_diffusion_datasets = [
@@ -303,34 +302,31 @@ def post_init(self):
                f"For 8-bit quantization, `group_size` is expected to be set to -1, but was set to {self.group_size}"
            )
 
-    def to_dict(self) -> Dict[str, Any]:
-        if not _is_serializable(self.tokenizer):
-            return self._to_dict_without_properties(("tokenizer",))
-        return super().to_dict()
+        if self.tokenizer is not None and not isinstance(self.tokenizer, str):
+            raise ValueError(f"Tokenizer is expected to be a string, but found {self.tokenizer}")
 
 
 @dataclass
 class OVQuantizationConfig(OVQuantizationConfigBase):
    def __init__(
        self,
-        dataset: Union[str, List[str], nncf.Dataset, datasets.Dataset],
-        ignored_scope: Optional[Union[dict, nncf.IgnoredScope]] = None,
+        ignored_scope: Optional[dict] = None,
        num_samples: Optional[int] = 300,
        preset: nncf.QuantizationPreset = None,
        model_type: nncf.ModelType = nncf.ModelType.TRANSFORMER,
        fast_bias_correction: bool = True,
        overflow_fix: OverflowFix = OverflowFix.DISABLE,
+        weight_only: Optional[bool] = False,
        **kwargs,
    ):
        """
        Configuration class containing parameters related to model quantization with NNCF. Compared to weight
        compression, during quantization both weights and activations are converted to lower precision.
        For weight-only model quantization please see OVWeightQuantizationConfig.
        Args:
-            dataset (`str or List[str] or nncf.Dataset or datasets.Dataset`):
-                A dataset used for quantization parameters calibration. Required parameter.
-            ignored_scope (`dict or nncf.IgnoredScope`, *optional*):
-                An ignored scope that defines the list of model nodes to be ignored during quantization.
+            ignored_scope (`dict`, *optional*):
+                An ignored scope that defines the list of model nodes to be ignored during quantization. Dictionary
+                entries provided via this argument are used to create an instance of the `nncf.IgnoredScope` class.
            num_samples (`int`, *optional*):
                The maximum number of samples composing the calibration dataset.
            preset (`nncf.QuantizationPreset`, *optional*):
@@ -346,31 +342,45 @@ def __init__(
                Whether to apply fast or full bias correction algorithm.
            overflow_fix (`nncf.OverflowFix`, defaults to OverflowFix.DISABLE):
                Parameter for controlling overflow fix setting.
+            weight_only (`bool`, *optional*):
+                Used to explicitly specify the type of quantization (weight-only or full) to apply.
        """
-        super().__init__(dataset, ignored_scope, num_samples)
+        if weight_only is True:
+            logger.warning(
+                "Trying to create an instance of `OVQuantizationConfig` with `weight_only` being True. "
+                "Please check your configuration."
+            )
+        super().__init__(ignored_scope, num_samples, False)
+        # TODO: remove checks below once NNCF is updated to 2.10
+        if isinstance(overflow_fix, str):
+            overflow_fix = OverflowFix(overflow_fix)
+        if isinstance(preset, str):
+            preset = nncf.QuantizationPreset(preset)
+
        self.preset = preset
        self.model_type = model_type
        self.fast_bias_correction = fast_bias_correction
        self.overflow_fix = overflow_fix
        self.post_init()
 
-    def post_init(self):
-        """
-        Safety checker that arguments are correct
-        """
-        super().post_init()
-        if self.dataset is None:
-            raise ValueError(
-                "`dataset` is needed to compute the activations range during the calibration step and was not provided."
-                " In case you only want to apply quantization on the weights, please run weight-only quantization."
-            )
-
    def to_dict(self) -> Dict[str, Any]:
        # TODO: remove code below once NNCF is updated to 2.10
-        overflow_fix_value = None if self.overflow_fix is None else self.overflow_fix.value
-        preset_value = None if self.preset is None else self.preset.value
-        with _replace_properties_values(self, ("overflow_fix", "preset"), (overflow_fix_value, preset_value)):
-            return super().to_dict()
+        if isinstance(self.overflow_fix, Enum) or isinstance(self.preset, Enum):
+            overflow_fix_value = (
+                None
+                if self.overflow_fix is None
+                else self.overflow_fix
+                if isinstance(self.overflow_fix, str)
+                else self.overflow_fix.value
+            )
+            preset_value = (
+                None if self.preset is None else self.preset if isinstance(self.preset, str) else self.preset.value
+            )
+            self_copy = copy.deepcopy(self)
+            self_copy.overflow_fix = overflow_fix_value
+            self_copy.preset = preset_value
+            return self_copy.to_dict()
+        return super().to_dict()
 
 
 def _check_default_4bit_configs(config: PretrainedConfig):
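
Taken together, these changes make every field of a quantization config JSON-serializable: `ignored_scope` is stored as a plain dict, tokenizers and datasets are plain strings or lists, and the new `weight_only` flag records which config type a serialized dict corresponds to. A minimal round-trip sketch, assuming the classes exported by `optimum.intel` (the save directory is illustrative):

```python
from optimum.intel import OVConfig, OVWeightQuantizationConfig

# `ignored_scope` is now a plain dict, so the whole config serializes to JSON.
q_config = OVWeightQuantizationConfig(bits=4, ignored_scope={"names": ["op_name"]})
ov_config = OVConfig(quantization_config=q_config)
ov_config.save_pretrained("./ov_model")  # illustrative directory

# On reload the stored dict is dispatched back to a config class via
# `_quantization_config_from_dict`; the serialized `weight_only` flag disambiguates.
loaded = OVConfig.from_pretrained("./ov_model")
assert isinstance(loaded.quantization_config, OVWeightQuantizationConfig)
```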

optimum/intel/openvino/modeling_base.py (+7 -2)

@@ -18,6 +18,7 @@
 from tempfile import TemporaryDirectory, gettempdir
 from typing import Dict, Optional, Union
 
+import nncf
 import openvino
 from huggingface_hub import hf_hub_download
 from openvino import Core, convert_model
@@ -100,7 +101,11 @@ def __init__(
            self._openvino_config = OVConfig(quantization_config=quantization_config)
 
    @staticmethod
-    def load_model(file_name: Union[str, Path], quantization_config: Union[OVWeightQuantizationConfig, Dict] = None):
+    def load_model(
+        file_name: Union[str, Path],
+        quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
+        calibration_dataset: Optional[nncf.Dataset] = None,
+    ):
        """
        Loads the model.
 
@@ -135,7 +140,7 @@ def fix_op_names_duplicates(model: openvino.runtime.Model):
 
            from optimum.intel.openvino.quantization import _weight_only_quantization
 
-            model = _weight_only_quantization(model, quantization_config)
+            model = _weight_only_quantization(model, quantization_config, calibration_dataset=calibration_dataset)
 
        return model
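
`load_model` now receives the calibration data as a separate `nncf.Dataset` argument instead of reading it from the config. A hedged sketch of the call, with an illustrative IR file name and dummy inputs:

```python
import nncf
import numpy as np

from optimum.intel import OVWeightQuantizationConfig
from optimum.intel.openvino.modeling_base import OVBaseModel

# Calibration samples travel next to the config rather than inside it.
samples = [{"input_ids": np.zeros((1, 32), dtype=np.int64)} for _ in range(8)]  # dummy inputs
model = OVBaseModel.load_model(
    "openvino_model.xml",  # illustrative IR file name
    quantization_config=OVWeightQuantizationConfig(bits=4),
    calibration_dataset=nncf.Dataset(samples),
)
```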

optimum/intel/openvino/modeling_decoder.py (+11 -5)

@@ -19,6 +19,7 @@
 from tempfile import TemporaryDirectory
 from typing import Dict, Optional, Tuple, Union
 
+import nncf
 import numpy as np
 import openvino
 import torch
@@ -572,7 +573,8 @@ def _from_pretrained(
        from_onnx: bool = False,
        local_files_only: bool = False,
        load_in_8bit: bool = False,
-        quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
+        quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
+        calibration_dataset: Optional[nncf.Dataset] = None,
        **kwargs,
    ):
        model_path = Path(model_id)
@@ -596,7 +598,11 @@ def _from_pretrained(
        quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
 
        load_in_4bit = quantization_config.bits == 4 if quantization_config else False
-        model = cls.load_model(model_cache_path, quantization_config=None if load_in_4bit else quantization_config)
+        model = cls.load_model(
+            model_cache_path,
+            quantization_config=None if load_in_4bit else quantization_config,
+            calibration_dataset=calibration_dataset,
+        )
 
        model_type = config.model_type.replace("_", "-")
        if model_type == "bloom":
@@ -632,7 +638,7 @@ def _from_pretrained(
                    f"For the given model, we recommend the following `quantization_config` : {default_config}"
                )
 
-            if isinstance(quantization_config.dataset, str):
+            if calibration_dataset is None and isinstance(quantization_config.dataset, str):
                tokenizer = quantization_config.tokenizer or AutoTokenizer.from_pretrained(model_id)
 
                from optimum.gptq.data import get_dataset, prepare_dataset
@@ -644,9 +650,9 @@ def _from_pretrained(
                dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
                dataset = prepare_dataset(dataset)
                quantization_config = copy.deepcopy(quantization_config)
-                quantization_config.dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
+                calibration_dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
 
-            _weight_only_quantization(model, quantization_config)
+            _weight_only_quantization(model, quantization_config, calibration_dataset)
 
        return causal_model
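
At the `from_pretrained` level this means data-aware 4-bit compression is driven by an externally built `nncf.Dataset`, mirroring the custom-dataset test added further below. A sketch, with an illustrative model id and transform function:

```python
import nncf
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

model_id = "facebook/opt-125m"  # illustrative
tokenizer = AutoTokenizer.from_pretrained(model_id)
texts = ["hello world"] * 8  # illustrative calibration texts

model = OVModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    quantization_config=OVWeightQuantizationConfig(bits=4, group_size=-1, ratio=0.8),
    # The dataset is no longer a config field; it is passed alongside the config.
    calibration_dataset=nncf.Dataset(texts, lambda text: tokenizer(text, return_tensors="np")),
)
```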

optimum/intel/openvino/quantization.py (+95 -67)

@@ -18,8 +18,9 @@
 import os
 from collections import deque
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
+import datasets
 import nncf
 import openvino
 import torch
@@ -203,6 +204,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
 
    def quantize(
        self,
+        calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
        save_directory: Union[str, Path] = None,
        ov_config: OVConfig = None,
        file_name: Optional[str] = None,
@@ -216,6 +218,9 @@ def quantize(
        Quantize a model given the optimization specifications defined in `quantization_config`.
 
        Args:
+            calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
+                A collection of data samples to use for quantization calibration. It is optional for weight-only
+                quantization and required for full quantization.
            save_directory (`Union[str, Path]`):
                The directory where the quantized model should be saved.
            ov_config (`OVConfig`, *optional*):
@@ -235,6 +240,16 @@ def quantize(
                floating-point. Fits best for LLM footprint reduction and performance acceleration.
 
        Examples:
+        ```python
+        >>> from optimum.intel.openvino import OVQuantizer, OVModelForCausalLM
+        >>> from transformers import AutoModelForCausalLM
+        >>> model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-3b")
+        >>> quantizer = OVQuantizer.from_pretrained(model, task="text-generation")
+        >>> ov_config = OVConfig(quantization_config=OVWeightQuantizationConfig(bits=8, sym=True))
+        >>> quantizer.quantize(ov_config=ov_config, save_directory="./quantized_model")
+        >>> optimized_model = OVModelForCausalLM.from_pretrained("./quantized_model")
+        ```
+
        ```python
        >>> from optimum.intel.openvino import OVQuantizer, OVModelForSequenceClassification
        >>> from transformers import AutoModelForSequenceClassification
@@ -243,25 +258,10 @@ def quantize(
        >>> model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
        >>> quantizer = OVQuantizer.from_pretrained(model, task="text-classification")
        >>> ov_config = OVConfig(quantization_config=OVQuantizationConfig(dataset=calibration_dataset))
-        >>> quantizer.quantize(ov_config=ov_config, save_directory="./quantized_model")
+        >>> quantizer.quantize(calibration_dataset=dataset, ov_config=ov_config, save_directory="./quantized_model")
        >>> optimized_model = OVModelForSequenceClassification.from_pretrained("./quantized_model")
        ```
-
-        ```python
-        >>> from optimum.intel.openvino import OVQuantizer, OVModelForCausalLM
-        >>> from transformers import AutoModelForCausalLM
-        >>> model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-3b")
-        >>> quantizer = OVQuantizer.from_pretrained(model, task="text-generation")
-        >>> ov_config = OVConfig(quantization_config=OVWeightQuantizationConfig(bits=8, sym=True))
-        >>> quantizer.quantize(ov_config=ov_config, save_directory="./quantized_model")
-        >>> optimized_model = OVModelForCausalLM.from_pretrained("./quantized_model")
-        ```
        """
-        if "calibration_dataset" in kwargs:
-            raise ValueError(
-                "`calibration_dataset` argument is deprecated. Please provide calibration dataset "
-                "with `ov_config.quantization_config.dataset`."
-            )
        if weights_only is not None:
            logger.warning(
                "`weights_only` argument is deprecated. In the future please provide `ov_config.quantization_config` "
@@ -282,15 +282,21 @@ def quantize(
            if weights_only is None or weights_only is True:
                if weights_only is None:
                    logger.info(
-                        "`quantization_config` was not provided, 8-bit symmetric weight quantization will be applied."
+                        "`quantization_config` was not provided, 8-bit asymmetric weight quantization will be applied."
                    )
-                ov_config.quantization_config = OVWeightQuantizationConfig(bits=8, sym=True)
+                ov_config.quantization_config = OVWeightQuantizationConfig(bits=8)
            else:
                ov_config.quantization_config = OVQuantizationConfig()
 
        if isinstance(self.model, OVBaseModel):
            self._quantize_ovbasemodel(
-                ov_config, save_directory, batch_size, data_collator, remove_unused_columns, **kwargs
+                ov_config,
+                save_directory,
+                calibration_dataset,
+                batch_size,
+                data_collator,
+                remove_unused_columns,
+                **kwargs,
            )
 
        elif isinstance(self.model, torch.nn.Module):
@@ -299,7 +305,14 @@ def quantize(
                "To convert a PyTorch model to OpenVINO, you can set `export=True` when loading your model as `OVModelForXxx.from_pretrained(..., export=True)`"
            )
            self._quantize_torchmodel(
-                ov_config, save_directory, file_name, batch_size, data_collator, remove_unused_columns, **kwargs
+                ov_config,
+                save_directory,
+                calibration_dataset,
+                file_name,
+                batch_size,
+                data_collator,
+                remove_unused_columns,
+                **kwargs,
            )
        else:
            raise TypeError(f"Unsupported model type: {type(self.model)}")
@@ -308,6 +321,7 @@ def _quantize_ovbasemodel(
        self,
        ov_config: OVConfig,
        save_directory: Union[str, Path],
+        calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
        batch_size: int = 1,
        data_collator: Optional[DataCollator] = None,
        remove_unused_columns: bool = True,
@@ -318,19 +332,18 @@ def _quantize_ovbasemodel(
 
        quantization_config = ov_config.quantization_config
        if isinstance(quantization_config, OVWeightQuantizationConfig):
-            _weight_only_quantization(self.model.model, quantization_config)
+            _weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
            self.model.save_pretrained(save_directory)
            ov_config.save_pretrained(save_directory)
            return
        if not isinstance(quantization_config, OVQuantizationConfig):
            raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
 
-        calibration_dataset = quantization_config.dataset
        if isinstance(calibration_dataset, nncf.Dataset):
            quantization_dataset = calibration_dataset
-        else:
+        elif isinstance(calibration_dataset, datasets.Dataset):
            calibration_dataloader = self._get_calibration_dataloader(
-                calibration_dataset=quantization_config.dataset,
+                calibration_dataset=calibration_dataset,
                batch_size=batch_size,
                remove_unused_columns=remove_unused_columns,
                data_collator=data_collator,
@@ -353,13 +366,17 @@ def _quantize_ovbasemodel(
                quantization_dataset = nncf.Dataset(collected_inputs)
            else:
                quantization_dataset = nncf.Dataset(calibration_dataloader)
+        else:
+            if calibration_dataset is None:
+                raise ValueError("Calibration dataset is required to run quantization.")
+            quantization_dataset = nncf.Dataset(calibration_dataset)
 
        # Actual model quantization
        quantized_model = nncf.quantize(
            self.model.model,
            quantization_dataset,
            subset_size=quantization_config.num_samples,
-            ignored_scope=quantization_config.ignored_scope,
+            ignored_scope=quantization_config.get_ignored_scope_instance(),
            model_type=quantization_config.model_type,
            preset=quantization_config.preset,
            fast_bias_correction=quantization_config.fast_bias_correction,
@@ -374,6 +391,7 @@ def _quantize_torchmodel(
        self,
        ov_config: OVConfig,
        save_directory: Union[str, Path],
+        calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
        file_name: Optional[str] = None,
        batch_size: int = 1,
        data_collator: Optional[DataCollator] = None,
@@ -416,24 +434,18 @@ def _quantize_torchmodel(
 
        quantization_config = ov_config.quantization_config
        if isinstance(quantization_config, OVWeightQuantizationConfig):
-            dataset = quantization_config.dataset
-            if not isinstance(dataset, nncf.Dataset):
-                if dataset is not None:
-                    raise ValueError(
-                        "Please provide `dataset` for weight compression as an instance of `nncf.Dataset`."
-                    )
-                if stateful:
-                    # patch model before weight compression
-                    model = patch_model_with_bettertransformer(model)
-
-                dummy_inputs = onnx_config.generate_dummy_inputs(framework="pt")
-                device = get_model_device(model)
-                dummy_inputs = tree_map(
-                    lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs
-                )
-                check_dummy_inputs_are_allowed(model, dummy_inputs)
-                dataset = nncf.Dataset([dummy_inputs])
-            nncf.compress_weights(model, dataset=dataset)
+            if stateful:
+                # patch model before weight compression
+                model = patch_model_with_bettertransformer(model)
+
+            dummy_inputs = onnx_config.generate_dummy_inputs(framework="pt")
+            device = get_model_device(model)
+            dummy_inputs = tree_map(
+                lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs
+            )
+            check_dummy_inputs_are_allowed(model, dummy_inputs)
+
+            nncf.compress_weights(model, dataset=nncf.Dataset([dummy_inputs]))
        else:
            if not isinstance(quantization_config, OVQuantizationConfig):
                raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
@@ -444,21 +456,25 @@ def _quantize_torchmodel(
                )
                stateful = False
 
-            if isinstance(quantization_config.dataset, nncf.Dataset):
-                quantization_dataset = quantization_config.dataset
-            else:
+            if isinstance(calibration_dataset, nncf.Dataset):
+                quantization_dataset = calibration_dataset
+            elif isinstance(calibration_dataset, datasets.Dataset):
                calibration_dataloader = self._get_calibration_dataloader(
-                    calibration_dataset=quantization_config.dataset,
+                    calibration_dataset=calibration_dataset,
                    batch_size=batch_size,
                    remove_unused_columns=remove_unused_columns,
                    data_collator=data_collator,
                )
                quantization_dataset = nncf.Dataset(calibration_dataloader)
+            else:
+                if calibration_dataset is None:
+                    raise ValueError("Calibration dataset is required to run quantization.")
+                quantization_dataset = nncf.Dataset(calibration_dataset)
            model = nncf.quantize(
                model,
                quantization_dataset,
                subset_size=quantization_config.num_samples,
-                ignored_scope=quantization_config.ignored_scope,
+                ignored_scope=quantization_config.get_ignored_scope_instance(),
                model_type=quantization_config.model_type,
                preset=quantization_config.preset,
                fast_bias_correction=quantization_config.fast_bias_correction,
@@ -522,7 +538,7 @@ def get_calibration_dataset(
        preprocess_batch: bool = True,
        use_auth_token: bool = False,
        cache_dir: Optional[str] = None,
-    ) -> "Dataset":
+    ) -> datasets.Dataset:
        """
        Create the calibration `datasets.Dataset` to use for the post-training static quantization calibration step.
 
@@ -599,18 +615,33 @@ def _remove_unused_columns(self, dataset: "Dataset"):
 
 
 def _weight_only_quantization(
-    model: openvino.runtime.Model, quantization_config: Union[OVWeightQuantizationConfig, Dict]
+    model: openvino.runtime.Model,
+    quantization_config: Union[OVWeightQuantizationConfig, Dict],
+    calibration_dataset: Optional[Union[nncf.Dataset, Iterable]] = None,
 ) -> openvino.runtime.Model:
    config = quantization_config
    if isinstance(config, dict):
        config = OVWeightQuantizationConfig.from_dict(quantization_config)
 
-    dataset = config.dataset
-
-    if config.dataset is not None and isinstance(config.dataset, str):
-        tokenizer = config.tokenizer
-        if isinstance(tokenizer, str):
-            tokenizer = AutoTokenizer.from_pretrained(tokenizer)
+    if config.dataset is not None and calibration_dataset is not None:
+        logger.info(
+            "Both `quantization_config.dataset` and `calibration_dataset` were provided for weight only "
+            "quantization. Will rely on `calibration_dataset`."
+        )
+    dataset = None
+    if calibration_dataset is not None:
+        if isinstance(calibration_dataset, datasets.Dataset):
+            raise ValueError(
+                "Providing calibration dataset as an instance of `datasets.Dataset` for OV weight-only "
+                "quantization is not supported. Please provide it as `nncf.Dataset` or as iterable of "
+                "model inputs."
+            )
+        elif isinstance(calibration_dataset, nncf.Dataset):
+            dataset = calibration_dataset
+        else:
+            dataset = nncf.Dataset(calibration_dataset)
+    elif config.dataset is not None and isinstance(config.dataset, str):
+        tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
 
        from optimum.gptq.data import get_dataset, prepare_dataset
 
@@ -635,7 +666,7 @@ def _weight_only_quantization(
        all_layers=config.all_layers,
        sensitivity_metric=sensitivity_metric,
        # awq=config.quant_method == QuantizationMethod.AWQ, # TODO : enable from nncf v2.9.0
-        ignored_scope=config.ignored_scope,
+        ignored_scope=config.get_ignored_scope_instance(),
        dataset=dataset,
        # subset_size=config.num_samples if config.num_samples else 128, # TODO : enable from nncf v2.9.0
    )
@@ -706,16 +737,13 @@ def _hybrid_quantization(
    """
    ops_to_compress = _collect_ops_with_weights(model)
 
-    ignored_scope: Union[nncf.IgnoredScope, None] = quantization_config.ignored_scope
-    ignored_scope = ignored_scope or nncf.IgnoredScope()
-    ptq_ignored_scope = copy.deepcopy(ignored_scope)
-    ptq_ignored_scope.names += ops_to_compress
-
-    wc_quantization_config = copy.deepcopy(quantization_config)
-    wc_quantization_config.ignored_scope = ignored_scope
-    wc_quantization_config.ignored_scope.types.append("Convolution")
-    compressed_model = _weight_only_quantization(model, wc_quantization_config)
+    wc_config = copy.deepcopy(quantization_config)
+    wc_config.ignored_scope = wc_config.ignored_scope or {}
+    wc_config.ignored_scope["types"] = wc_config.ignored_scope.get("types", []) + ["Convolution"]
+    compressed_model = _weight_only_quantization(model, wc_config)
 
+    ptq_ignored_scope = quantization_config.get_ignored_scope_instance()
+    ptq_ignored_scope.names += ops_to_compress
    subset_size = quantization_config.num_samples if quantization_config.num_samples else 200
    quantized_model = nncf.quantize(
        model=compressed_model,
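
On the `OVQuantizer` side, calibration data is now the first argument of `quantize()`: a `datasets.Dataset` goes through the existing dataloader path, an `nncf.Dataset` is used as-is, and any other iterable is wrapped with `nncf.Dataset(...)`. A sketch of the full-quantization flow, assuming the public `optimum.intel` exports (the preprocessing details are illustrative):

```python
from functools import partial

from transformers import AutoTokenizer
from optimum.intel import OVConfig, OVModelForSequenceClassification, OVQuantizationConfig, OVQuantizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

quantizer = OVQuantizer.from_pretrained(model)
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=partial(lambda ex, tok: tok(ex["sentence"], padding=True), tok=tokenizer),
    num_samples=10,
    dataset_split="train",
)
# The calibration data is a `quantize()` argument now, not a config field.
quantizer.quantize(
    calibration_dataset=calibration_dataset,
    ov_config=OVConfig(quantization_config=OVQuantizationConfig()),
    save_directory="./quantized_model",
)
```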

tests/openvino/test_quantization.py (+110 -71)

@@ -15,12 +15,13 @@
 # ruff: noqa
 
 import itertools
+import logging
 import tempfile
 import unittest
 from collections import defaultdict
 from enum import Enum
 from functools import partial
-from typing import List
+from typing import List, Union
 
 import evaluate
 import numpy as np
@@ -104,9 +105,13 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="train",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config, file_name=file_name)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(
+                save_directory=tmp_dir,
+                calibration_dataset=calibration_dataset,
+                file_name=file_name,
+                ov_config=ov_config,
+            )
            model = model_cls.from_pretrained(tmp_dir, file_name=file_name)
 
            # TODO: uncomment once move to a newer version of NNCF which has some fixes (addmm, baddmm)
@@ -120,7 +125,7 @@ def preprocess_function(examples, tokenizer):
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
    def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
@@ -146,9 +151,8 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="train",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(save_directory=tmp_dir, calibration_dataset=calibration_dataset, ov_config=ov_config)
 
            model = model_cls.from_pretrained(tmp_dir)
 
@@ -162,7 +166,7 @@ def preprocess_function(examples, tokenizer):
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
 
 class OVWeightCompressionTest(unittest.TestCase):
@@ -281,12 +285,12 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_i
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            original_config_as_dict = OVWeightQuantizationConfig(bits=8, sym=True).to_dict()
+            original_config_as_dict = OVWeightQuantizationConfig().to_dict()
            for k in original_config_as_dict.keys():
                v = original_config_as_dict[k]
                if isinstance(v, Enum):
                    original_config_as_dict[k] = v.value
-            self.assertEqual(original_config_as_dict, loaded_config.quantization_config)
+            self.assertEqual(original_config_as_dict, loaded_config.quantization_config.to_dict())
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
    def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
@@ -311,7 +315,7 @@ def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_p
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(OVWeightQuantizationConfig(bits=8, sym=True).to_dict(), loaded_config.quantization_config)
+            self.assertEqual(OVWeightQuantizationConfig().to_dict(), loaded_config.quantization_config.to_dict())
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS)
    def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_int8, expected_int4):
@@ -342,7 +346,7 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
    @unittest.skipIf(not IS_SUPPORT_STATEFUL, "Stateful models supported only in 2023.3 and above")
@@ -368,7 +372,7 @@ def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_id, exp
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(OVWeightQuantizationConfig(bits=8, sym=True).to_dict(), loaded_config.quantization_config)
+            self.assertEqual(OVWeightQuantizationConfig().to_dict(), loaded_config.quantization_config.to_dict())
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
    def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
@@ -439,11 +443,11 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
            model.save_pretrained(tmp_dir)
 
            openvino_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(openvino_config.quantization_config["bits"], 4)
+            self.assertEqual(openvino_config.quantization_config.bits, 4)
            self.assertEqual(openvino_config.dtype, "int4")
            if model_id == "facebook/opt-125m":
                for key, value in self.DEFAULT_INT4_CONFIG.items():
-                    self.assertEqual(value, openvino_config.quantization_config[key])
+                    self.assertEqual(value, getattr(openvino_config.quantization_config, key))
 
    @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
    def test_ovmodel_4bit_auto_compression_with_config(
@@ -461,7 +465,7 @@ def test_ovmodel_4bit_auto_compression_with_config(
            model.save_pretrained(tmp_dir)
 
            openvino_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(openvino_config.quantization_config["bits"], 4)
+            self.assertEqual(openvino_config.quantization_config.bits, 4)
            self.assertEqual(openvino_config.dtype, "int4")
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS)
@@ -492,9 +496,8 @@ def transform_fn(data, tokenizer):
        model = model_cls.from_pretrained(
            model_id,
            export=True,
-            quantization_config=OVWeightQuantizationConfig(
-                bits=4, sym=True, group_size=-1, ratio=0.8, dataset=quantization_dataset
-            ),
+            quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+            calibration_dataset=quantization_dataset,
        )
 
        _, num_int8, num_int4 = get_num_quantized_nodes(model)
@@ -584,7 +587,7 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self):
                "all_layers": None,
                "sensitivity_metric": None,
                "dataset": None,
-                "ignored_scope": None,
+                "ignored_scope": nncf.IgnoredScope(),
            }
            compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
 
@@ -610,9 +613,8 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="test",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(save_directory=tmp_dir, calibration_dataset=calibration_dataset, ov_config=ov_config)
 
            # Test that inference on quantized model works
            model = OVModelForQuestionAnswering.from_pretrained(tmp_dir)
@@ -629,7 +631,7 @@ def preprocess_function(examples, tokenizer):
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_ovmodel_static_quantization(self, model_name):
@@ -649,9 +651,8 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="test",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(save_directory=tmp_dir, calibration_dataset=calibration_dataset, ov_config=ov_config)
 
            # Test that inference on quantized model works
            model = OVModelForQuestionAnswering.from_pretrained(tmp_dir)
@@ -668,7 +669,7 @@ def preprocess_function(examples, tokenizer):
 
            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
 
 class OVTrainerTest(unittest.TestCase):
@@ -719,24 +720,13 @@ def compute_metrics(p):
 
 class OVQuantizationConfigTest(unittest.TestCase):
    QUANTIZATION_CONFIGS = (
-        (
-            None,
-            [],
-        ),
-        (OVWeightQuantizationConfig(), []),
+        (None,),
+        (OVWeightQuantizationConfig(),),
        (
            OVWeightQuantizationConfig(
                bits=8,
                sym=True,
            ),
-            [],
-        ),
-        (
-            {
-                "bits": 8,
-                "sym": True,
-            },
-            [],
        ),
        (
            OVWeightQuantizationConfig(
@@ -752,43 +742,82 @@ class OVQuantizationConfigTest(unittest.TestCase):
                num_samples=100,
                quant_method=OVQuantizationMethod.DEFAULT,
            ),
-            ["ignored_scope"],
        ),
-        (OVWeightQuantizationConfig(dataset=["wikitext", "c4"]), []),
-        (OVWeightQuantizationConfig(dataset=load_dataset("wikitext", "wikitext-2-raw-v1", split="test")), ["dataset"]),
-        (OVWeightQuantizationConfig(dataset=nncf.Dataset([np.zeros((1, 10))])), ["dataset"]),
+        (OVWeightQuantizationConfig(dataset=["hello world", "i'm alive"]),),
        (
-            OVWeightQuantizationConfig(tokenizer=AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")),
-            ["tokenizer"],
+            OVQuantizationConfig(
+                ignored_scope={"names": ["op_name"]},
+                num_samples=100,
+                preset=nncf.QuantizationPreset.MIXED,
+                model_type=nncf.ModelType.TRANSFORMER,
+                fast_bias_correction=True,
+                overflow_fix=OverflowFix.DISABLE,
+            ),
        ),
-        (OVWeightQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])), ["ignored_scope"]),
-        (OVQuantizationConfig(dataset="wikitext"), []),
-        ({"dataset": "wikitext"}, []),
+        (OVQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])),),
+    )
+
+    QUANTIZATION_CONFIG_DICTS = (
+        (dict(bits=8, sym=True), OVWeightQuantizationConfig, None),
        (
-            OVQuantizationConfig(
+            dict(
                dataset="wikitext",
+                bits=4,
+                ignored_scope={"names": ["op_name"]},
+                sym=False,
+                tokenizer="dbmdz/bert-base-german-cased",
+                ratio=1.0,
+                group_size=128,
+                all_layers=True,
+                sensitivity_metric="mean_activation_magnitude",
+                num_samples=100,
+                quant_method=OVQuantizationMethod.DEFAULT,
+            ),
+            OVWeightQuantizationConfig,
+            None,
+        ),
+        (dict(), OVWeightQuantizationConfig, "Can't determine type of OV quantization config"),
+        (
+            dict(ignored_scope={"names": ["op_name"]}),
+            OVWeightQuantizationConfig,
+            "Can't determine type of OV quantization config",
+        ),
+        (dict(num_samples=100), OVWeightQuantizationConfig, "Can't determine type of OV quantization config"),
+        (dict(abc="def"), OVWeightQuantizationConfig, "Can't determine type of OV quantization config"),
+        (
+            dict(bits=8, fast_bias_correction=True),
+            OVWeightQuantizationConfig,
+            "Can't determine type of OV quantization config",
+        ),
+        (dict(model_type=nncf.ModelType.TRANSFORMER), OVQuantizationConfig, None),
+        (
+            dict(
                ignored_scope={"names": ["op_name"]},
                num_samples=100,
                preset=nncf.QuantizationPreset.MIXED,
                model_type=nncf.ModelType.TRANSFORMER,
                fast_bias_correction=True,
                overflow_fix=OverflowFix.DISABLE,
            ),
-            ["ignored_scope"],
+            OVQuantizationConfig,
+            None,
        ),
-        (OVQuantizationConfig(dataset=["wikitext", "c4"]), []),
-        (OVQuantizationConfig(dataset=load_dataset("wikitext", "wikitext-2-raw-v1", split="test")), ["dataset"]),
-        (OVQuantizationConfig(dataset=nncf.Dataset([np.zeros((1, 10))])), ["dataset"]),
+        (dict(weight_only=True), OVWeightQuantizationConfig, None),
+        (dict(weight_only=False), OVQuantizationConfig, None),
+        (dict(abc="def", weight_only=False), OVQuantizationConfig, None),
+        (dict(abc="def", weight_only=True), OVWeightQuantizationConfig, None),
+        (dict(bits=8, fast_bias_correction=True, weight_only=True), OVWeightQuantizationConfig, None),
+        (dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
+        (dict(bits=8, sym=True, weight_only=False), OVWeightQuantizationConfig, "Please check your configuration"),
        (
-            OVQuantizationConfig(dataset=["wikitext", "c4"], ignored_scope=nncf.IgnoredScope(names=["op_name"])),
-            ["ignored_scope"],
+            dict(model_type=nncf.ModelType.TRANSFORMER, weight_only=True),
+            OVQuantizationConfig,
+            "Please check your configuration",
        ),
    )
 
    @parameterized.expand(QUANTIZATION_CONFIGS)
-    def test_config_serialization(
-        self, quantization_config: OVQuantizationConfigBase, non_equal_property_names: List[str]
-    ):
+    def test_config_serialization(self, quantization_config: OVQuantizationConfigBase):
        def str_to_enum(enum_cls, value):
            for k, v in enum_cls.__members__.items():
                if getattr(enum_cls, k).value == value:
@@ -803,12 +832,8 @@ def str_to_enum(enum_cls, value):
        if quantization_config is None:
            self.assertEqual(loaded_ov_config.quantization_config, None)
            return
-        for key, value in loaded_ov_config.quantization_config.items():
-            initial_value = (
-                quantization_config[key]
-                if isinstance(quantization_config, dict)
-                else getattr(ov_config.quantization_config, key)
-            )
+        for key, value in loaded_ov_config.quantization_config.to_dict().items():
+            initial_value = getattr(ov_config.quantization_config, key)
            if key == "preset" or key == "overflow_fix":
                # TODO: remove once NNCF is updated to 2.10
                if getattr(quantization_config, key) is not None:
@@ -817,10 +842,24 @@ def str_to_enum(enum_cls, value):
                        value = str_to_enum(nncf.QuantizationPreset, value)
                    else:
                        value = str_to_enum(OverflowFix, value)
-            if key in non_equal_property_names:
-                self.assertNotEqual(value, initial_value)
-            else:
-                self.assertEqual(value, initial_value)
+            self.assertEqual(value, initial_value)
+
+    @parameterized.expand(QUANTIZATION_CONFIG_DICTS)
+    def test_config_from_dict(self, quantization_config: dict, config_type: type, warning_log: Union[str, None]):
+        from optimum.intel.openvino.configuration import logger as configuration_logger
+
+        if warning_log is not None:
+            with self.assertLogs(configuration_logger, logging.WARN) as cm:
+                ov_config = OVConfig(quantization_config=quantization_config)
+            self.assertTrue(any(warning_log in log for log in cm.output))
+        else:
+            ov_config = OVConfig(quantization_config=quantization_config)
+        self.assertIsInstance(ov_config.quantization_config, config_type)
+        for k, v in quantization_config.items():
+            if k == "weight_only" and warning_log == "Please check your configuration":
+                continue
+            if hasattr(ov_config.quantization_config, k):
+                self.assertEqual(getattr(ov_config.quantization_config, k), v)
 
 
 class InferRequestWrapperTest(unittest.TestCase):
