Skip to content

Commit 20fd761

Browse files
Addressed minor comments
1 parent 123e227 commit 20fd761

File tree

5 files changed

+53
-43
lines changed

5 files changed

+53
-43
lines changed

optimum/intel/openvino/configuration.py

+25-26
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
}
5353

5454

55-
class replace_properties_values:
55+
class _replace_properties_values:
5656
"""
5757
A context manager for temporarily overriding an object's properties
5858
"""
@@ -74,7 +74,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
7474
setattr(self.obj, property_name, old_property_value)
7575

7676

77-
def is_serializable(obj):
77+
def _is_serializable(obj):
7878
try:
7979
json.dumps(obj)
8080
return True
@@ -92,22 +92,22 @@ def __init__(
9292
self,
9393
dataset: Optional[Union[str, List[str], nncf.Dataset, datasets.Dataset]] = None,
9494
ignored_scope: Optional[Union[dict, nncf.IgnoredScope]] = None,
95-
subset_size: Optional[int] = None,
95+
num_samples: Optional[int] = None,
9696
):
9797
"""
9898
Args:
9999
dataset (`str or List[str] or nncf.Dataset or datasets.Dataset`, *optional*):
100100
The dataset used for data-aware weight compression or quantization with NNCF.
101101
ignored_scope (`dict or nncf.IgnoredScope`, *optional*):
102102
An ignored scope that defines the list of model nodes to be ignored during quantization.
103-
subset_size (`int`, *optional*):
103+
num_samples (`int`, *optional*):
104104
The maximum number of samples composing the calibration dataset.
105105
"""
106106
self.dataset = dataset
107107
if isinstance(ignored_scope, dict):
108108
ignored_scope = nncf.IgnoredScope(**ignored_scope)
109109
self.ignored_scope = ignored_scope
110-
self.subset_size = subset_size
110+
self.num_samples = num_samples
111111

112112
def post_init(self):
113113
if not (self.dataset is None or isinstance(self.dataset, (str, list, nncf.Dataset, datasets.Dataset))):
@@ -121,22 +121,22 @@ def post_init(self):
121121
f"{type(self.dataset)}"
122122
)
123123

124-
def to_dict_without_properties(self, property_names: Union[List[str], Tuple[str]]) -> Dict[str, Any]:
124+
def _to_dict_without_properties(self, property_names: Union[List[str], Tuple[str]]) -> Dict[str, Any]:
125125
"""
126126
Calls to_dict() with given properties overwritten with None. Useful for hiding non-serializable properties.
127127
"""
128128
if len(property_names) == 0:
129129
return super().to_dict()
130-
with replace_properties_values(self, property_names, [None] * len(property_names)):
130+
with _replace_properties_values(self, property_names, [None] * len(property_names)):
131131
result = super().to_dict()
132132
return result
133133

134134
def to_dict(self) -> Dict[str, Any]:
135-
properties_to_omit = [] if is_serializable(self.dataset) else ["dataset"]
135+
properties_to_omit = [] if _is_serializable(self.dataset) else ["dataset"]
136136
if isinstance(self.ignored_scope, nncf.IgnoredScope):
137-
with replace_properties_values(self, ["ignored_scope"], [self.ignored_scope.__dict__]):
138-
return self.to_dict_without_properties(properties_to_omit)
139-
return self.to_dict_without_properties(properties_to_omit)
137+
with _replace_properties_values(self, ["ignored_scope"], [self.ignored_scope.__dict__]):
138+
return self._to_dict_without_properties(properties_to_omit)
139+
return self._to_dict_without_properties(properties_to_omit)
140140

141141

142142
class OVConfig(BaseConfig):
@@ -180,21 +180,21 @@ def add_input_info(self, model_inputs: Dict, force_batch_one: bool = False):
180180
for name, value in model_inputs.items()
181181
]
182182

183-
def to_dict_safe(self, to_diff_dict: bool = False) -> Dict[str, Any]:
183+
def _to_dict_safe(self, to_diff_dict: bool = False) -> Dict[str, Any]:
184184
if self.quantization_config is None:
185185
# Parent to_dict() implementation does not support quantization_config being None
186-
with replace_properties_values(self, ("quantization_config",), (OVQuantizationConfigBase(),)):
186+
with _replace_properties_values(self, ("quantization_config",), (OVQuantizationConfigBase(),)):
187187
result = super().to_diff_dict() if to_diff_dict else super().to_dict()
188188
del result["quantization_config"]
189189
else:
190190
result = super().to_diff_dict() if to_diff_dict else super().to_dict()
191191
return result
192192

193193
def to_dict(self) -> Dict[str, Any]:
194-
return self.to_dict_safe(to_diff_dict=False)
194+
return self._to_dict_safe(to_diff_dict=False)
195195

196196
def to_diff_dict(self) -> Dict[str, Any]:
197-
return self.to_dict_safe(to_diff_dict=True)
197+
return self._to_dict_safe(to_diff_dict=True)
198198

199199

200200
class OVQuantizationMethod(str, Enum):
@@ -236,7 +236,7 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
236236
preserve the accuracy of the model, the more sensitive layers receive a higher precision.
237237
ignored_scope (`dict`, *optional*):
238238
An ignored scope that defines the list of model control flow graph nodes to be ignored during quantization.
239-
subset_size (`int`, *optional*):
239+
num_samples (`int`, *optional*):
240240
The maximum number of samples composing the calibration dataset.
241241
quant_method (`str`, defaults to OVQuantizationMethod.DEFAULT):
242242
Weight compression method to apply.
@@ -253,19 +253,18 @@ def __init__(
253253
all_layers: Optional[bool] = None,
254254
sensitivity_metric: Optional[str] = None,
255255
ignored_scope: Optional[Union[dict, nncf.IgnoredScope]] = None,
256-
subset_size: Optional[int] = None,
256+
num_samples: Optional[int] = None,
257257
quant_method: Optional[Union[QuantizationMethod, OVQuantizationMethod]] = OVQuantizationMethod.DEFAULT,
258258
**kwargs,
259259
):
260-
super().__init__(dataset, ignored_scope, subset_size)
260+
super().__init__(dataset, ignored_scope, num_samples)
261261
self.bits = bits
262262
self.sym = sym
263263
self.tokenizer = tokenizer
264264
self.group_size = group_size or (-1 if bits == 8 else 128)
265265
self.ratio = ratio
266266
self.all_layers = all_layers
267267
self.sensitivity_metric = sensitivity_metric
268-
self.subset_size = subset_size
269268
self.quant_method = quant_method
270269
self.post_init()
271270

@@ -305,8 +304,8 @@ def post_init(self):
305304
)
306305

307306
def to_dict(self) -> Dict[str, Any]:
308-
if not is_serializable(self.tokenizer):
309-
return self.to_dict_without_properties(("tokenizer",))
307+
if not _is_serializable(self.tokenizer):
308+
return self._to_dict_without_properties(("tokenizer",))
310309
return super().to_dict()
311310

312311

@@ -316,7 +315,7 @@ def __init__(
316315
self,
317316
dataset: Union[str, List[str], nncf.Dataset, datasets.Dataset],
318317
ignored_scope: Optional[Union[dict, nncf.IgnoredScope]] = None,
319-
subset_size: Optional[int] = 300,
318+
num_samples: Optional[int] = 300,
320319
preset: nncf.QuantizationPreset = None,
321320
model_type: nncf.ModelType = nncf.ModelType.TRANSFORMER,
322321
fast_bias_correction: bool = True,
@@ -332,7 +331,7 @@ def __init__(
332331
A dataset used for quantization parameters calibration. Required parameter.
333332
ignored_scope (`dict or nncf.IgnoredScope`, *optional*):
334333
An ignored scope that defines the list of model nodes to be ignored during quantization.
335-
subset_size (`int`, *optional*):
334+
num_samples (`int`, *optional*):
336335
The maximum number of samples composing the calibration dataset.
337336
preset (`nncf.QuantizationPreset`, *optional*):
338337
A preset controls the quantization mode (symmetric and asymmetric).
@@ -345,10 +344,10 @@ def __init__(
345344
Model type is needed to specify additional patterns in the model. Supported only `transformer` now.
346345
fast_bias_correction (`bool`, defaults to True):
347346
Whether to apply fast or full bias correction algorithm.
348-
overflow_fix (`bool`, default to OverflowFix.DISABLE):
347+
overflow_fix (`nncf.OverflowFix`, defaults to OverflowFix.DISABLE):
349348
Parameter for controlling overflow fix setting.
350349
"""
351-
super().__init__(dataset, ignored_scope, subset_size)
350+
super().__init__(dataset, ignored_scope, num_samples)
352351
self.preset = preset
353352
self.model_type = model_type
354353
self.fast_bias_correction = fast_bias_correction
@@ -370,7 +369,7 @@ def to_dict(self) -> Dict[str, Any]:
370369
# TODO: remove code below once NNCF is updated to 2.10
371370
overflow_fix_value = None if self.overflow_fix is None else self.overflow_fix.value
372371
preset_value = None if self.preset is None else self.preset.value
373-
with replace_properties_values(self, ("overflow_fix", "preset"), (overflow_fix_value, preset_value)):
372+
with _replace_properties_values(self, ("overflow_fix", "preset"), (overflow_fix_value, preset_value)):
374373
return super().to_dict()
375374

376375

optimum/intel/openvino/modeling_decoder.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,7 @@ def _from_pretrained(
640640
# from optimum.gptq.utils import get_seqlen
641641

642642
# seqlen = get_seqlen(causal_model)
643-
nsamples = quantization_config.subset_size if quantization_config.subset_size else 128
643+
nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
644644
dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
645645
dataset = prepare_dataset(dataset)
646646
quantization_config = copy.deepcopy(quantization_config)

optimum/intel/openvino/modeling_diffusion.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ def _from_pretrained(
321321
if not isinstance(sd_model, supported_pipelines):
322322
raise NotImplementedError(f"Quantization in hybrid mode is not supported for {cls.__name__}")
323323

324-
nsamples = quantization_config.subset_size if quantization_config.subset_size else 200
324+
nsamples = quantization_config.num_samples if quantization_config.num_samples else 200
325325
unet_inputs = sd_model._prepare_unet_inputs(quantization_config.dataset, nsamples)
326326

327327
from .quantization import _hybrid_quantization

optimum/intel/openvino/quantization.py

+22-11
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def quantize(
209209
batch_size: int = 1,
210210
data_collator: Optional[DataCollator] = None,
211211
remove_unused_columns: bool = True,
212+
weights_only: bool = None,
212213
**kwargs,
213214
):
214215
"""
@@ -228,6 +229,10 @@ def quantize(
228229
The function to use to form a batch from a list of elements of the calibration dataset.
229230
remove_unused_columns (`bool`, defaults to `True`):
230231
Whether to remove the columns unused by the model forward method.
232+
weights_only (`bool`, *optional*):
233+
Being deprecated.
234+
Compress weights to integer precision (8-bit by default) while keeping activations
235+
floating-point. Fits best for LLM footprint reduction and performance acceleration.
231236
232237
Examples:
233238
```python
@@ -257,9 +262,9 @@ def quantize(
257262
"`calibration_dataset` argument is deprecated. Please provide calibration dataset "
258263
"with `ov_config.quantization_config.dataset`."
259264
)
260-
if "weights_only" in kwargs:
261-
raise ValueError(
262-
"`weights_only` argument is deprecated. Please provide `ov_config.quantization_config` "
265+
if weights_only is not None:
266+
logger.warning(
267+
"`weights_only` argument is deprecated. In the future please provide `ov_config.quantization_config` "
263268
"as an instance of OVWeightQuantizationConfig for weight-only compression."
264269
)
265270

@@ -274,8 +279,14 @@ def quantize(
274279
raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
275280
quantization_config = ov_config.quantization_config
276281
if quantization_config is None:
277-
ov_config.quantization_config = OVWeightQuantizationConfig(bits=8, sym=True)
278-
logger.info("`quantization_config` was not provided, 8-bit symmetric weight quantization will be applied.")
282+
if weights_only is None or weights_only is True:
283+
if weights_only is None:
284+
logger.info(
285+
"`quantization_config` was not provided, 8-bit symmetric weight quantization will be applied."
286+
)
287+
ov_config.quantization_config = OVWeightQuantizationConfig(bits=8, sym=True)
288+
else:
289+
ov_config.quantization_config = OVQuantizationConfig()
279290

280291
if isinstance(self.model, OVBaseModel):
281292
self._quantize_ovbasemodel(
@@ -335,7 +346,7 @@ def _quantize_ovbasemodel(
335346
try:
336347
for data in calibration_dataloader:
337348
self.model.generate(**data, max_new_tokens=1)
338-
if len(collected_inputs) >= quantization_config.subset_size:
349+
if len(collected_inputs) >= quantization_config.num_samples:
339350
break
340351
finally:
341352
self.model.request = self.model.request.request
@@ -347,7 +358,7 @@ def _quantize_ovbasemodel(
347358
quantized_model = nncf.quantize(
348359
self.model.model,
349360
quantization_dataset,
350-
subset_size=quantization_config.subset_size,
361+
subset_size=quantization_config.num_samples,
351362
ignored_scope=quantization_config.ignored_scope,
352363
model_type=quantization_config.model_type,
353364
preset=quantization_config.preset,
@@ -446,7 +457,7 @@ def _quantize_torchmodel(
446457
model = nncf.quantize(
447458
model,
448459
quantization_dataset,
449-
subset_size=quantization_config.subset_size,
460+
subset_size=quantization_config.num_samples,
450461
ignored_scope=quantization_config.ignored_scope,
451462
model_type=quantization_config.model_type,
452463
preset=quantization_config.preset,
@@ -603,7 +614,7 @@ def _weight_only_quantization(
603614

604615
from optimum.gptq.data import get_dataset, prepare_dataset
605616

606-
nsamples = config.subset_size if config.subset_size else 128
617+
nsamples = config.num_samples if config.num_samples else 128
607618
dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
608619
dataset = prepare_dataset(dataset)
609620

@@ -626,7 +637,7 @@ def _weight_only_quantization(
626637
# awq=config.quant_method == QuantizationMethod.AWQ, # TODO : enable from nncf v2.9.0
627638
ignored_scope=config.ignored_scope,
628639
dataset=dataset,
629-
# subset_size=config.subset_size if config.subset_size else 128, # TODO : enable from nncf v2.9.0
640+
# subset_size=config.num_samples if config.num_samples else 128, # TODO : enable from nncf v2.9.0
630641
)
631642

632643

@@ -705,7 +716,7 @@ def _hybrid_quantization(
705716
wc_quantization_config.ignored_scope.types.append("Convolution")
706717
compressed_model = _weight_only_quantization(model, wc_quantization_config)
707718

708-
subset_size = quantization_config.subset_size if quantization_config.subset_size else 200
719+
subset_size = quantization_config.num_samples if quantization_config.num_samples else 200
709720
quantized_model = nncf.quantize(
710721
model=compressed_model,
711722
calibration_dataset=nncf.Dataset(dataset),

tests/openvino/test_quantization.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
392392
@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION)
393393
def test_ovmodel_hybrid_quantization(self, model_cls, model_type, expected_num_fake_quantize, expected_ov_int8):
394394
model_id = MODEL_NAMES[model_type]
395-
quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions", subset_size=2)
395+
quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions", num_samples=2)
396396
with tempfile.TemporaryDirectory() as tmp_dir:
397397
model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config)
398398

@@ -414,7 +414,7 @@ def test_ovmodel_hybrid_quantization_with_custom_dataset(
414414
model = model_cls.from_pretrained(
415415
model_id,
416416
export=True,
417-
quantization_config=OVWeightQuantizationConfig(bits=8, dataset=dataset, subset_size=3),
417+
quantization_config=OVWeightQuantizationConfig(bits=8, dataset=dataset, num_samples=3),
418418
)
419419
num_fake_quantize, num_int8, num_int4 = get_num_quantized_nodes(model.unet)
420420
self.assertEqual(expected_num_fake_quantize, num_fake_quantize)
@@ -749,7 +749,7 @@ class OVQuantizationConfigTest(unittest.TestCase):
749749
group_size=128,
750750
all_layers=True,
751751
sensitivity_metric="mean_activation_magnitude",
752-
subset_size=100,
752+
num_samples=100,
753753
quant_method=OVQuantizationMethod.DEFAULT,
754754
),
755755
["ignored_scope"],
@@ -768,7 +768,7 @@ class OVQuantizationConfigTest(unittest.TestCase):
768768
OVQuantizationConfig(
769769
dataset="wikitext",
770770
ignored_scope={"names": ["op_name"]},
771-
subset_size=100,
771+
num_samples=100,
772772
preset=nncf.QuantizationPreset.MIXED,
773773
model_type=nncf.ModelType.TRANSFORMER,
774774
fast_bias_correction=True,

0 commit comments

Comments
 (0)