Skip to content

Commit f600f5f

Browse files
Initial commit
1 parent 4f79e05 commit f600f5f

File tree

3 files changed

+171
-32
lines changed

3 files changed

+171
-32
lines changed

optimum/intel/openvino/configuration.py

+39-24
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ def __init__(
294294
dataset: Optional[Union[str, List[str]]] = None,
295295
tokenizer: Optional[str] = None,
296296
processor: Optional[str] = None,
297-
trust_remote_code: bool = False,
297+
trust_remote_code: Optional[bool] = False,
298+
init_kwargs: Optional[dict] = None,
298299
**kwargs,
299300
):
300301
"""
@@ -314,6 +315,8 @@ def __init__(
314315
Allows to use custom code for the modeling hosted in the model repository. This option should only be
315316
set for repositories you trust and in which you have read the code, as it will execute on your local
316317
machine arbitrary code present in the model repository.
318+
init_kwargs (`dict`, *optional*):
319+
Additional parameters for NNCF calls. This explicit argument is needed for deserialization from dict.
317320
"""
318321
self.num_samples = num_samples
319322
self.dataset = dataset
@@ -323,6 +326,7 @@ def __init__(
323326
if isinstance(ignored_scope, nncf.IgnoredScope):
324327
ignored_scope = ignored_scope.__dict__
325328
self.ignored_scope = ignored_scope
329+
self.init_kwargs = (init_kwargs or {}) | kwargs
326330

327331
def post_init(self):
328332
try:
@@ -427,6 +431,9 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
427431
retained in their original precision without any quantization.
428432
- "int8_sym" stands for 8-bit integer symmetric quantization without zero point.
429433
- "int8_asym" stands for 8-bit integer asymmetric quantization with zero points per each quantization group.
434+
init_kwargs (`dict`, *optional*):
435+
Additional parameters for nncf.compress_weights() call. This explicit argument is needed for deserialization from dict.
436+
kwargs: Additional parameters for nncf.compress_weights() call.
430437
"""
431438

432439
def __init__(
@@ -449,15 +456,25 @@ def __init__(
449456
processor: Optional[str] = None,
450457
lora_correction: bool = None,
451458
backup_precision: Optional[str] = None,
459+
init_kwargs: Optional[dict] = None,
452460
**kwargs,
453461
):
462+
weight_format = kwargs.pop("weight_format", None)
463+
if weight_format is not None:
464+
logger.warning(
465+
"The `weight_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
466+
"Please use `dtype` instead."
467+
)
468+
dtype = weight_format
454469
super().__init__(
455470
ignored_scope=ignored_scope,
456471
num_samples=num_samples,
457472
dataset=dataset,
458473
tokenizer=tokenizer,
459474
processor=processor,
460475
trust_remote_code=trust_remote_code,
476+
init_kwargs=init_kwargs,
477+
**kwargs,
461478
)
462479
self.bits = bits
463480
self.sym = sym
@@ -470,12 +487,6 @@ def __init__(
470487
self.gptq = gptq
471488
self.lora_correction = lora_correction
472489
self.backup_precision = backup_precision
473-
if kwargs.get("weight_format") is not None:
474-
logger.warning(
475-
"The `weight_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
476-
"Please use `dtype` instead."
477-
)
478-
dtype = kwargs.get("weight_format")
479490
self.dtype = dtype
480491
self.post_init()
481492

@@ -624,6 +635,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
624635
"gptq": self.gptq,
625636
"lora_correction": self.lora_correction,
626637
"backup_mode": backup_mode,
638+
**self.init_kwargs,
627639
}
628640
return result
629641

@@ -666,6 +678,7 @@ def __init__(
666678
trust_remote_code: bool = False,
667679
smooth_quant_alpha: Optional[float] = None,
668680
dtype: Optional[str] = "int8",
681+
init_kwargs: Optional[dict] = None,
669682
**kwargs,
670683
):
671684
"""
@@ -712,27 +725,33 @@ def __init__(
712725
reduces quantization error.
713726
dtype (`str`, defaults to "int8"):
714727
Data type activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
728+
init_kwargs (`dict`, *optional*):
729+
Additional parameters for nncf.quantize() call. This explicit argument is needed for deserialization from dict.
730+
kwargs: Additional parameters for nncf.quantize() call.
715731
"""
732+
activation_format = kwargs.pop("activation_format", None)
733+
if activation_format is not None:
734+
logger.warning(
735+
"The `activation_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
736+
"Please use `dtype` instead."
737+
)
738+
dtype = activation_format
716739
super().__init__(
717740
ignored_scope=ignored_scope,
718741
num_samples=num_samples,
719742
dataset=dataset,
720743
tokenizer=tokenizer,
721744
processor=processor,
722745
trust_remote_code=trust_remote_code,
746+
init_kwargs=init_kwargs,
747+
**kwargs,
723748
)
724749
self.bits = bits
725750
self.sym = sym
726751
self.model_type = model_type
727752
self.fast_bias_correction = fast_bias_correction
728753
self.overflow_fix = overflow_fix
729754
self.smooth_quant_alpha = smooth_quant_alpha
730-
if kwargs.get("activation_format") is not None:
731-
logger.warning(
732-
"The `activation_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
733-
"Please use `dtype` instead."
734-
)
735-
dtype = kwargs.get("activation_format")
736755
self.dtype = dtype
737756

738757
f8_dtypes = ["f8e4m3", "f8e5m2"]
@@ -769,23 +788,19 @@ def to_nncf_dict(self) -> Dict[str, Any]:
769788
Returns a dictionary with the variables that are ready to use for nncf.quantize() call.
770789
"""
771790

772-
preset = "performance" if self.sym else "mixed"
773-
advanced_parameters_dict = {"overflow_fix": self.overflow_fix}
791+
# Merge advanced parameters from init_kwargs if they were provided
792+
init_kwargs_copy = copy.deepcopy(self.init_kwargs)
793+
advanced_parameters = init_kwargs_copy.pop("advanced_parameters", nncf.AdvancedQuantizationParameters())
794+
advanced_parameters.overflow_fix = nncf.OverflowFix(self.overflow_fix)
774795
if self.smooth_quant_alpha:
775-
advanced_parameters_dict["smooth_quant_alphas"] = {"matmul": self.smooth_quant_alpha}
796+
advanced_parameters.smooth_quant_alphas.matmul = self.smooth_quant_alpha
776797

777798
mode_map = {"f8e4m3": "fp8_e4m3", "f8e5m2": "fp8_e5m2"}
778799
mode = mode_map.get(self.dtype)
779800

801+
preset = "performance" if self.sym else "mixed"
780802
preset = nncf.QuantizationPreset(preset)
781803
model_type = nncf.ModelType(self.model_type)
782-
advanced_parameters = nncf.AdvancedQuantizationParameters(
783-
overflow_fix=advanced_parameters_dict["overflow_fix"],
784-
)
785-
if "smooth_quant_alphas" in advanced_parameters_dict:
786-
advanced_parameters.smooth_quant_alphas = nncf.AdvancedSmoothQuantParameters(
787-
**advanced_parameters_dict["smooth_quant_alphas"]
788-
)
789804

790805
return {
791806
"mode": mode,
@@ -795,6 +810,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
795810
"model_type": model_type,
796811
"ignored_scope": self.get_ignored_scope_instance(),
797812
"advanced_parameters": advanced_parameters,
813+
**init_kwargs_copy,
798814
}
799815

800816

@@ -930,7 +946,6 @@ def __init__(
930946
Allows to use custom code for the modeling hosted in the model repository. This option should only be
931947
set for repositories you trust and in which you have read the code, as it will execute on your local
932948
machine arbitrary code present in the model repository.
933-
**kwargs:
934949
"""
935950
self.weight_quantization_config = self._initialize_quantization_config(
936951
weight_quantization_config, OVWeightQuantizationConfig

optimum/intel/openvino/quantization.py

+23-4
Original file line numberDiff line numberDiff line change
@@ -1034,8 +1034,17 @@ def _weight_only_quantization(
10341034
else:
10351035
dataset = nncf.Dataset(calibration_dataset)
10361036

1037-
wc_kwargs = copy.deepcopy(kwargs)
1038-
wc_kwargs.update(config.to_nncf_dict())
1037+
wc_kwargs = config.to_nncf_dict()
1038+
1039+
# Arguments provided in kwargs override the ones from the config
1040+
kwargs_intersection = set(wc_kwargs.keys()) & set(kwargs.keys())
1041+
if kwargs_intersection:
1042+
logger.warning(
1043+
f"The following nncf.compress_weights() arguments from the OVWeightQuantizationConfig will be overridden "
1044+
f"by the ones given in _weight_only_quantization call kwargs: {kwargs_intersection}."
1045+
)
1046+
wc_kwargs.update(kwargs)
1047+
10391048
compressed_model = nncf.compress_weights(
10401049
model,
10411050
dataset=dataset,
@@ -1056,8 +1065,18 @@ def _full_quantization(
10561065
):
10571066
if verify_not_optimized:
10581067
_verify_not_optimized(model)
1059-
q_kwargs = copy.deepcopy(kwargs)
1060-
q_kwargs.update(quantization_config.to_nncf_dict())
1068+
1069+
q_kwargs = quantization_config.to_nncf_dict()
1070+
1071+
# Arguments provided in kwargs override the ones from the config
1072+
kwargs_intersection = set(q_kwargs.keys()) & set(kwargs.keys())
1073+
if kwargs_intersection:
1074+
logger.warning(
1075+
f"The following nncf.quantize() arguments from the OVQuantizationConfig will be overridden "
1076+
f"by the ones given in _full_quantization call kwargs: {kwargs_intersection}."
1077+
)
1078+
q_kwargs.update(kwargs)
1079+
10611080
quantized_model = nncf.quantize(model, calibration_dataset=calibration_dataset, **q_kwargs)
10621081

10631082
_remove_f16_kv_cache_precision_flag(quantized_model)

tests/openvino/test_quantization.py

+109-4
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,18 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import dataclasses
1415
import inspect
1516

1617
# ruff: noqa
1718

1819
import itertools
1920
import logging
2021
import unittest
21-
from collections import defaultdict
22+
from collections import defaultdict
from collections.abc import Iterable
2223
from enum import Enum
2324
from functools import partial
24-
from typing import Union
25+
from typing import Union, Type
2526

2627
import openvino as ov
2728
import pytest
@@ -77,7 +78,7 @@
7778
from optimum.intel.openvino.utils import TemporaryDirectory
7879
from copy import deepcopy
7980

80-
from optimum.intel.openvino.quantization import InferRequestWrapper
81+
from optimum.intel.openvino.quantization import InferRequestWrapper, _weight_only_quantization, _full_quantization
8182
from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
8283
from utils_tests import (
8384
MODEL_NAMES,
@@ -1241,7 +1242,6 @@ class OVQuantizationConfigTest(unittest.TestCase):
12411242
),
12421243
),
12431244
(OVQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])),),
1244-
(OVDynamicQuantizationConfig(bits=8, sym=True),),
12451245
)
12461246

12471247
QUANTIZATION_CONFIG_DICTS = (
@@ -1306,6 +1306,60 @@ class OVQuantizationConfigTest(unittest.TestCase):
13061306
(dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
13071307
)
13081308

1309+
QUANTIZATION_CONFIGS_WITH_KWARGS = (
1310+
(
1311+
OVWeightQuantizationConfig,
1312+
{
1313+
"advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path"),
1314+
"some_arg": "some_value",
1315+
},
1316+
{
1317+
"advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path"),
1318+
"some_arg": "some_value",
1319+
},
1320+
),
1321+
(
1322+
OVQuantizationConfig,
1323+
{
1324+
"advanced_parameters": nncf.AdvancedQuantizationParameters(disable_channel_alignment=True),
1325+
"some_arg": "some_value",
1326+
},
1327+
{
1328+
"advanced_parameters": nncf.AdvancedQuantizationParameters(
1329+
overflow_fix=nncf.OverflowFix.DISABLE,
1330+
disable_channel_alignment=True,
1331+
),
1332+
"some_arg": "some_value",
1333+
},
1334+
),
1335+
(
1336+
OVQuantizationConfig,
1337+
{
1338+
"advanced_parameters": nncf.AdvancedQuantizationParameters(overflow_fix=nncf.OverflowFix.ENABLE),
1339+
},
1340+
{
1341+
"advanced_parameters": nncf.AdvancedQuantizationParameters(
1342+
overflow_fix=nncf.OverflowFix.DISABLE,
1343+
),
1344+
},
1345+
),
1346+
(
1347+
OVQuantizationConfig,
1348+
{
1349+
"smooth_quant_alpha": 0.5,
1350+
"advanced_parameters": nncf.AdvancedQuantizationParameters(
1351+
smooth_quant_alphas=nncf.AdvancedSmoothQuantParameters(matmul=0.7, convolution=0.7),
1352+
),
1353+
},
1354+
{
1355+
"advanced_parameters": nncf.AdvancedQuantizationParameters(
1356+
overflow_fix=nncf.OverflowFix.DISABLE,
1357+
smooth_quant_alphas=nncf.AdvancedSmoothQuantParameters(matmul=0.5, convolution=0.7),
1358+
),
1359+
},
1360+
),
1361+
)
1362+
13091363
def get_default_configurations() -> dict:
13101364
default_configurations = deepcopy(_DEFAULT_4BIT_CONFIGS)
13111365
default_configurations.update({"default": _DEFAULT_4BIT_CONFIG})
@@ -1357,6 +1411,57 @@ def test_for_no_short_id_duplicates(self):
13571411
assert short_id not in short_ids
13581412
short_ids.add(short_id)
13591413

1414+
@parameterized.expand(QUANTIZATION_CONFIGS_WITH_KWARGS)
def test_config_init_kwargs(
    self,
    config_type: Type[Union[OVWeightQuantizationConfig, OVQuantizationConfig]],
    config_kwargs: dict,
    ref_nncf_dict: dict,
):
    """Check that constructor kwargs are reflected in the config's `to_nncf_dict()` output.

    The expected dictionary is the output of a default-constructed config of the
    same type, with the entries of `ref_nncf_dict` laid on top of it.
    """
    actual = config_type(**config_kwargs).to_nncf_dict()
    # Later entries win, so the reference values replace the defaults.
    expected = {**config_type().to_nncf_dict(), **ref_nncf_dict}
    self.assertTrue(self.compare_objects(actual, expected))
1424+
1425+
@parameterized.expand(
    [
        ("nncf.compress_weights", "_weight_only_quantization", "dataset"),
        ("nncf.quantize", "_full_quantization", "calibration_dataset"),
    ]
)
def test_quantization_kwargs_override(self, mock_method_name, quantization_function, dataset_key):
    """Check that kwargs passed at call time override the values coming from the config.

    `mock_method_name` is the dotted path that gets patched, `quantization_function`
    is the name of the module-level function under test, and `dataset_key` is the
    keyword under which the (here `None`) dataset is expected in the patched call.
    """
    with unittest.mock.patch(mock_method_name) as patched_call:
        # The model stub is created with an empty spec; only `get_rt_info` is attached explicitly.
        model_stub = unittest.mock.Mock([])
        model_stub.get_rt_info = unittest.mock.Mock(return_value={})

        config_stub = unittest.mock.Mock()
        config_stub.to_nncf_dict.return_value = {"param1": "value1", "param2": "value2"}

        call_overrides = {"param2": "new_value2", "param3": "value3"}

        # The function under test is looked up by name among this module's globals.
        function_under_test = globals()[quantization_function]
        function_under_test(model_stub, config_stub, None, **call_overrides)

        # "param2" must carry the call-time value, not the one from the config.
        expected_call_kwargs = {
            "param1": "value1",
            "param2": "new_value2",
            "param3": "value3",
            dataset_key: None,
        }
        patched_call.assert_called_once_with(model_stub, **expected_call_kwargs)
1447+
1448+
@staticmethod
def compare_objects(o1, o2) -> bool:
    """Recursively compare two objects for structural equality.

    Dataclass instances are compared through their ``__dict__``, dictionaries
    key by key, and non-string iterables element by element. Anything else is
    compared with ``==``.

    Args:
        o1: First object to compare.
        o2: Second object to compare.

    Returns:
        `True` if both objects are structurally equal, `False` otherwise.
        Dictionaries with different key sets and iterables of different
        lengths are reported as unequal.
    """

    def _equal(a, b) -> bool:
        if dataclasses.is_dataclass(a) and dataclasses.is_dataclass(b):
            a, b = a.__dict__, b.__dict__
        if isinstance(a, dict) and isinstance(b, dict):
            # A key present on only one side is a mismatch, not a KeyError.
            if a.keys() != b.keys():
                return False
            return all(_equal(a[key], b[key]) for key in a)
        both_iterable = isinstance(a, Iterable) and isinstance(b, Iterable)
        if both_iterable and not (isinstance(a, str) or isinstance(b, str)):
            # Materialize both sides so the lengths can be checked; a plain zip()
            # would silently ignore the extra elements of the longer iterable.
            items_a, items_b = list(a), list(b)
            if len(items_a) != len(items_b):
                return False
            return all(_equal(item_a, item_b) for item_a, item_b in zip(items_a, items_b))
        return a == b

    return _equal(o1, o2)
1464+
13601465

13611466
class InferRequestWrapperTest(unittest.TestCase):
13621467
MODEL_NAME = ("whisper",)

0 commit comments

Comments
 (0)