Skip to content

Commit e3412a6

Browse files
OVMixedQuantizationConfig proposal
1 parent 89b3afc commit e3412a6

File tree

7 files changed

+239
-415
lines changed

7 files changed

+239
-415
lines changed

optimum/commands/export/openvino.py

+4 -17
Original file line number | Diff line number | Diff line change
@@ -307,14 +307,7 @@ def parse_args(parser: "ArgumentParser"):
307307
def run(self):
308308
from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
309309
from ...exporters.openvino.utils import save_preprocessors
310-
from ...intel.openvino.configuration import (
311-
_DEFAULT_4BIT_CONFIG,
312-
OVCompressWeightsOptions,
313-
OVConfig,
314-
OVGeneralQuantizationConfig,
315-
OVQuantizeOptions,
316-
get_default_int4_config,
317-
)
310+
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIG, OVConfig, get_default_int4_config
318311

319312
if self.args.library is None:
320313
# TODO: add revision, subfolder and token to args
@@ -363,23 +356,17 @@ def run(self):
363356
if self.args.quant_mode == "nf4_f8e4m3":
364357
wc_config = prepare_for_wc_config(self.args, _DEFAULT_4BIT_CONFIG)
365358
wc_config["weight_format"] = "nf4"
366-
cw_options = OVCompressWeightsOptions.init_with_format(**wc_config)
367359

368360
q_config = prepare_for_q_config(self.args)
369361
q_config["activation_format"] = "f8e4m3"
370-
q_options = OVQuantizeOptions.init_with_format(**q_config)
371362

372-
quantization_config = OVGeneralQuantizationConfig.init_with_format(
373-
bits=8,
374-
sym=self.args.sym,
375-
ignored_scope=None,
363+
quantization_config = dict(
364+
weight_quantization_config=wc_config,
365+
quantization_config=q_config,
376366
num_samples=self.args.num_samples,
377367
dataset=self.args.dataset,
378368
trust_remote_code=self.args.trust_remote_code,
379-
weight_format=self.args.weight_format,
380369
)
381-
quantization_config.compress_weights_options = cw_options
382-
quantization_config.quantize_options = q_options
383370
else:
384371
quantization_config = prepare_for_q_config(self.args)
385372
ov_config = OVConfig(quantization_config=quantization_config)

optimum/intel/__init__.py

+4
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@
8484
"OVQuantizationConfig",
8585
"OVWeightQuantizationConfig",
8686
"OVDynamicQuantizationConfig",
87+
"OVMixedQuantizationConfig",
8788
]
8889
)
8990
else:
@@ -94,6 +95,7 @@
9495
"OVQuantizationConfig",
9596
"OVWeightQuantizationConfig",
9697
"OVDynamicQuantizationConfig",
98+
"OVMixedQuantizationConfig",
9799
]
98100
)
99101

@@ -270,6 +272,7 @@
270272
except OptionalDependencyNotAvailable:
271273
from .utils.dummy_openvino_and_nncf_objects import (
272274
OVDynamicQuantizationConfig,
275+
OVMixedQuantizationConfig,
273276
OVQuantizationConfig,
274277
OVQuantizer,
275278
OVTrainingArguments,
@@ -278,6 +281,7 @@
278281
else:
279282
from .openvino import (
280283
OVDynamicQuantizationConfig,
284+
OVMixedQuantizationConfig,
281285
OVQuantizationConfig,
282286
OVQuantizer,
283287
OVTrainingArguments,

optimum/intel/openvino/__init__.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,13 @@
5555
from .trainer import OVTrainer
5656

5757

58-
from .configuration import OVConfig, OVDynamicQuantizationConfig, OVQuantizationConfig, OVWeightQuantizationConfig
58+
from .configuration import (
59+
OVConfig,
60+
OVDynamicQuantizationConfig,
61+
OVMixedQuantizationConfig,
62+
OVQuantizationConfig,
63+
OVWeightQuantizationConfig,
64+
)
5965
from .modeling import (
6066
OVModelForAudioClassification,
6167
OVModelForAudioFrameClassification,

0 commit comments

Comments
 (0)