Commit efeea22

add ov config test
1 parent: 050bc9f

4 files changed (+17, -15 lines)


optimum/exporters/openvino/convert.py (+2)

@@ -509,6 +509,8 @@ def export_models(
     Returns:
         list of input_names and output_names from ONNX configuration
     """
+
+    # TODO : modify compression_option to quantization_config
     outputs = []

     if output_names is not None and len(output_names) != len(models_and_onnx_configs):

optimum/intel/openvino/configuration.py (+1, -4)

@@ -15,7 +15,6 @@
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Union

-import nncf
 import torch
 from transformers import PretrainedConfig
 from transformers.utils.quantization_config import QuantizationConfigMixin
@@ -78,7 +77,6 @@
 }


-
 DEFAULT_4BIT_CONFIGS = {
     "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.5},
     "EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64},
@@ -215,7 +213,7 @@ def __init__(
         self.all_layers = all_layers
         self.sensitivity_metric = sensitivity_metric
         self.ignored_scope = ignored_scope
-        self.quant_method = "default" # TODO : enable AWQ after nncf v2.9.0 release
+        self.quant_method = "default"  # TODO : enable AWQ after nncf v2.9.0 release
     self.post_init()

     def post_init(self):
@@ -233,7 +231,6 @@ def post_init(self):
                 ['wikitext2','c4','c4-new','ptb','ptb-new'], but we found {self.dataset}"""
             )

-
         if self.bits not in [4, 8]:
             raise ValueError(f"Only support quantization to [4,8] bits but found {self.bits}")

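With `import nncf` dropped here, the config accepts plain Python values instead of nncf objects. A minimal sketch of constructing it, assuming the class whose __init__ this hunk touches is OVWeightQuantizationConfig and that it is importable from optimum.intel (both names are inferred from the tests below, not shown in this hunk):

    # Minimal sketch: plain values replace the nncf enums, so nncf is no
    # longer needed at import time. The import path is an assumption.
    from optimum.intel import OVWeightQuantizationConfig

    # Mirrors the first test case below: 4-bit asymmetric compression with
    # group size 32 and one MatMul node excluded via a plain dict.
    config = OVWeightQuantizationConfig(
        bits=4,
        sym=False,
        group_size=32,
        ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]},
    )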

optimum/intel/openvino/quantization.py (+1, -4)

@@ -24,11 +24,10 @@
 import transformers
 from accelerate.data_loader import DataLoaderStateMixin
 from datasets import Dataset, load_dataset
-from nncf import NNCFConfig, CompressWeightsMode, SensitivityMetric, IgnoredScope
+from nncf import CompressWeightsMode, IgnoredScope, NNCFConfig, SensitivityMetric
 from nncf.torch import create_compressed_model, register_default_init_args, register_module
 from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
 from nncf.torch.initialization import PTInitializingDataLoader
-
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from openvino.runtime import Core, Tensor
 from torch.utils._pytree import tree_map
@@ -55,7 +54,6 @@
 )


-
 # TODO : remove as unused
 _COMPRESSION_OPTIONS = {
     "int8": {"mode": nncf.CompressWeightsMode.INT8},
@@ -613,7 +611,6 @@ def _int4_weight_only_quantization(
     dataset = prepare_dataset(dataset)
     dataset = nncf.Dataset(dataset, lambda x: model.prepare_inputs(**x))

-
     sensitivity_metric = None
     if isinstance(config.sensitivity_metric, str):
         sensitivity_metric = getattr(SensitivityMetric, config.sensitivity_metric.upper())
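The last hunk shows the string-to-enum resolution that lets the tests below pass sensitivity_metric as a plain string rather than an nncf enum. In isolation, the pattern is:

    # The string from the config resolves to the nncf enum via getattr,
    # exactly the pattern in the hunk above.
    from nncf import SensitivityMetric

    metric = getattr(SensitivityMetric, "mean_activation_magnitude".upper())
    assert metric is SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE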

tests/openvino/test_quantization.py (+13, -7)

@@ -174,26 +174,28 @@ class OVWeightCompressionTest(unittest.TestCase):
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
             dict(
-                mode=nncf.CompressWeightsMode.INT4_ASYM,
+                bits=4,
+                sym=False,
                 group_size=32,
-                ignored_scope=nncf.IgnoredScope(names=["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]),
+                ignored_scope={"names": ["__module.model.transformer.h.2.mlp.c_fc/aten::addmm/MatMul"]},
             ),
             6,
         ),
         (
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-gpt2",
-            dict(mode=nncf.CompressWeightsMode.INT4_ASYM, group_size=-1, ratio=0.8, all_layers=True),
+            dict(bits=4, sym=False, group_size=-1, ratio=0.8, all_layers=True),
             22,
         ),
         (
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-OPTForCausalLM",
             dict(
-                mode=nncf.CompressWeightsMode.INT4_SYM,
+                bits=4,
+                sym=True,
                 group_size=-1,
                 ratio=0.8,
-                sensitivity_metric=nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE,
+                sensitivity_metric="mean_activation_magnitude",
                 dataset="ptb",
             ),
             16,
@@ -202,10 +204,11 @@ class OVWeightCompressionTest(unittest.TestCase):
             OVModelForCausalLM,
             "hf-internal-testing/tiny-random-OPTForCausalLM",
             dict(
-                mode=nncf.CompressWeightsMode.INT4_SYM,
+                bits=4,
+                sym=True,
                 group_size=-1,
                 ratio=0.8,
-                sensitivity_metric=nncf.SensitivityMetric.MEAN_ACTIVATION_MAGNITUDE,
+                sensitivity_metric="mean_activation_magnitude",
                 dataset="ptb",
                 awq=True,
             ),
@@ -374,6 +377,9 @@ def test_ovmodel_4bit_auto_compression_with_config(
         self.assertEqual(expected_ov_int4, num_int4)
         model.save_pretrained(tmp_dir)

+        ov_config = OVConfig(quantization_config=quantization_config)
+        ov_config.save_pretrained(tmp_dir)
+
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS)
     def test_ovmodel_4bit_auto_compression_with_custom_dataset(
         self, model_cls, model_id, expected_int8, expected_int4
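The two lines added in the last hunk serialize the quantization config alongside the model, which is what the commit message ("add ov config test") refers to. A minimal sketch of the round trip they imply; note that OVConfig.from_pretrained and the OVWeightQuantizationConfig import path are assumptions, since the diff itself only exercises save_pretrained:

    import tempfile

    from optimum.intel import OVConfig, OVWeightQuantizationConfig

    # Build the wrapper config from a weight-quantization config, as the
    # new test lines do, then write it to disk next to the model files.
    quantization_config = OVWeightQuantizationConfig(bits=4, sym=False, group_size=32)
    ov_config = OVConfig(quantization_config=quantization_config)

    with tempfile.TemporaryDirectory() as tmp_dir:
        ov_config.save_pretrained(tmp_dir)
        # Reloading is assumed to follow the standard from_pretrained
        # convention; only saving is covered by the new test.
        reloaded = OVConfig.from_pretrained(tmp_dir)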
