Skip to content

Commit 920b237

Browse files
authored
Merge pull request #673 from nikita-savelyevv/nncf-210-update
Update NNCF to 2.10. Enable AWQ algorithm.
2 parents b383ffb + a65c9e5 commit 920b237

File tree

4 files changed

+15
-7
lines changed

4 files changed

+15
-7
lines changed

optimum/intel/openvino/quantization.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from transformers import AutoTokenizer, DataCollator, PreTrainedModel, default_data_collator
3838
from transformers.pytorch_utils import Conv1D
3939
from transformers.utils import is_accelerate_available
40+
from transformers.utils.quantization_config import QuantizationMethod
4041

4142
from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
4243
from optimum.exporters.tasks import TasksManager
@@ -670,10 +671,10 @@ def _weight_only_quantization(
670671
group_size=config.group_size,
671672
all_layers=config.all_layers,
672673
sensitivity_metric=sensitivity_metric,
673-
# awq=config.quant_method == QuantizationMethod.AWQ, # TODO : enable from nncf v2.9.0
674+
awq=config.quant_method == QuantizationMethod.AWQ or None,
674675
ignored_scope=config.get_ignored_scope_instance(),
675676
dataset=dataset,
676-
# subset_size=config.num_samples if config.num_samples else 128, # TODO : enable from nncf v2.9.0
677+
subset_size=config.num_samples if config.num_samples else 128,
677678
)
678679

679680

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@
6060

6161
EXTRAS_REQUIRE = {
6262
"neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"],
63-
"openvino": ["openvino>=2023.3", "nncf>=2.8.1", "openvino-tokenizers[transformers]"],
64-
"nncf": ["nncf>=2.8.1"],
63+
"openvino": ["openvino>=2023.3", "nncf>=2.10.0", "openvino-tokenizers[transformers]"],
64+
"nncf": ["nncf>=2.10.0"],
6565
"ipex": ["intel-extension-for-pytorch", "transformers>=4.36.0,<4.39.0"],
6666
"diffusers": ["diffusers"],
6767
"quality": QUALITY_REQUIRE,

tests/openvino/test_quantization.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -221,17 +221,17 @@ class OVWeightCompressionTest(unittest.TestCase):
221221
),
222222
(
223223
OVModelForCausalLM,
224-
"opt",
224+
"llama_awq",
225225
dict(
226226
bits=4,
227227
sym=True,
228-
group_size=-1,
228+
group_size=16,
229229
ratio=0.8,
230230
sensitivity_metric="mean_activation_magnitude",
231231
dataset="ptb",
232232
quant_method=QuantizationMethod.AWQ,
233233
),
234-
14,
234+
16,
235235
),
236236
)
237237

@@ -452,6 +452,10 @@ def test_ovmodel_4bit_auto_compression_with_config(
452452
with tempfile.TemporaryDirectory() as tmp_dir:
453453
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
454454
model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config)
455+
if quantization_config.quant_method == QuantizationMethod.AWQ:
456+
# TODO: Check that AWQ was actually applied
457+
pass
458+
455459
tokenizer = AutoTokenizer.from_pretrained(model_id)
456460
if tokenizer.pad_token is None:
457461
tokenizer.pad_token = tokenizer.eos_token
@@ -548,6 +552,8 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self):
548552
"sensitivity_metric": None,
549553
"dataset": None,
550554
"ignored_scope": nncf.IgnoredScope(),
555+
"awq": None,
556+
"subset_size": 128,
551557
}
552558
compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
553559

tests/openvino/utils_tests.py

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
"levit": "hf-internal-testing/tiny-random-LevitModel",
5858
"longt5": "hf-internal-testing/tiny-random-longt5",
5959
"llama": "fxmarty/tiny-llama-fast-tokenizer",
60+
"llama_awq": "HuggingFaceH4/tiny-random-LlamaForCausalLM",
6061
"llama_gptq": "hf-internal-testing/TinyLlama-1.1B-Chat-v0.3-GPTQ",
6162
"m2m_100": "hf-internal-testing/tiny-random-m2m_100",
6263
"opt": "hf-internal-testing/tiny-random-OPTModel",

0 commit comments

Comments (0)