From f696353cafc848b6a4f6cc453ec67aebf5ad1d5c Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Mon, 22 Apr 2024 17:14:01 +0200 Subject: [PATCH 01/10] Enable AWQ; add AWQ test --- optimum/intel/openvino/quantization.py | 6 ++++-- setup.py | 4 ++-- tests/openvino/test_quantization.py | 21 +++++++++++++++++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index aae66c148b..07765920bc 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -21,6 +21,8 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import datasets +from transformers.utils.quantization_config import QuantizationMethod + import nncf import openvino import torch @@ -677,10 +679,10 @@ def _weight_only_quantization( group_size=config.group_size, all_layers=config.all_layers, sensitivity_metric=sensitivity_metric, - # awq=config.quant_method == QuantizationMethod.AWQ, # TODO : enable from nncf v2.9.0 + awq=config.quant_method == QuantizationMethod.AWQ, ignored_scope=config.get_ignored_scope_instance(), dataset=dataset, - # subset_size=config.num_samples if config.num_samples else 128, # TODO : enable from nncf v2.9.0 + subset_size=config.num_samples if config.num_samples else 128, ) diff --git a/setup.py b/setup.py index ea87e6ad59..bc91ba2472 100644 --- a/setup.py +++ b/setup.py @@ -60,8 +60,8 @@ EXTRAS_REQUIRE = { "neural-compressor": ["neural-compressor>=2.2.0", "onnxruntime<1.15.0", "accelerate"], - "openvino": ["openvino>=2023.3", "nncf>=2.8.1", "openvino-tokenizers[transformers]"], - "nncf": ["nncf>=2.8.1"], + "openvino": ["openvino>=2023.3", "nncf>=2.10.0", "openvino-tokenizers[transformers]"], + "nncf": ["nncf>=2.10.0"], "ipex": ["intel-extension-for-pytorch", "transformers>=4.36.0,<4.39.0"], "diffusers": ["diffusers"], "quality": QUALITY_REQUIRE, diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index e269578c35..ca4d43d295 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -224,17 +224,17 @@ class OVWeightCompressionTest(unittest.TestCase): ), ( OVModelForCausalLM, - "hf-internal-testing/tiny-random-OPTForCausalLM", + "HuggingFaceH4/tiny-random-LlamaForCausalLM", dict( bits=4, sym=True, - group_size=-1, + group_size=16, ratio=0.8, sensitivity_metric="mean_activation_magnitude", dataset="ptb", quant_method=QuantizationMethod.AWQ, ), - 14, + 16, ), ) @@ -455,7 +455,20 @@ def test_ovmodel_4bit_auto_compression_with_config( ): with tempfile.TemporaryDirectory() as tmp_dir: quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config) - model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) + + from nncf.common.logging.track_progress import track + + with unittest.mock.patch("nncf.common.logging.track_progress.track", wraps=track) as track_patch: + model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) + if quantization_config.quant_method == QuantizationMethod.AWQ: + # Called at least once with description="Applying AWQ" + self.assertTrue( + any( + args.kwargs.get("description", None) == "Applying AWQ" + for args in track_patch.call_args_list + ) + ) + tokenizer = AutoTokenizer.from_pretrained(model_id) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token From 420d453dff24985e194638f5cc753d3fc5486159 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev 
Date: Mon, 22 Apr 2024 17:17:55 +0200 Subject: [PATCH 02/10] Move import --- optimum/intel/openvino/quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 07765920bc..89d620d45c 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -21,7 +21,6 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import datasets -from transformers.utils.quantization_config import QuantizationMethod import nncf import openvino @@ -38,6 +37,7 @@ from transformers import AutoTokenizer, DataCollator, PreTrainedModel, default_data_collator from transformers.pytorch_utils import Conv1D from transformers.utils import is_accelerate_available +from transformers.utils.quantization_config import QuantizationMethod from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed from optimum.exporters.tasks import TasksManager From 36b95d6625d01b66d6f8906a5808dbdcfab76d26 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Tue, 23 Apr 2024 16:23:22 +0200 Subject: [PATCH 03/10] Tweak --- optimum/intel/openvino/quantization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 89d620d45c..41cc6cbf5a 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -21,7 +21,6 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import datasets - import nncf import openvino import torch From 47dd8221cc7f37c804e4a909c0a88f3a146b99ff Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 11:53:59 +0200 Subject: [PATCH 04/10] Tweak awq test --- tests/openvino/test_quantization.py | 29 +++++++++++++++++++---------- tests/openvino/utils_tests.py | 1 + 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index aa1e0b3c19..1cc830ae4b 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -27,6 +27,7 @@ import numpy as np import torch from datasets import load_dataset +from nncf.common.logging.track_progress import track from nncf.quantization.advanced_parameters import OverflowFix from parameterized import parameterized import openvino.runtime as ov @@ -221,7 +222,7 @@ class OVWeightCompressionTest(unittest.TestCase): ), ( OVModelForCausalLM, - "HuggingFaceH4/tiny-random-LlamaForCausalLM", + "llama_awq", dict( bits=4, sym=True, @@ -448,22 +449,30 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_ def test_ovmodel_4bit_auto_compression_with_config( self, model_cls, model_name, quantization_config, expected_ov_int4 ): + # If this variable is defined locally, collect_descriptions() for some reason will collect values to the list + # defined for the first test case + if "track_descriptions" not in globals(): + globals()["track_descriptions"] = [] + track_descriptions = globals()["track_descriptions"] + track_descriptions.clear() + + def collect_descriptions(*args, **kwargs): + track_descriptions.append(kwargs["description"]) + return unittest.mock.DEFAULT + model_id = MODEL_NAMES[model_name] with tempfile.TemporaryDirectory() as tmp_dir: quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config) - from nncf.common.logging.track_progress import track - - with 
unittest.mock.patch("nncf.common.logging.track_progress.track", wraps=track) as track_patch: + with unittest.mock.patch( + "nncf.common.logging.track_progress.track", + wraps=track, + side_effect=collect_descriptions + ): model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) if quantization_config.quant_method == QuantizationMethod.AWQ: # Called at least once with description="Applying AWQ" - self.assertTrue( - any( - args.kwargs.get("description", None) == "Applying AWQ" - for args in track_patch.call_args_list - ) - ) + self.assertTrue(any(it == "Applying AWQ" for it in track_descriptions)) tokenizer = AutoTokenizer.from_pretrained(model_id) if tokenizer.pad_token is None: diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index ca56f6d552..9dbc188bac 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -56,6 +56,7 @@ "levit": "hf-internal-testing/tiny-random-LevitModel", "longt5": "hf-internal-testing/tiny-random-longt5", "llama": "fxmarty/tiny-llama-fast-tokenizer", + "llama_awq": "HuggingFaceH4/tiny-random-LlamaForCausalLM", "llama_gptq": "hf-internal-testing/TinyLlama-1.1B-Chat-v0.3-GPTQ", "m2m_100": "hf-internal-testing/tiny-random-m2m_100", "opt": "hf-internal-testing/tiny-random-OPTModel", From f14f62c3cd9f5c5ee2c425a6f87fccf0da9a03a8 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 13:48:15 +0200 Subject: [PATCH 05/10] Black --- tests/openvino/test_quantization.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 1cc830ae4b..99da502cb2 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -465,9 +465,7 @@ def collect_descriptions(*args, **kwargs): quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config) with unittest.mock.patch( - "nncf.common.logging.track_progress.track", - wraps=track, - side_effect=collect_descriptions + "nncf.common.logging.track_progress.track", wraps=track, side_effect=collect_descriptions ): model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) if quantization_config.quant_method == QuantizationMethod.AWQ: From 29c3274d82bd0751e07d72f2b29f56d37309d2e0 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 17:33:56 +0200 Subject: [PATCH 06/10] Tweak awq argument --- optimum/intel/openvino/quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py index 8ac7a8a328..bb62ba312f 100644 --- a/optimum/intel/openvino/quantization.py +++ b/optimum/intel/openvino/quantization.py @@ -671,7 +671,7 @@ def _weight_only_quantization( group_size=config.group_size, all_layers=config.all_layers, sensitivity_metric=sensitivity_metric, - awq=config.quant_method == QuantizationMethod.AWQ, + awq=config.quant_method == QuantizationMethod.AWQ or None, ignored_scope=config.get_ignored_scope_instance(), dataset=dataset, subset_size=config.num_samples if config.num_samples else 128, From 768edff7dd68fc37dea5abfdf8d5dad2144ced25 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 20:59:40 +0200 Subject: [PATCH 07/10] Tweak tests --- tests/openvino/test_quantization.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py 
index 99da502cb2..e64a087f93 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -449,28 +449,14 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_ def test_ovmodel_4bit_auto_compression_with_config( self, model_cls, model_name, quantization_config, expected_ov_int4 ): - # If this variable is defined locally, collect_descriptions() for some reason will collect values to the list - # defined for the first test case - if "track_descriptions" not in globals(): - globals()["track_descriptions"] = [] - track_descriptions = globals()["track_descriptions"] - track_descriptions.clear() - - def collect_descriptions(*args, **kwargs): - track_descriptions.append(kwargs["description"]) - return unittest.mock.DEFAULT - model_id = MODEL_NAMES[model_name] with tempfile.TemporaryDirectory() as tmp_dir: quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config) - with unittest.mock.patch( - "nncf.common.logging.track_progress.track", wraps=track, side_effect=collect_descriptions - ): - model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) - if quantization_config.quant_method == QuantizationMethod.AWQ: - # Called at least once with description="Applying AWQ" - self.assertTrue(any(it == "Applying AWQ" for it in track_descriptions)) + model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) + if quantization_config.quant_method == QuantizationMethod.AWQ: + # TODO: Check that AWQ was actually applied + pass tokenizer = AutoTokenizer.from_pretrained(model_id) if tokenizer.pad_token is None: @@ -568,6 +554,8 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self): "sensitivity_metric": None, "dataset": None, "ignored_scope": nncf.IgnoredScope(), + "awq": None, + "subset_size": 128 } compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params) From e080c4b9971abc72caa8960114faa54146ca4adb Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 21:00:47 +0200 Subject: [PATCH 08/10] Remove import --- tests/openvino/test_quantization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index e64a087f93..7ef8f6e8b1 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -27,7 +27,6 @@ import numpy as np import torch from datasets import load_dataset -from nncf.common.logging.track_progress import track from nncf.quantization.advanced_parameters import OverflowFix from parameterized import parameterized import openvino.runtime as ov From bf93b2a7750df05c0dbd54f25e1b2b8223cab2b9 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 21:01:32 +0200 Subject: [PATCH 09/10] Extra line --- tests/openvino/test_quantization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 7ef8f6e8b1..1e8ef45cfa 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -451,7 +451,6 @@ def test_ovmodel_4bit_auto_compression_with_config( model_id = MODEL_NAMES[model_name] with tempfile.TemporaryDirectory() as tmp_dir: quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config) - model = model_cls.from_pretrained(model_id, export=True, quantization_config=quantization_config) if quantization_config.quant_method == QuantizationMethod.AWQ: # TODO: 
Check that AWQ was actually applied From a65c9e5251ceac3d6c26b032c50d71cd3a5752f5 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 25 Apr 2024 21:06:15 +0200 Subject: [PATCH 10/10] Black --- tests/openvino/test_quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 1e8ef45cfa..d873878abb 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -553,7 +553,7 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self): "dataset": None, "ignored_scope": nncf.IgnoredScope(), "awq": None, - "subset_size": 128 + "subset_size": 128, } compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
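
Usage note (not part of the patch series): the commits above wire config.quant_method == QuantizationMethod.AWQ through _weight_only_quantization into nncf.compress_weights(awq=..., subset_size=...), which is why the nncf pin moves to >=2.10.0. The sketch below shows how a user would trigger that path. It reuses the parameters and the tiny "llama_awq" test model from the new test case; the import paths are assumed from optimum-intel's public API and the output directory name is arbitrary, so treat it as an illustration rather than part of the change.

from transformers.utils.quantization_config import QuantizationMethod

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Tiny model registered as "llama_awq" in tests/openvino/utils_tests.py;
# substitute a real checkpoint in practice.
model_id = "HuggingFaceH4/tiny-random-LlamaForCausalLM"

# Same settings as the AWQ entry in OVWeightCompressionTest; quant_method=AWQ
# is what makes _weight_only_quantization pass awq=True to nncf.compress_weights.
quantization_config = OVWeightQuantizationConfig.from_dict(
    dict(
        bits=4,
        sym=True,
        group_size=16,
        ratio=0.8,
        sensitivity_metric="mean_activation_magnitude",
        dataset="ptb",
        quant_method=QuantizationMethod.AWQ,
    )
)

# export=True converts the model to OpenVINO IR and compresses the weights on the fly.
model = OVModelForCausalLM.from_pretrained(model_id, export=True, quantization_config=quantization_config)
model.save_pretrained("tiny-llama-int4-awq-ov")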