From b84ef4a13a575c4b74208848f7fd40ab6d500971 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 22 May 2024 15:58:57 +0200 Subject: [PATCH 01/13] Bump transformers version --- setup.py | 4 ++-- tests/openvino/test_modeling.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 251ec61cdd..0ccaa1f202 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", - "transformers>=4.36.0,<4.41.0", - "optimum~=1.19", + "transformers>=4.36.0,<4.42.0", + "optimum @ git+https://github.com/huggingface/optimum.git@bump-transformers", "datasets>=1.4.0", "sentencepiece", "scipy", diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 692720a972..1c42eff5a9 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -1675,7 +1675,7 @@ def test_compare_output_attentions(self, model_arch): preprocessor = AutoFeatureExtractor.from_pretrained(model_id) inputs = preprocessor(images=image, return_tensors="pt") - transformers_model = AutoModelForImageClassification.from_pretrained(model_id) + transformers_model = AutoModelForImageClassification.from_pretrained(model_id, attn_implementation="eager") transformers_model.eval() with torch.no_grad(): transformers_outputs = transformers_model(**inputs, output_attentions=True) From f68b2c744cd4c9def10fb87a7486d758ede59d9e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 22 May 2024 16:09:44 +0200 Subject: [PATCH 02/13] fix default ignored scope for model using sdpa by default --- .../openvino/image-classification/configs/swin-base-jpqd.json | 2 -- .../openvino/question-answering/configs/bert-base-jpqd.json | 2 -- .../openvino/text-classification/configs/bert-base-jpqd.json | 2 -- optimum/intel/openvino/trainer.py | 2 -- 4 files changed, 8 deletions(-) diff --git a/examples/openvino/image-classification/configs/swin-base-jpqd.json b/examples/openvino/image-classification/configs/swin-base-jpqd.json index 3f03c276aa..23b2fd3d84 100644 --- a/examples/openvino/image-classification/configs/swin-base-jpqd.json +++ b/examples/openvino/image-classification/configs/swin-base-jpqd.json @@ -36,8 +36,6 @@ "ignored_scopes": [ "{re}.*__add___[0-1]", "{re}.*layer_norm_0", - "{re}.*matmul_1", - "{re}.*__truediv__*" ] } ] diff --git a/examples/openvino/question-answering/configs/bert-base-jpqd.json b/examples/openvino/question-answering/configs/bert-base-jpqd.json index 425bd9f31b..342d327a34 100644 --- a/examples/openvino/question-answering/configs/bert-base-jpqd.json +++ b/examples/openvino/question-answering/configs/bert-base-jpqd.json @@ -36,8 +36,6 @@ "ignored_scopes": [ "{re}.*__add___[0-1]", "{re}.*layer_norm_0", - "{re}.*matmul_1", - "{re}.*__truediv__*" ] } ] diff --git a/examples/openvino/text-classification/configs/bert-base-jpqd.json b/examples/openvino/text-classification/configs/bert-base-jpqd.json index 25c8f28861..d177e4efd7 100644 --- a/examples/openvino/text-classification/configs/bert-base-jpqd.json +++ b/examples/openvino/text-classification/configs/bert-base-jpqd.json @@ -40,8 +40,6 @@ "ignored_scopes": [ "{re}.*__add___[0-1]", "{re}.*layer_norm_0", - "{re}.*matmul_1", - "{re}.*__truediv__*" ] } ] diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py index 0745a1cd79..26b6562a54 100644 --- a/optimum/intel/openvino/trainer.py +++ b/optimum/intel/openvino/trainer.py @@ -153,8 +153,6 @@ "{re}.*Embedding.*", "{re}.*add___.*", "{re}.*layer_norm_.*", - "{re}.*matmul_1", - "{re}.*__truediv__.*", ], } From d120a457d78ab00485af9bd39a94787a9234544e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 22 May 2024 16:18:40 +0200 Subject: [PATCH 03/13] fix quant ops test --- tests/openvino/test_quantization.py | 4 +-- tests/openvino/test_training.py | 44 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 98eb121d72..cafc03c641 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -74,7 +74,7 @@ class OVQuantizerTest(unittest.TestCase): SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = ( - (OVModelForSequenceClassification, "bert", 32, 35), + (OVModelForSequenceClassification, "bert", 22, 35), # (OVModelForCausalLM, "gpt2", 41, 23), ) @@ -663,7 +663,7 @@ def preprocess_function(examples, tokenizer): class OVTrainerTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 49, 38),) + SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 67, 38),) @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS) def test_aware_training_quantization(self, model_name, expected_fake_quantize, expected_int8): diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index c998d00d8b..8b66052eaa 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -322,7 +322,7 @@ def tearDown(self): "default_quantization": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), @@ -330,14 +330,14 @@ def tearDown(self): model_id="hf-internal-testing/tiny-random-bert", teacher_model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), "customized_quantization": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss"], ), @@ -345,7 +345,7 @@ def tearDown(self): model_id="hf-internal-testing/tiny-random-bert", teacher_model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG, - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], ), @@ -365,7 +365,7 @@ def tearDown(self): "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -376,7 +376,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -385,7 +385,7 @@ def tearDown(self): model_id="hf-internal-testing/tiny-random-bert", teacher_model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -397,7 +397,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -418,7 +418,7 @@ def tearDown(self): "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -429,7 +429,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss"], @@ -438,7 +438,7 @@ def tearDown(self): model_id="hf-internal-testing/tiny-random-bert", teacher_model_id="hf-internal-testing/tiny-random-bert", nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -450,7 +450,7 @@ def tearDown(self): CUSTOMIZED_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, ], - expected_fake_quantize=34, + expected_fake_quantize=22, expected_int8=32, expected_binary_masks=60, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -553,7 +553,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "default_quantization": OVTrainerTestDescriptor( model_id="yujiepan/tiny-random-swin-patch4-window7-224", nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=28, + expected_fake_quantize=36, expected_int8=28, compression_metrics=["compression_loss"], ), @@ -572,7 +572,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="yujiepan/tiny-random-swin-patch4-window7-224", nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=28, + expected_fake_quantize=36, expected_int8=28, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -580,7 +580,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id="yujiepan/tiny-random-swin-patch4-window7-224", nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=28, + expected_fake_quantize=36, expected_int8=28, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -589,7 +589,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="yujiepan/tiny-random-swin-patch4-window7-224", teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=28, + expected_fake_quantize=36, expected_int8=28, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -598,7 +598,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="yujiepan/tiny-random-swin-patch4-window7-224", teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224", nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=28, + expected_fake_quantize=36, expected_int8=28, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -730,7 +730,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, + expected_fake_quantize=24, expected_int8=30, compression_metrics=["compression_loss"], ), @@ -749,7 +749,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, + expected_fake_quantize=24, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -757,7 +757,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, + expected_fake_quantize=24, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -766,7 +766,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, + expected_fake_quantize=24, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -775,7 +775,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, + expected_fake_quantize=24, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], From 822606bb9855b61480f9913589a8ad7d2dc8953e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 22 May 2024 16:24:06 +0200 Subject: [PATCH 04/13] fix test --- tests/openvino/test_training.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 8b66052eaa..0f2df801cd 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -797,7 +797,9 @@ def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor): self.feature_extractor = AutoFeatureExtractor.from_pretrained(desc.model_id) self.tokenizer = self.feature_extractor - self.model = AutoModelForAudioClassification.from_pretrained(desc.model_id, num_labels=self.num_labels) + self.model = AutoModelForAudioClassification.from_pretrained( + desc.model_id, num_labels=self.num_labels, attn_implementation="eager" + ) self.teacher_model = None if desc.teacher_model_id: self.teacher_model = AutoModelForAudioClassification.from_pretrained( From 1456ea2aab15ee2b0a46661843e36a9e135a0543 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 23 May 2024 16:21:34 +0200 Subject: [PATCH 05/13] fix test --- setup.py | 2 +- tests/openvino/test_quantization.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 0ccaa1f202..421157ae89 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.42.0", - "optimum @ git+https://github.com/huggingface/optimum.git@bump-transformers", + "optimum @ git+https://github.com/huggingface/optimum.git", "datasets>=1.4.0", "sentencepiece", "scipy", diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index cafc03c641..1f5121c973 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -73,12 +73,16 @@ class OVQuantizerTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = ( + SUPPORTED_ARCHITECTURES_TORCH_MODEL = ( (OVModelForSequenceClassification, "bert", 22, 35), - # (OVModelForCausalLM, "gpt2", 41, 23), + (OVModelForCausalLM, "gpt2", 41, 3), + ) + SUPPORTED_ARCHITECTURES_OV_MODEL= ( + (OVModelForSequenceClassification, "bert", 32, 35), ) - @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS) + + @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL) def test_automodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8): model_id = MODEL_NAMES[model_name] task = model_cls.export_feature @@ -123,7 +127,7 @@ def preprocess_function(examples, tokenizer): loaded_config = OVConfig.from_pretrained(tmp_dir) self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict()) - @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS) + @parameterized.expand(SUPPORTED_ARCHITECTURES_OV_MODEL) def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8): model_id = MODEL_NAMES[model_name] task = model_cls.export_feature From 87903bccd3a45385965d2155777fee594b29f25d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 23 May 2024 16:40:41 +0200 Subject: [PATCH 06/13] fix ops --- tests/openvino/test_training.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py index 0f2df801cd..89d644319c 100644 --- a/tests/openvino/test_training.py +++ b/tests/openvino/test_training.py @@ -730,7 +730,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=24, + expected_fake_quantize=40, expected_int8=30, compression_metrics=["compression_loss"], ), @@ -749,7 +749,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=24, + expected_fake_quantize=40, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -757,7 +757,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=24, + expected_fake_quantize=40, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss"], @@ -766,7 +766,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=24, + expected_fake_quantize=40, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], @@ -775,7 +775,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel): model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model", nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=24, + expected_fake_quantize=40, expected_int8=30, expected_binary_masks=48, compression_metrics=["compression_loss", "distillation_loss", "task_loss"], From 451fc0fa0b69327856208bd8810353c930993c45 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 23 May 2024 16:42:31 +0200 Subject: [PATCH 07/13] tmp for test --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 421157ae89..9d54b5d9c9 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.42.0", - "optimum @ git+https://github.com/huggingface/optimum.git", + "optimum @ git+https://github.com/huggingface/optimum.git@fix-bt-ci", "datasets>=1.4.0", "sentencepiece", "scipy", From 67598c11785ba4097cef749279c8dfbfea98a312 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 23 May 2024 16:47:20 +0200 Subject: [PATCH 08/13] style --- tests/openvino/test_quantization.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 1f5121c973..1f7d64e4b0 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -77,10 +77,7 @@ class OVQuantizerTest(unittest.TestCase): (OVModelForSequenceClassification, "bert", 22, 35), (OVModelForCausalLM, "gpt2", 41, 3), ) - SUPPORTED_ARCHITECTURES_OV_MODEL= ( - (OVModelForSequenceClassification, "bert", 32, 35), - ) - + SUPPORTED_ARCHITECTURES_OV_MODEL = ((OVModelForSequenceClassification, "bert", 32, 35),) @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL) def test_automodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8): From f3be817d3026aee9c962746a2a827e5e63bda926 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 23 May 2024 17:02:49 +0200 Subject: [PATCH 09/13] update setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9d54b5d9c9..0057c34c90 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.42.0", - "optimum @ git+https://github.com/huggingface/optimum.git@fix-bt-ci", + "optimum~=1.19", "datasets>=1.4.0", "sentencepiece", "scipy", From ae1f449bec15e06ba6185889da67a33c4fbd0b64 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 23 May 2024 19:23:46 +0200 Subject: [PATCH 10/13] add ops quant num gpt2 --- tests/openvino/test_quantization.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 1f7d64e4b0..e7d2877c72 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -77,7 +77,10 @@ class OVQuantizerTest(unittest.TestCase): (OVModelForSequenceClassification, "bert", 22, 35), (OVModelForCausalLM, "gpt2", 41, 3), ) - SUPPORTED_ARCHITECTURES_OV_MODEL = ((OVModelForSequenceClassification, "bert", 32, 35),) + SUPPORTED_ARCHITECTURES_OV_MODEL = ( + (OVModelForSequenceClassification, "bert", 32, 35), + # (OVModelForCausalLM, "gpt2", 31, 3), + ) @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL) def test_automodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8): From 8ea75f7e04db66ebb97cd4e56399ee49b86e83c3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Tue, 28 May 2024 17:33:45 +0200 Subject: [PATCH 11/13] tmp install optimum from source --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0057c34c90..f4531a8a27 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,8 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.42.0", - "optimum~=1.19", + # "optimum~=1.20", + "optimum @ git+https://github.com/huggingface/optimum.git", "datasets>=1.4.0", "sentencepiece", "scipy", From bd777cbe4e6b0fa227692569100eda74e1a80572 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 29 May 2024 11:13:52 +0200 Subject: [PATCH 12/13] fix expected ops quant in test --- tests/openvino/test_quantization.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 6cb6171fae..b7ed36d3e6 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -79,8 +79,7 @@ class OVQuantizerTest(unittest.TestCase): ) SUPPORTED_ARCHITECTURES_OV_MODEL = ( (OVModelForSequenceClassification, "bert", 32, 35), - (OVModelForCausalLM, "gpt2", 31, 3), - # (OVModelForCausalLM, "gpt2", 31, 22), + (OVModelForCausalLM, "gpt2", 31, 22), ) @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL) From 061bf479536829a532e4bba44d24c9d2051c8062 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 29 May 2024 16:18:37 +0200 Subject: [PATCH 13/13] update optimum version --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f4531a8a27..d00ce1dd92 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,7 @@ INSTALL_REQUIRE = [ "torch>=1.11", "transformers>=4.36.0,<4.42.0", - # "optimum~=1.20", - "optimum @ git+https://github.com/huggingface/optimum.git", + "optimum~=1.20", "datasets>=1.4.0", "sentencepiece", "scipy",