From b84ef4a13a575c4b74208848f7fd40ab6d500971 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 22 May 2024 15:58:57 +0200
Subject: [PATCH 01/13] Bump transformers version

---
 setup.py                        | 4 ++--
 tests/openvino/test_modeling.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 251ec61cdd..0ccaa1f202 100644
--- a/setup.py
+++ b/setup.py
@@ -28,8 +28,8 @@
 
 INSTALL_REQUIRE = [
     "torch>=1.11",
-    "transformers>=4.36.0,<4.41.0",
-    "optimum~=1.19",
+    "transformers>=4.36.0,<4.42.0",
+    "optimum @ git+https://github.com/huggingface/optimum.git@bump-transformers",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 692720a972..1c42eff5a9 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -1675,7 +1675,7 @@ def test_compare_output_attentions(self, model_arch):
         preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
         inputs = preprocessor(images=image, return_tensors="pt")
 
-        transformers_model = AutoModelForImageClassification.from_pretrained(model_id)
+        transformers_model = AutoModelForImageClassification.from_pretrained(model_id, attn_implementation="eager")
         transformers_model.eval()
         with torch.no_grad():
             transformers_outputs = transformers_model(**inputs, output_attentions=True)

From f68b2c744cd4c9def10fb87a7486d758ede59d9e Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 22 May 2024 16:09:44 +0200
Subject: [PATCH 02/13] fix default ignored scopes for models using SDPA by
 default

---
 .../openvino/image-classification/configs/swin-base-jpqd.json   | 2 --
 .../openvino/question-answering/configs/bert-base-jpqd.json     | 2 --
 .../openvino/text-classification/configs/bert-base-jpqd.json    | 2 --
 optimum/intel/openvino/trainer.py                               | 2 --
 4 files changed, 8 deletions(-)

diff --git a/examples/openvino/image-classification/configs/swin-base-jpqd.json b/examples/openvino/image-classification/configs/swin-base-jpqd.json
index 3f03c276aa..23b2fd3d84 100644
--- a/examples/openvino/image-classification/configs/swin-base-jpqd.json
+++ b/examples/openvino/image-classification/configs/swin-base-jpqd.json
@@ -36,8 +36,6 @@
       "ignored_scopes": [
         "{re}.*__add___[0-1]",
         "{re}.*layer_norm_0",
-        "{re}.*matmul_1",
-        "{re}.*__truediv__*"
       ]
   }
 ]
diff --git a/examples/openvino/question-answering/configs/bert-base-jpqd.json b/examples/openvino/question-answering/configs/bert-base-jpqd.json
index 425bd9f31b..342d327a34 100644
--- a/examples/openvino/question-answering/configs/bert-base-jpqd.json
+++ b/examples/openvino/question-answering/configs/bert-base-jpqd.json
@@ -36,8 +36,6 @@
         "ignored_scopes": [
             "{re}.*__add___[0-1]",
             "{re}.*layer_norm_0",
-            "{re}.*matmul_1",
-            "{re}.*__truediv__*"
         ]
     }
 ]
diff --git a/examples/openvino/text-classification/configs/bert-base-jpqd.json b/examples/openvino/text-classification/configs/bert-base-jpqd.json
index 25c8f28861..d177e4efd7 100644
--- a/examples/openvino/text-classification/configs/bert-base-jpqd.json
+++ b/examples/openvino/text-classification/configs/bert-base-jpqd.json
@@ -40,8 +40,6 @@
         "ignored_scopes": [
             "{re}.*__add___[0-1]",
             "{re}.*layer_norm_0",
-            "{re}.*matmul_1",
-            "{re}.*__truediv__*"
         ]
     }
 ]
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index 0745a1cd79..26b6562a54 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -153,8 +153,6 @@
         "{re}.*Embedding.*",
         "{re}.*add___.*",
         "{re}.*layer_norm_.*",
-        "{re}.*matmul_1",
-        "{re}.*__truediv__.*",
     ],
 }
 

From d120a457d78ab00485af9bd39a94787a9234544e Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 22 May 2024 16:18:40 +0200
Subject: [PATCH 03/13] fix quant ops test

---
 tests/openvino/test_quantization.py |  4 +--
 tests/openvino/test_training.py     | 44 ++++++++++++++---------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 98eb121d72..cafc03c641 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -74,7 +74,7 @@
 
 class OVQuantizerTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
-        (OVModelForSequenceClassification, "bert", 32, 35),
+        (OVModelForSequenceClassification, "bert", 22, 35),
         # (OVModelForCausalLM, "gpt2", 41, 23),
     )
 
@@ -663,7 +663,7 @@ def preprocess_function(examples, tokenizer):
 
 
 class OVTrainerTest(unittest.TestCase):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 49, 38),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 67, 38),)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_aware_training_quantization(self, model_name, expected_fake_quantize, expected_int8):
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index c998d00d8b..8b66052eaa 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -322,7 +322,7 @@ def tearDown(self):
     "default_quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss"],
     ),
@@ -330,14 +330,14 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
     "customized_quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss"],
     ),
@@ -345,7 +345,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
@@ -365,7 +365,7 @@ def tearDown(self):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -376,7 +376,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -385,7 +385,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -397,7 +397,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -418,7 +418,7 @@ def tearDown(self):
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -429,7 +429,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -438,7 +438,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -450,7 +450,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -553,7 +553,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "default_quantization": OVTrainerTestDescriptor(
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         compression_metrics=["compression_loss"],
     ),
@@ -572,7 +572,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -580,7 +580,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -589,7 +589,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -598,7 +598,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -730,7 +730,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=40,
+        expected_fake_quantize=24,
         expected_int8=30,
         compression_metrics=["compression_loss"],
     ),
@@ -749,7 +749,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=40,
+        expected_fake_quantize=24,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -757,7 +757,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=40,
+        expected_fake_quantize=24,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -766,7 +766,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=40,
+        expected_fake_quantize=24,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -775,7 +775,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=40,
+        expected_fake_quantize=24,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],

From 822606bb9855b61480f9913589a8ad7d2dc8953e Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 22 May 2024 16:24:06 +0200
Subject: [PATCH 04/13] fix test: use eager attention for audio classification model

---
 tests/openvino/test_training.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 8b66052eaa..0f2df801cd 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -797,7 +797,9 @@ def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor):
 
         self.feature_extractor = AutoFeatureExtractor.from_pretrained(desc.model_id)
         self.tokenizer = self.feature_extractor
-        self.model = AutoModelForAudioClassification.from_pretrained(desc.model_id, num_labels=self.num_labels)
+        self.model = AutoModelForAudioClassification.from_pretrained(
+            desc.model_id, num_labels=self.num_labels, attn_implementation="eager"
+        )
         self.teacher_model = None
         if desc.teacher_model_id:
             self.teacher_model = AutoModelForAudioClassification.from_pretrained(

From 1456ea2aab15ee2b0a46661843e36a9e135a0543 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 23 May 2024 16:21:34 +0200
Subject: [PATCH 05/13] fix test: split expected quantized op counts for torch and OV models

---
 setup.py                            |  2 +-
 tests/openvino/test_quantization.py | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 0ccaa1f202..421157ae89 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.42.0",
-    "optimum @ git+https://github.com/huggingface/optimum.git@bump-transformers",
+    "optimum @ git+https://github.com/huggingface/optimum.git",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index cafc03c641..1f5121c973 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -73,12 +73,16 @@
 
 
 class OVQuantizerTest(unittest.TestCase):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
+    SUPPORTED_ARCHITECTURES_TORCH_MODEL = (
         (OVModelForSequenceClassification, "bert", 22, 35),
-        # (OVModelForCausalLM, "gpt2", 41, 23),
+        (OVModelForCausalLM, "gpt2", 41, 3),
+    )
+    SUPPORTED_ARCHITECTURES_OV_MODEL= (
+        (OVModelForSequenceClassification, "bert", 32, 35),
     )
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL)
     def test_automodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
         model_id = MODEL_NAMES[model_name]
         task = model_cls.export_feature
@@ -123,7 +127,7 @@ def preprocess_function(examples, tokenizer):
             loaded_config = OVConfig.from_pretrained(tmp_dir)
             self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_OV_MODEL)
     def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
         model_id = MODEL_NAMES[model_name]
         task = model_cls.export_feature

From 87903bccd3a45385965d2155777fee594b29f25d Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 23 May 2024 16:40:41 +0200
Subject: [PATCH 06/13] fix expected fake quantize ops for Wav2Vec2 training tests

---
 tests/openvino/test_training.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index 0f2df801cd..89d644319c 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -730,7 +730,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=24,
+        expected_fake_quantize=40,
         expected_int8=30,
         compression_metrics=["compression_loss"],
     ),
@@ -749,7 +749,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=24,
+        expected_fake_quantize=40,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -757,7 +757,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=24,
+        expected_fake_quantize=40,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -766,7 +766,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=24,
+        expected_fake_quantize=40,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -775,7 +775,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
         nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
-        expected_fake_quantize=24,
+        expected_fake_quantize=40,
         expected_int8=30,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],

From 451fc0fa0b69327856208bd8810353c930993c45 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 23 May 2024 16:42:31 +0200
Subject: [PATCH 07/13] tmp for test: install optimum from fix-bt-ci branch

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 421157ae89..9d54b5d9c9 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.42.0",
-    "optimum @ git+https://github.com/huggingface/optimum.git",
+    "optimum @ git+https://github.com/huggingface/optimum.git@fix-bt-ci",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",

From 67598c11785ba4097cef749279c8dfbfea98a312 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 23 May 2024 16:47:20 +0200
Subject: [PATCH 08/13] style

---
 tests/openvino/test_quantization.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 1f5121c973..1f7d64e4b0 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -77,10 +77,7 @@ class OVQuantizerTest(unittest.TestCase):
         (OVModelForSequenceClassification, "bert", 22, 35),
         (OVModelForCausalLM, "gpt2", 41, 3),
     )
-    SUPPORTED_ARCHITECTURES_OV_MODEL= (
-        (OVModelForSequenceClassification, "bert", 32, 35),
-    )
-
+    SUPPORTED_ARCHITECTURES_OV_MODEL = ((OVModelForSequenceClassification, "bert", 32, 35),)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL)
     def test_automodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):

From f3be817d3026aee9c962746a2a827e5e63bda926 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 23 May 2024 17:02:49 +0200
Subject: [PATCH 09/13] update setup: pin optimum back to ~=1.19

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9d54b5d9c9..0057c34c90 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.42.0",
-    "optimum @ git+https://github.com/huggingface/optimum.git@fix-bt-ci",
+    "optimum~=1.19",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",

From ae1f449bec15e06ba6185889da67a33c4fbd0b64 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Thu, 23 May 2024 19:23:46 +0200
Subject: [PATCH 10/13] add ops quant num gpt2

---
 tests/openvino/test_quantization.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 1f7d64e4b0..e7d2877c72 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -77,7 +77,10 @@ class OVQuantizerTest(unittest.TestCase):
         (OVModelForSequenceClassification, "bert", 22, 35),
         (OVModelForCausalLM, "gpt2", 41, 3),
     )
-    SUPPORTED_ARCHITECTURES_OV_MODEL = ((OVModelForSequenceClassification, "bert", 32, 35),)
+    SUPPORTED_ARCHITECTURES_OV_MODEL = (
+        (OVModelForSequenceClassification, "bert", 32, 35),
+        # (OVModelForCausalLM, "gpt2", 31, 3),
+    )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL)
     def test_automodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):

From 8ea75f7e04db66ebb97cd4e56399ee49b86e83c3 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Tue, 28 May 2024 17:33:45 +0200
Subject: [PATCH 11/13] tmp install optimum from source

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0057c34c90..f4531a8a27 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,8 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.42.0",
-    "optimum~=1.19",
+    # "optimum~=1.20",
+    "optimum @ git+https://github.com/huggingface/optimum.git",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",

From bd777cbe4e6b0fa227692569100eda74e1a80572 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 29 May 2024 11:13:52 +0200
Subject: [PATCH 12/13] fix expected ops quant in test

---
 tests/openvino/test_quantization.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 6cb6171fae..b7ed36d3e6 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -79,8 +79,7 @@ class OVQuantizerTest(unittest.TestCase):
     )
     SUPPORTED_ARCHITECTURES_OV_MODEL = (
         (OVModelForSequenceClassification, "bert", 32, 35),
-        (OVModelForCausalLM, "gpt2", 31, 3),
-        # (OVModelForCausalLM, "gpt2", 31, 22),
+        (OVModelForCausalLM, "gpt2", 31, 22),
     )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL)

From 061bf479536829a532e4bba44d24c9d2051c8062 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <ella@huggingface.co>
Date: Wed, 29 May 2024 16:18:37 +0200
Subject: [PATCH 13/13] update optimum version

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index f4531a8a27..d00ce1dd92 100644
--- a/setup.py
+++ b/setup.py
@@ -29,8 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.42.0",
-    # "optimum~=1.20",
-    "optimum @ git+https://github.com/huggingface/optimum.git",
+    "optimum~=1.20",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",