Skip to content

Commit 2f960af

Browse files
committed
Merge branch 'main' into ea/mpt_sdpa
2 parents 4f36b6f + b383ffb commit 2f960af

File tree

5 files changed

+22
-16
lines changed

5 files changed

+22
-16
lines changed

optimum/exporters/openvino/model_configs.py

+6
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,12 @@ class OrionOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
454454
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
455455

456456

457+
@register_in_tasks_manager("olmo", *["text-generation", "text-generation-with-past"], library_name="transformers")
458+
class OlmoOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
459+
DEFAULT_ONNX_OPSET = 14
460+
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
461+
462+
457463
@register_in_tasks_manager(
458464
"mpt", *["text-generation", "text-generation-with-past", "text-classification"], library_name="transformers"
459465
)

tests/openvino/test_modeling.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
529529
"mistral",
530530
"mixtral",
531531
"mpt",
532+
"olmo",
532533
"opt",
533534
"pegasus",
534535
"qwen",
@@ -551,20 +552,19 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
551552
"jais",
552553
"qwen",
553554
"internlm2",
554-
"olmo",
555555
"orion",
556556
"phi3",
557557
)
558558

559559
@parameterized.expand(SUPPORTED_ARCHITECTURES)
560560
def test_compare_to_transformers(self, model_arch):
561561
model_id = MODEL_NAMES[model_arch]
562-
not_stateful = ["gpt_bigcode"]
562+
not_stateful = []
563563
if is_openvino_version("<", "2024.0"):
564564
not_stateful.append("mixtral")
565565

566566
if is_openvino_version("<", "2024.1"):
567-
not_stateful.extend(["llama", "gemma"])
567+
not_stateful.extend(["llama", "gemma", "gpt_bigcode"])
568568

569569
if "gptq" in model_arch:
570570
self.skipTest("GPTQ model loading unsupported with AutoModelForCausalLM")

tests/openvino/test_quantization.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def preprocess_function(examples, tokenizer):
657657

658658

659659
class OVTrainerTest(unittest.TestCase):
660-
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 50, 38),)
660+
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 49, 38),)
661661

662662
@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
663663
def test_aware_training_quantization(self, model_name, expected_fake_quantize, expected_int8):

tests/openvino/test_training.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -322,30 +322,30 @@ def tearDown(self):
322322
"default_quantization": OVTrainerTestDescriptor(
323323
model_id="hf-internal-testing/tiny-random-bert",
324324
nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
325-
expected_fake_quantize=44,
325+
expected_fake_quantize=34,
326326
expected_int8=32,
327327
compression_metrics=["compression_loss"],
328328
),
329329
"distillation,default_quantization": OVTrainerTestDescriptor(
330330
model_id="hf-internal-testing/tiny-random-bert",
331331
teacher_model_id="hf-internal-testing/tiny-random-bert",
332332
nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
333-
expected_fake_quantize=44,
333+
expected_fake_quantize=34,
334334
expected_int8=32,
335335
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
336336
),
337337
"customized_quantization": OVTrainerTestDescriptor(
338338
model_id="hf-internal-testing/tiny-random-bert",
339339
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
340-
expected_fake_quantize=44,
340+
expected_fake_quantize=34,
341341
expected_int8=32,
342342
compression_metrics=["compression_loss"],
343343
),
344344
"distillation,customized_quantization": OVTrainerTestDescriptor(
345345
model_id="hf-internal-testing/tiny-random-bert",
346346
teacher_model_id="hf-internal-testing/tiny-random-bert",
347347
nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
348-
expected_fake_quantize=44,
348+
expected_fake_quantize=34,
349349
expected_int8=32,
350350
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
351351
),
@@ -418,7 +418,7 @@ def tearDown(self):
418418
"default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
419419
model_id="hf-internal-testing/tiny-random-bert",
420420
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
421-
expected_fake_quantize=44,
421+
expected_fake_quantize=34,
422422
expected_int8=32,
423423
expected_binary_masks=60,
424424
compression_metrics=["compression_loss"],
@@ -429,7 +429,7 @@ def tearDown(self):
429429
CUSTOMIZED_QUANTIZATION_CONFIG,
430430
UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
431431
],
432-
expected_fake_quantize=44,
432+
expected_fake_quantize=34,
433433
expected_int8=32,
434434
expected_binary_masks=60,
435435
compression_metrics=["compression_loss"],
@@ -438,7 +438,7 @@ def tearDown(self):
438438
model_id="hf-internal-testing/tiny-random-bert",
439439
teacher_model_id="hf-internal-testing/tiny-random-bert",
440440
nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
441-
expected_fake_quantize=44,
441+
expected_fake_quantize=34,
442442
expected_int8=32,
443443
expected_binary_masks=60,
444444
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -450,7 +450,7 @@ def tearDown(self):
450450
CUSTOMIZED_QUANTIZATION_CONFIG,
451451
UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
452452
],
453-
expected_fake_quantize=44,
453+
expected_fake_quantize=34,
454454
expected_int8=32,
455455
expected_binary_masks=60,
456456
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -730,7 +730,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
730730
"quantization": OVTrainerTestDescriptor(
731731
model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
732732
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2],
733-
expected_fake_quantize=48,
733+
expected_fake_quantize=40,
734734
expected_int8=30,
735735
compression_metrics=["compression_loss"],
736736
),
@@ -757,7 +757,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
757757
"quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
758758
model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
759759
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
760-
expected_fake_quantize=48,
760+
expected_fake_quantize=40,
761761
expected_int8=30,
762762
expected_binary_masks=48,
763763
compression_metrics=["compression_loss"],
@@ -775,7 +775,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
775775
model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
776776
teacher_model_id="hf-internal-testing/tiny-random-Wav2Vec2Model",
777777
nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2],
778-
expected_fake_quantize=48,
778+
expected_fake_quantize=40,
779779
expected_int8=30,
780780
expected_binary_masks=48,
781781
compression_metrics=["compression_loss", "distillation_loss", "task_loss"],

tests/openvino/utils_tests.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
"mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
7575
"mt5": "stas/mt5-tiny-random",
7676
"nystromformer": "hf-internal-testing/tiny-random-NystromformerModel",
77-
"olmo": "katuni4ka/tiny-random-olmo",
77+
"olmo": "katuni4ka/tiny-random-olmo-hf",
7878
"orion": "katuni4ka/tiny-random-orion",
7979
"pegasus": "hf-internal-testing/tiny-random-pegasus",
8080
"pix2struct": "fxmarty/pix2struct-tiny-random",

0 commit comments

Comments (0)