Fix tests for models that require position IDs

IlyasMoutawwakil · IlyasMoutawwakil · commit e9f3aa369bef · 2024-05-02T14:25:07.000+02:00
diff --git a/tests/neural_compressor/test_optimization.py b/tests/neural_compressor/test_optimization.py
@@ -70,12 +70,13 @@
 
 
 class QuantizationTest(INCTestMixin):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
+    SUPPORTED_ARCHITECTURES_STATIC = (
+        ("text-generation", "gpt_neo", 17),
         ("text-classification", "bert", 21),
         ("text-generation", "bloom", 21),
     )
 
-    SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
+    SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_STATIC + (
         ("fill-mask", "bert", 22),
         ("token-classification", "albert", 26),
     )
@@ -123,7 +124,7 @@ def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls
                 load_inc_model=True,
             )
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_STATIC)
     def test_static_quantization(self, task, model_arch, expected_quantized_matmuls):
         num_samples = 10
         model_name = MODEL_NAMES[model_arch]
@@ -134,22 +135,19 @@ def test_static_quantization(self, task, model_arch, expected_quantized_matmuls)
 
         quantized_model = None
         save_onnx_model = False
-        op_type_dict = (
-            {"Embedding": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}
-            if save_onnx_model
-            else None
-        )
+        quantization_config = PostTrainingQuantConfig(approach="static")
         model_kwargs = {"use_cache": False, "use_io_binding": False} if task == "text-generation" else {}
-        quantization_config = PostTrainingQuantConfig(approach="static", op_type_dict=op_type_dict)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
             for backend in ["torch", "ort"]:
                 if backend == "torch":
                     model = model_class.auto_model_class.from_pretrained(model_name)
                 else:
                     model = model_class.from_pretrained(model_name, export=True, **model_kwargs)
+
                 quantizer = INCQuantizer.from_pretrained(model, task=task)
                 calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=num_samples)
+
                 quantizer.quantize(
                     quantization_config=quantization_config,
                     calibration_dataset=calibration_dataset,
diff --git a/tests/neural_compressor/utils_tests.py b/tests/neural_compressor/utils_tests.py
@@ -47,6 +47,7 @@
 from optimum.intel.utils.constant import ONNX_WEIGHTS_NAME
 from optimum.onnxruntime import ORTModelForCausalLM, ORTModelForSequenceClassification
 from optimum.pipelines import ORT_SUPPORTED_TASKS
+from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS
 
 if is_ipex_available():
     from optimum.intel import (
@@ -135,6 +136,13 @@ def _generate_dataset(quantizer, tokenizer, num_samples=10):
         num_samples=num_samples,
         dataset_split="train",
     )
+    model_type = quantizer._original_model.config.model_type.replace("_", "-")
+    if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
+        dataset = dataset.map(
+            lambda x: {
+                "position_ids": np.arange(len(x["input_ids"])),
+            }
+        )
     return dataset
 
 
@@ -187,6 +195,9 @@ def check_model_outputs(
 
             self.assertEqual(expected_quantized_matmuls, num_quantized_matmul)
             ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
+            model_type = ort_model.config.model_type.replace("_", "-")
+            if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
+                tokens["position_ids"] = torch.arange(len(tokens["input_ids"])).unsqueeze(0)
             ort_outputs = ort_model(**tokens)
             self.assertTrue("logits" in ort_outputs)
             # self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-2))