Commit e9f3aa3

fix test for models that require position ids
1 parent df72e9f · commit e9f3aa3

2 files changed: +18 −9 lines

tests/neural_compressor/test_optimization.py (+7 −9)
@@ -70,12 +70,13 @@
 
 
 class QuantizationTest(INCTestMixin):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
+    SUPPORTED_ARCHITECTURES_STATIC = (
+        ("text-generation", "gpt_neo", 17),
         ("text-classification", "bert", 21),
         ("text-generation", "bloom", 21),
     )
 
-    SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
+    SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_STATIC + (
         ("fill-mask", "bert", 22),
         ("token-classification", "albert", 26),
     )
@@ -123,7 +124,7 @@ def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls
             load_inc_model=True,
         )
 
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_STATIC)
     def test_static_quantization(self, task, model_arch, expected_quantized_matmuls):
         num_samples = 10
         model_name = MODEL_NAMES[model_arch]
@@ -134,22 +135,19 @@ def test_static_quantization(self, task, model_arch, expected_quantized_matmuls)
 
         quantized_model = None
         save_onnx_model = False
-        op_type_dict = (
-            {"Embedding": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}
-            if save_onnx_model
-            else None
-        )
+        quantization_config = PostTrainingQuantConfig(approach="static")
         model_kwargs = {"use_cache": False, "use_io_binding": False} if task == "text-generation" else {}
-        quantization_config = PostTrainingQuantConfig(approach="static", op_type_dict=op_type_dict)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
             for backend in ["torch", "ort"]:
                 if backend == "torch":
                     model = model_class.auto_model_class.from_pretrained(model_name)
                 else:
                     model = model_class.from_pretrained(model_name, export=True, **model_kwargs)
+
                 quantizer = INCQuantizer.from_pretrained(model, task=task)
                 calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=num_samples)
+
                 quantizer.quantize(
                     quantization_config=quantization_config,
                     calibration_dataset=calibration_dataset,
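
For orientation, a minimal runnable sketch of how parameterized.expand consumes tuples like SUPPORTED_ARCHITECTURES_STATIC; the case list and test body below are illustrative, not part of the suite:

    import unittest
    from parameterized import parameterized

    # Illustrative subset of the tuples above: (task, model_arch, expected_quantized_matmuls).
    CASES = (
        ("text-generation", "gpt_neo", 17),
        ("text-classification", "bert", 21),
    )

    class ExampleTest(unittest.TestCase):
        @parameterized.expand(CASES)
        def test_case(self, task, model_arch, expected_quantized_matmuls):
            # expand() generates one test method per tuple, unpacking each
            # tuple's fields as positional arguments.
            self.assertIsInstance(expected_quantized_matmuls, int)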

tests/neural_compressor/utils_tests.py (+11 −0)
@@ -47,6 +47,7 @@
 from optimum.intel.utils.constant import ONNX_WEIGHTS_NAME
 from optimum.onnxruntime import ORTModelForCausalLM, ORTModelForSequenceClassification
 from optimum.pipelines import ORT_SUPPORTED_TASKS
+from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS
 
 if is_ipex_available():
     from optimum.intel import (
@@ -135,6 +136,13 @@ def _generate_dataset(quantizer, tokenizer, num_samples=10):
         num_samples=num_samples,
         dataset_split="train",
     )
+    model_type = quantizer._original_model.config.model_type.replace("_", "-")
+    if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
+        dataset = dataset.map(
+            lambda x: {
+                "position_ids": np.arange(len(x["input_ids"])),
+            }
+        )
     return dataset
 
 
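A standalone sketch of the calibration-side change above, assuming a toy unbatched dataset (the real samples come from the quantizer's calibration-dataset helper): Dataset.map runs the lambda per example, so each sample gets position_ids of 0..seq_len−1.

    import numpy as np
    from datasets import Dataset

    # Toy stand-in for the calibration dataset; input_ids values are arbitrary.
    dataset = Dataset.from_dict({"input_ids": [[101, 7592, 102], [101, 2088, 999, 102]]})
    # Same pattern as the diff: one position id per token, starting at 0.
    dataset = dataset.map(lambda x: {"position_ids": np.arange(len(x["input_ids"]))})
    print(dataset[0]["position_ids"])  # [0, 1, 2]
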
@@ -187,6 +195,9 @@ def check_model_outputs(
 
         self.assertEqual(expected_quantized_matmuls, num_quantized_matmul)
         ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
+        model_type = ort_model.config.model_type.replace("_", "-")
+        if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
+            tokens["position_ids"] = torch.arange(len(tokens["input_ids"])).unsqueeze(0)
         ort_outputs = ort_model(**tokens)
         self.assertTrue("logits" in ort_outputs)
         # self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-2))
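
And the inference-side counterpart, a hedged sketch assuming tokens is a batch-of-one tokenizer output in PyTorch tensors:

    import torch

    # Toy stand-in for tokenizer output with shape (batch=1, seq_len=4).
    tokens = {"input_ids": torch.tensor([[101, 7592, 2088, 102]])}
    seq_len = tokens["input_ids"].shape[1]
    # Position ids 0..seq_len-1 with a batch dimension prepended, matching
    # the (batch, seq_len) layout position_ids inputs normally use.
    tokens["position_ids"] = torch.arange(seq_len).unsqueeze(0)
    print(tokens["position_ids"])  # tensor([[0, 1, 2, 3]])

One caveat worth noting: for a 2-D tensor, len(tokens["input_ids"]) (as written in the diff) returns the batch dimension, so this sketch reads the sequence length from shape[1] instead; whether the two agree depends on the shape tokens actually has in the test.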
