Commit 9bb4334

Adding more INC tests (#698)

* added bert static test
* fix test for models that require position ids
* remove io_binding
* optimum refuses io binding without kv cache
1 parent e1b6a59 commit 9bb4334
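
The last two bullets reflect a constraint in optimum: ORTModelForCausalLM supports IO binding only together with the KV cache (use_cache=True), so the text-generation tests disable both rather than mix the two settings. A minimal sketch of the loading pattern the tests adopt below; the checkpoint name is illustrative:

from optimum.onnxruntime import ORTModelForCausalLM

# optimum rejects IO binding when the KV cache is disabled, so the tests
# turn off both for text-generation models (checkpoint is illustrative).
model = ORTModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt_neo",
    export=True,
    use_cache=False,
    use_io_binding=False,
)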

2 files changed (+28 −16)


tests/neural_compressor/test_optimization.py (+17 −16)
@@ -70,12 +70,13 @@


 class QuantizationTest(INCTestMixin):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
+    SUPPORTED_ARCHITECTURES_STATIC = (
+        ("text-generation", "gpt_neo", 17),
         ("text-classification", "bert", 21),
-        # ("text-generation", "bloom", 21),
+        ("text-generation", "bloom", 21),
     )

-    SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
+    SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_STATIC + (
         ("fill-mask", "bert", 22),
         ("token-classification", "albert", 26),
     )
@@ -88,12 +89,14 @@ class QuantizationTest(INCTestMixin):
     @parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
     def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls):
         model_name = MODEL_NAMES[model_arch]
-        quantization_config = PostTrainingQuantConfig(approach="dynamic")
         model_class = ORT_SUPPORTED_TASKS[task]["class"][0]
         tokenizer = AutoTokenizer.from_pretrained(model_name)
-        save_onnx_model = False
+
         quantized_model = None
+        save_onnx_model = False
         model_kwargs = {"use_cache": False, "use_io_binding": False} if task == "text-generation" else {}
+        quantization_config = PostTrainingQuantConfig(approach="dynamic")
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             for backend in ["torch", "ort"]:
                 if backend == "torch":
@@ -104,8 +107,8 @@ def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls
                 quantizer = INCQuantizer.from_pretrained(model, task=task)
                 quantizer.quantize(
                     quantization_config=quantization_config,
-                    save_directory=tmp_dir,
                     save_onnx_model=save_onnx_model,
+                    save_directory=tmp_dir,
                 )
                 if backend == "torch":
                     quantized_model = quantizer._quantized_model
@@ -121,7 +124,7 @@ def test_dynamic_quantization(self, task, model_arch, expected_quantized_matmuls
                 load_inc_model=True,
             )

-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_STATIC)
     def test_static_quantization(self, task, model_arch, expected_quantized_matmuls):
         num_samples = 10
         model_name = MODEL_NAMES[model_arch]
@@ -130,28 +133,26 @@ def test_static_quantization(self, task, model_arch, expected_quantized_matmuls)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token

-        save_onnx_model = False
-        op_type_dict = (
-            {"Embedding": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}
-            if save_onnx_model
-            else None
-        )
-        quantization_config = PostTrainingQuantConfig(approach="static", op_type_dict=op_type_dict)
         quantized_model = None
+        save_onnx_model = False
+        quantization_config = PostTrainingQuantConfig(approach="static")
+        model_kwargs = {"use_cache": False, "use_io_binding": False} if task == "text-generation" else {}

         with tempfile.TemporaryDirectory() as tmp_dir:
             for backend in ["torch", "ort"]:
                 if backend == "torch":
                     model = model_class.auto_model_class.from_pretrained(model_name)
                 else:
-                    model = model_class.from_pretrained(model_name, export=True)
+                    model = model_class.from_pretrained(model_name, export=True, **model_kwargs)
+
                 quantizer = INCQuantizer.from_pretrained(model, task=task)
                 calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=num_samples)
+
                 quantizer.quantize(
                     quantization_config=quantization_config,
                     calibration_dataset=calibration_dataset,
-                    save_directory=tmp_dir,
                     save_onnx_model=save_onnx_model,
+                    save_directory=tmp_dir,
                 )
                 if backend == "torch":
                     quantized_model = quantizer._quantized_model
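
Outside the test harness, the static path exercised above reduces to three steps: export the model through ONNX Runtime, build a small calibration set, and call INCQuantizer.quantize. A minimal sketch using the same optimum-intel and neural-compressor APIs the test imports; the checkpoint, dataset, and sample count are illustrative:

import tempfile

from neural_compressor import PostTrainingQuantConfig
from transformers import AutoTokenizer

from optimum.intel import INCQuantizer
from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "hf-internal-testing/tiny-random-bert"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)

quantizer = INCQuantizer.from_pretrained(model, task="text-classification")
# Static (post-training) quantization needs calibration data to fix the
# activation ranges; the dynamic approach would skip this step.
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=lambda ex: tokenizer(ex["sentence"], padding="max_length", max_length=128),
    num_samples=10,
    dataset_split="train",
)

with tempfile.TemporaryDirectory() as tmp_dir:
    quantizer.quantize(
        quantization_config=PostTrainingQuantConfig(approach="static"),
        calibration_dataset=calibration_dataset,
        save_onnx_model=False,
        save_directory=tmp_dir,
    )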

tests/neural_compressor/utils_tests.py (+11 −0)
@@ -47,6 +47,7 @@
 from optimum.intel.utils.constant import ONNX_WEIGHTS_NAME
 from optimum.onnxruntime import ORTModelForCausalLM, ORTModelForSequenceClassification
 from optimum.pipelines import ORT_SUPPORTED_TASKS
+from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS

 if is_ipex_available():
     from optimum.intel import (
@@ -135,6 +136,13 @@ def _generate_dataset(quantizer, tokenizer, num_samples=10):
         num_samples=num_samples,
         dataset_split="train",
     )
+    model_type = quantizer._original_model.config.model_type.replace("_", "-")
+    if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
+        dataset = dataset.map(
+            lambda x: {
+                "position_ids": np.arange(len(x["input_ids"])),
+            }
+        )
     return dataset


@@ -187,6 +195,9 @@ def check_model_outputs(

         self.assertEqual(expected_quantized_matmuls, num_quantized_matmul)
         ort_model = ORT_SUPPORTED_TASKS[task]["class"][0].from_pretrained(save_directory, **model_kwargs)
+        model_type = ort_model.config.model_type.replace("_", "-")
+        if model_type in MODEL_TYPES_REQUIRING_POSITION_IDS:
+            tokens["position_ids"] = torch.arange(len(tokens["input_ids"])).unsqueeze(0)
         ort_outputs = ort_model(**tokens)
         self.assertTrue("logits" in ort_outputs)
         # self.assertTrue(torch.allclose(ort_outputs.logits, outputs, atol=1e-2))
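
The two hunks above handle decoder architectures whose exported ONNX graph declares an explicit position_ids input (those listed in MODEL_TYPES_REQUIRING_POSITION_IDS): both the calibration dataset and the test inputs get sequential positions appended. A minimal sketch of the same fix applied by hand to a single input; the checkpoint is illustrative:

import torch
from transformers import AutoTokenizer

from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS
from optimum.onnxruntime import ORTModelForCausalLM

model_id = "hf-internal-testing/tiny-random-gpt_neo"  # illustrative checkpoint
model = ORTModelForCausalLM.from_pretrained(
    model_id, export=True, use_cache=False, use_io_binding=False
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

tokens = tokenizer("This is a sample input", return_tensors="pt")
# Config model types use underscores ("gpt_neo") while the exporter list
# uses dashes ("gpt-neo"), hence the replace() before the membership test.
if model.config.model_type.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS:
    # Sequential positions 0..seq_len-1 for a batch of one.
    tokens["position_ids"] = torch.arange(tokens["input_ids"].shape[1]).unsqueeze(0)

outputs = model(**tokens)
assert "logits" in outputs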
