44
44
pipeline ,
45
45
set_seed ,
46
46
)
47
- from utils_tests import SEED , INCTestMixin , _generate_dataset
47
+ from utils_tests import MODEL_NAMES , SEED , INCTestMixin , _generate_dataset
48
48
from optimum .intel .utils .import_utils import is_torch_version , is_intel_extension_for_transformers_available
49
49
50
50
71
71
72
72
class QuantizationTest (INCTestMixin ):
73
73
SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
74
- ("text-classification" , "hf-internal-testing/tiny-random-BertForSequenceClassification" , 21 ),
75
- ("text-generation" , "hf-internal-testing/tiny-random-BloomForCausalLM" , 21 ),
74
+ ("text-classification" , "bert" , 21 ),
75
+ # ("text-generation", "bloom", 21),
76
76
)
77
77
78
78
SUPPORTED_ARCHITECTURES_DYNAMIC = SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS + (
79
- ("fill-mask" , "hf-internal-testing/tiny-random-BertForMaskedLM" , 22 ),
80
- ("token-classification" , "hf-internal-testing/tiny-random-AlbertForTokenClassification" , 26 ),
79
+ ("fill-mask" , "bert" , 22 ),
80
+ ("token-classification" , "albert" , 26 ),
81
81
)
82
82
83
83
TEXT_GENERATION_SUPPORTED_ARCHITECTURES = (
84
- "hf-internal-testing/tiny-random-BloomForCausalLM" ,
85
- "hf-internal-testing/tiny-random-GPTNeoForCausalLM" ,
84
+ "bloom" ,
85
+ "gpt_neo" ,
86
86
)
87
87
88
88
@parameterized .expand (SUPPORTED_ARCHITECTURES_DYNAMIC )
89
- def test_dynamic_quantization (self , task , model_name , expected_quantized_matmuls ):
89
+ def test_dynamic_quantization (self , task , model_arch , expected_quantized_matmuls ):
90
+ model_name = MODEL_NAMES [model_arch ]
90
91
quantization_config = PostTrainingQuantConfig (approach = "dynamic" )
91
92
model_class = ORT_SUPPORTED_TASKS [task ]["class" ][0 ]
92
93
tokenizer = AutoTokenizer .from_pretrained (model_name )
@@ -121,8 +122,9 @@ def test_dynamic_quantization(self, task, model_name, expected_quantized_matmuls
121
122
)
122
123
123
124
@parameterized .expand (SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS )
124
- def test_static_quantization (self , task , model_name , expected_quantized_matmuls ):
125
+ def test_static_quantization (self , task , model_arch , expected_quantized_matmuls ):
125
126
num_samples = 10
127
+ model_name = MODEL_NAMES [model_arch ]
126
128
model_class = ORT_SUPPORTED_TASKS [task ]["class" ][0 ]
127
129
tokenizer = AutoTokenizer .from_pretrained (model_name )
128
130
if tokenizer .pad_token is None :
@@ -245,7 +247,8 @@ def test_dynamic_diffusion_model(self):
245
247
self .assertTrue (np .allclose (loaded_pipe_outputs , outputs , atol = 1e-4 ))
246
248
247
249
@parameterized .expand (TEXT_GENERATION_SUPPORTED_ARCHITECTURES )
248
- def test_quantize_text_generate_model (self , model_id ):
250
+ def test_quantize_text_generate_model (self , model_arch ):
251
+ model_id = MODEL_NAMES [model_arch ]
249
252
set_seed (42 )
250
253
model = AutoModelForCausalLM .from_pretrained (model_id )
251
254
tokenizer = AutoTokenizer .from_pretrained (model_id )
@@ -274,13 +277,11 @@ def calibration_fn(p_model):
274
277
275
278
276
279
class TrainingOptimizationTest (INCTestMixin ):
277
- SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
278
- ("text-classification" , "hf-internal-testing/tiny-random-BertForSequenceClassification" , 21 ),
279
- ("text-generation" , "hf-internal-testing/tiny-random-BloomForCausalLM" , 21 ),
280
- )
280
+ SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("text-classification" , "bert" , 21 ),)
281
281
282
282
@parameterized .expand (SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS )
283
- def test_aware_training_quantization (self , task , model_name , expected_quantized_matmuls ):
283
+ def test_aware_training_quantization (self , task , model_arch , expected_quantized_matmuls ):
284
+ model_name = MODEL_NAMES [model_arch ]
284
285
quantization_config = QuantizationAwareTrainingConfig ()
285
286
save_onnx_model = False
286
287
@@ -303,7 +304,8 @@ def test_aware_training_quantization(self, task, model_name, expected_quantized_
303
304
)
304
305
305
306
@parameterized .expand (SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS )
306
- def test_aware_training_quantization_pruning (self , task , model_name , expected_quantized_matmuls ):
307
+ def test_aware_training_quantization_pruning (self , task , model_arch , expected_quantized_matmuls ):
308
+ model_name = MODEL_NAMES [model_arch ]
307
309
quantization_config = QuantizationAwareTrainingConfig ()
308
310
target_sparsity = 0.9
309
311
pruning_config = WeightPruningConfig (
@@ -335,7 +337,8 @@ def test_aware_training_quantization_pruning(self, task, model_name, expected_qu
335
337
)
336
338
337
339
@parameterized .expand (SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS )
338
- def test_magnitude_pruning (self , task , model_name , expected_quantized_matmuls ):
340
+ def test_magnitude_pruning (self , task , model_arch , expected_quantized_matmuls ):
341
+ model_name = MODEL_NAMES [model_arch ]
339
342
target_sparsity = 0.9
340
343
# end_step should be training_args.num_train_epochs * (len(train_dataset) // training_args.per_device_train_batch_size)
341
344
pruning_config = WeightPruningConfig (
@@ -374,7 +377,8 @@ def test_magnitude_pruning(self, task, model_name, expected_quantized_matmuls):
374
377
self .assertEqual (inc_config .pruning ["pattern" ], "4x1" )
375
378
376
379
@parameterized .expand (SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS )
377
- def test_distillation (self , task , model_name , expected_quantized_matmuls ):
380
+ def test_distillation (self , task , model_arch , expected_quantized_matmuls ):
381
+ model_name = MODEL_NAMES [model_arch ]
378
382
teacher_model = ORT_SUPPORTED_TASKS [task ]["class" ][0 ].auto_model_class .from_pretrained (model_name )
379
383
distillation_config = DistillationConfig (teacher_model = teacher_model )
380
384
save_onnx_model = True
0 commit comments