@@ -155,6 +155,7 @@ class OVWeightCompressionTest(unittest.TestCase):
    )

    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 64, 365),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 6, 379),)
    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS = (
        (OVModelForCausalLM, "hf-internal-testing/tiny-random-OPTForCausalLM", 16, 136),
    )
@@ -287,9 +288,7 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i

            quantizer = OVQuantizer.from_pretrained(transformers_model, task=task)
            ov_config = OVConfig(
-                weight_quantization_config=OVWeightQuantizationConfig(
-                    mode=nncf.CompressWeightsMode.INT4_SYM, ratio=0.8
-                )
+                quantization_config=OVWeightQuantizationConfig(mode=nncf.CompressWeightsMode.INT4_SYM, ratio=0.8)
            )
            quantizer.quantize(
                save_directory=tmp_dir,
@@ -330,25 +329,43 @@ def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_id, exp

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
    def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
-        model = model_cls.from_pretrained(MODEL_NAMES[model_type], export=True, load_in_8bit=True, stateful=False)
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model = model_cls.from_pretrained(MODEL_NAMES[model_type], export=True, load_in_8bit=True, stateful=False)
+
+            if model.export_feature.startswith("text2text-generation"):
+                models = [model.encoder, model.decoder, model.decoder_with_past]
+            elif model.export_feature.startswith("stable-diffusion"):
+                models = [model.unet, model.vae_encoder, model.vae_decoder]
+                models.append(
+                    model.text_encoder if model.export_feature == "stable-diffusion" else model.text_encoder_2
+                )
+            else:
+                models = [model]

-        if model.export_feature.startswith("text2text-generation"):
-            models = [model.encoder, model.decoder, model.decoder_with_past]
-        elif model.export_feature.startswith("stable-diffusion"):
-            models = [model.unet, model.vae_encoder, model.vae_decoder]
-            models.append(model.text_encoder if model.export_feature == "stable-diffusion" else model.text_encoder_2)
-        else:
-            models = [model]
+            expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
+            for i, model in enumerate(models):
+                _, num_int8, _ = get_num_quantized_nodes(model)
+                self.assertEqual(expected_ov_int8[i], num_int8)
+            model.save_pretrained(tmp_dir)

-        expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
-        for i, model in enumerate(models):
-            _, num_int8, _ = get_num_quantized_nodes(model)
-            self.assertEqual(expected_ov_int8[i], num_int8)
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS)
+    def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_int8, expected_ov_int4):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            model_id = MODEL_NAMES[model_type]
+            model = model_cls.from_pretrained(model_id, export=True, load_in_4bit=True)
+            tokenizer = AutoTokenizer.from_pretrained(model_id)
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token

-    @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
-    def test_ovmodel_4bit_auto_compression(self, model_cls, model_id, quantization_config, expected_ov_int4):
-        task = model_cls.export_feature
+            _, num_int8, num_int4 = get_num_quantized_nodes(model)
+            self.assertEqual(expected_ov_int4, num_int4)
+            self.assertEqual(expected_ov_int8, num_int8)
+            model.save_pretrained(tmp_dir)

+    @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
+    def test_ovmodel_4bit_auto_compression_with_config(
+        self, model_cls, model_id, quantization_config, expected_ov_int4
+    ):
        with tempfile.TemporaryDirectory() as tmp_dir:
            model = model_cls.from_pretrained(
                model_id, export=True, load_in_4bit=True, quantization_config=quantization_config
@@ -359,6 +376,7 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_id, quantization_c

            _, num_int4, _ = get_num_quantized_nodes(model)
            self.assertEqual(expected_ov_int4, num_int4)
+            model.save_pretrained(tmp_dir)

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS)
    def test_ovmodel_4bit_auto_compression_with_custom_dataset(
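For context, a minimal sketch of the user-facing call path these tests exercise. This is an assumption-laden illustration, not part of the commit: it presumes `OVModelForCausalLM` and `OVWeightQuantizationConfig` are importable from `optimum.intel` as in this test file's imports, and "facebook/opt-125m" stands in for the "opt125m" test alias.

import nncf
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Default 4-bit auto-compression on export, as in test_ovmodel_4bit_auto_compression.
model = OVModelForCausalLM.from_pretrained("facebook/opt-125m", export=True, load_in_4bit=True)

# Explicit config, as in test_ovmodel_4bit_auto_compression_with_config,
# reusing the exact OVWeightQuantizationConfig constructed in the diff above.
config = OVWeightQuantizationConfig(mode=nncf.CompressWeightsMode.INT4_SYM, ratio=0.8)
model = OVModelForCausalLM.from_pretrained(
    "facebook/opt-125m", export=True, load_in_4bit=True, quantization_config=config
)
model.save_pretrained("opt125m-int4-ov")  # hypothetical output directory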