@@ -552,6 +552,29 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self):
552
552
compress_weights_patch .assert_called_with (unittest .mock .ANY , ** compression_params )
553
553
554
554
555
@parameterized.expand(LOAD_IN_4_BITS_SCOPE)
def test_ovmodel_4bit_dynamic_with_config(self, model_cls, model_name, quantization_config, expected_ov_int4):
    """Export a model with an ``OVDynamicQuantizationConfig`` and check the result.

    The test verifies that:
      * ``model.ov_config`` carries the requested dynamic-quantization group
        size (as a string) and ``"u8"`` for ``KV_CACHE_PRECISION``;
      * the int4 node count reported by ``get_num_quantized_nodes`` equals
        ``expected_ov_int4``;
      * the ``OVConfig`` reloaded from the saved directory reports 4 bits
        and the ``"int4"`` dtype.

    Args:
        model_cls: Model class exposing ``from_pretrained`` / ``save_pretrained``.
        model_name: Key into ``MODEL_NAMES`` selecting the checkpoint id.
        quantization_config: Dict of quantization keyword arguments; an
            optional ``"group_size"`` entry (default 32) is used for both the
            weights and the activations group size.
        expected_ov_int4: Expected number of int4 nodes in the exported model.
    """
    model_id = MODEL_NAMES[model_name]

    # Pop from a copy: the incoming dict is owned by the shared
    # LOAD_IN_4_BITS_SCOPE parameter list, so mutating it in place would
    # strip "group_size" for every other test (or re-run) using that scope.
    config_kwargs = dict(quantization_config)
    group_size = config_kwargs.pop("group_size", 32)
    dynamic_config = OVDynamicQuantizationConfig(
        weights_group_size=group_size,
        activations_group_size=group_size,
        **config_kwargs,
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        model = model_cls.from_pretrained(model_id, export=True, quantization_config=dynamic_config)
        self.assertEqual(model.ov_config["DYNAMIC_QUANTIZATION_GROUP_SIZE"], str(group_size))
        self.assertEqual(model.ov_config["KV_CACHE_PRECISION"], "u8")

        _, num_int4, _ = get_num_quantized_nodes(model)
        self.assertEqual(expected_ov_int4, num_int4)
        model.save_pretrained(tmp_dir)

        # Round-trip through disk to confirm the quantization settings persist.
        openvino_config = OVConfig.from_pretrained(tmp_dir)
        self.assertEqual(openvino_config.quantization_config.bits, 4)
        self.assertEqual(openvino_config.dtype, "int4")
576
+
577
+
555
578
class OVQuantizerQATest (unittest .TestCase ):
556
579
SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering" ,),)
557
580
0 commit comments