@@ -214,6 +214,7 @@ def preprocess_function(examples, tokenizer):
        # Verify that the configuration is correctly saved and loaded
        loaded_config = OVConfig.from_pretrained(tmp_dir)
        self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
+       check_optimization_not_applicable_to_optimized_model(model, quantization_config=OVWeightQuantizationConfig(bits=8))

    @parameterized.expand(SUPPORTED_ARCHITECTURES_OV_MODEL_WITH_AUTO_DATASET)
    def test_ov_model_static_quantization_with_auto_dataset(
@@ -255,6 +256,7 @@ def test_ov_model_static_quantization_with_auto_dataset(
            self.assertTrue("logits" in outputs)
        else:
            raise Exception("Unexpected model class.")
+       check_optimization_not_applicable_to_optimized_model(ov_model, quantization_config=quantization_config)


class OVWeightCompressionTest(unittest.TestCase):
@@ -718,28 +720,18 @@ def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type, trust
        else:
            models = [model]

+       if model_type == "open-clip":
+           pytest.skip(reason="ticket 161043")
+       elif model_type == "t5":
+           pytest.skip(reason="ticket 160958")
+       else:
+           check_optimization_not_applicable_to_optimized_model(model, quantization_config={"bits": 8})
+
        expected_ov_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
        for i, model in enumerate(models):
            _, num_weight_nodes = get_num_quantized_nodes(model)
            self.assertEqual(expected_ov_int8[i], num_weight_nodes["int8"])

-   @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
-   def test_raise_error_WC_over_WC(self, model_cls, model_type, trust_remote_code):
-       model = model_cls.from_pretrained(
-           MODEL_NAMES[model_type],
-           export=True,
-           load_in_8bit=True,
-           trust_remote_code=trust_remote_code,
-       )
-       quantization_config = OVWeightQuantizationConfig(bits=4, sym=True)
-       quantizer = OVQuantizer(model)
-       if isinstance(model, OVModelOpenCLIPForZeroShotImageClassification):
-           with pytest.raises(TypeError):
-               quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config))
-       else:
-           with pytest.raises(RuntimeError):
-               quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config))
-
    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION)
    def test_ovmodel_hybrid_quantization(self, model_cls, model_type, expected_fake_nodes, expected_int8_nodes):
        model_id = MODEL_NAMES[model_type]
@@ -755,6 +747,7 @@ def test_ovmodel_hybrid_quantization(self, model_cls, model_type, expected_fake_
            self.assertEqual(0, num_weight_nodes["int4"])

            model.save_pretrained(tmp_dir)
+           check_optimization_not_applicable_to_optimized_model(model, quantization_config=quantization_config)

    def test_stable_diffusion_with_weight_compression(self):
        int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_NAMES["stable-diffusion"], export=True)
@@ -769,6 +762,8 @@ def test_stable_diffusion_with_weight_compression(self):
        self.assertEqual(0, num_fake_nodes)
        self.assertEqual(242, num_weight_nodes["int8"])
        self.assertEqual(0, num_weight_nodes["int4"])
+       quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions", num_samples=2, quant_method=OVQuantizationMethod.HYBRID)
+       check_optimization_not_applicable_to_optimized_model(int8_pipe, quantization_config=quantization_config)

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_HYBRID_QUANTIZATION[-1:])
    def test_ovmodel_hybrid_quantization_with_custom_dataset(
@@ -814,6 +809,7 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
        if model_id == "facebook/opt-125m":
            for key, value in self.DEFAULT_INT4_CONFIG.items():
                self.assertEqual(value, getattr(openvino_config.quantization_config, key))
+       check_optimization_not_applicable_to_optimized_model(model, quantization_config={"bits": 8})

    @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
    def test_ovmodel_4bit_auto_compression_with_config(
@@ -1338,3 +1334,9 @@ def test_calibration_data_uniqueness(self, model_name, apply_caching):
        else:
            # Without caching, encoder hidden states tensors will be unique for each collected input
            self.assertGreater(len(data_id_per_key["encoder_hidden_states"]), 2)
+
+
+def check_optimization_not_applicable_to_optimized_model(model, quantization_config):
+    quantizer = OVQuantizer(model)
+    with pytest.raises(RuntimeError, match="Cannot apply optimization to the model because it was already optimized with the following config"):
+        quantizer.quantize(quantization_config=quantization_config)
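
For reference, the new module-level helper replaces the removed test_raise_error_WC_over_WC test: quantizing a model that was already optimized should now consistently raise a RuntimeError. A minimal standalone sketch of the behavior the helper asserts, assuming optimum.intel's public OVModelForCausalLM/OVQuantizer API; the tiny model id is illustrative and not taken from the diff:

import pytest
from optimum.intel import OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

# Export with 8-bit weight compression applied on load (mirrors the removed test's setup).
model = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2",  # illustrative model id, not from the diff
    export=True,
    load_in_8bit=True,
)

# A second optimization pass over the already-compressed model is expected to raise.
quantizer = OVQuantizer(model)
with pytest.raises(RuntimeError, match="Cannot apply optimization to the model"):
    quantizer.quantize(quantization_config=OVWeightQuantizationConfig(bits=4, sym=True))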