@@ -15,12 +15,13 @@
 # ruff: noqa

 import itertools
+import logging
 import tempfile
 import unittest
 from collections import defaultdict
 from enum import Enum
 from functools import partial
-from typing import List
+from typing import List, Union

 import evaluate
 import numpy as np
@@ -104,9 +105,13 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="train",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config, file_name=file_name)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(
+                save_directory=tmp_dir,
+                calibration_dataset=calibration_dataset,
+                file_name=file_name,
+                ov_config=ov_config,
+            )
            model = model_cls.from_pretrained(tmp_dir, file_name=file_name)

            # TODO: uncomment once move to a newer version of NNCF which has some fixes (addmm, baddmm)
@@ -120,7 +125,7 @@ def preprocess_function(examples, tokenizer):

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
    def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
@@ -146,9 +151,8 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="train",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(save_directory=tmp_dir, calibration_dataset=calibration_dataset, ov_config=ov_config)

            model = model_cls.from_pretrained(tmp_dir)

@@ -162,7 +166,7 @@ def preprocess_function(examples, tokenizer):

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())


class OVWeightCompressionTest(unittest.TestCase):
@@ -281,12 +285,12 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_i

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            original_config_as_dict = OVWeightQuantizationConfig(bits=8, sym=True).to_dict()
+            original_config_as_dict = OVWeightQuantizationConfig().to_dict()
            for k in original_config_as_dict.keys():
                v = original_config_as_dict[k]
                if isinstance(v, Enum):
                    original_config_as_dict[k] = v.value
-            self.assertEqual(original_config_as_dict, loaded_config.quantization_config)
+            self.assertEqual(original_config_as_dict, loaded_config.quantization_config.to_dict())

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
    def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
@@ -311,7 +315,7 @@ def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_p

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(OVWeightQuantizationConfig(bits=8, sym=True).to_dict(), loaded_config.quantization_config)
+            self.assertEqual(OVWeightQuantizationConfig().to_dict(), loaded_config.quantization_config.to_dict())

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS)
    def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_int8, expected_int4):
@@ -342,7 +346,7 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())

    @parameterized.expand(SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
    @unittest.skipIf(not IS_SUPPORT_STATEFUL, "Stateful models supported only in 2023.3 and above")
@@ -368,7 +372,7 @@ def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_id, exp

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(OVWeightQuantizationConfig(bits=8, sym=True).to_dict(), loaded_config.quantization_config)
+            self.assertEqual(OVWeightQuantizationConfig().to_dict(), loaded_config.quantization_config.to_dict())

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION)
    def test_ovmodel_load_with_compressed_weights(self, model_cls, model_type):
@@ -439,11 +443,11 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
            model.save_pretrained(tmp_dir)

            openvino_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(openvino_config.quantization_config["bits"], 4)
+            self.assertEqual(openvino_config.quantization_config.bits, 4)
            self.assertEqual(openvino_config.dtype, "int4")
            if model_id == "facebook/opt-125m":
                for key, value in self.DEFAULT_INT4_CONFIG.items():
-                    self.assertEqual(value, openvino_config.quantization_config[key])
+                    self.assertEqual(value, getattr(openvino_config.quantization_config, key))

    @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
    def test_ovmodel_4bit_auto_compression_with_config(
@@ -461,7 +465,7 @@ def test_ovmodel_4bit_auto_compression_with_config(
            model.save_pretrained(tmp_dir)

            openvino_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(openvino_config.quantization_config["bits"], 4)
+            self.assertEqual(openvino_config.quantization_config.bits, 4)
            self.assertEqual(openvino_config.dtype, "int4")

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTO_COMPRESSED_MATMULS)
@@ -492,9 +496,8 @@ def transform_fn(data, tokenizer):
        model = model_cls.from_pretrained(
            model_id,
            export=True,
-            quantization_config=OVWeightQuantizationConfig(
-                bits=4, sym=True, group_size=-1, ratio=0.8, dataset=quantization_dataset
-            ),
+            quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+            calibration_dataset=quantization_dataset,
        )

        _, num_int8, num_int4 = get_num_quantized_nodes(model)
@@ -584,7 +587,7 @@ def test_ovmodel_load_large_model_with_additional_quantization_config(self):
                "all_layers": None,
                "sensitivity_metric": None,
                "dataset": None,
-                "ignored_scope": None,
+                "ignored_scope": nncf.IgnoredScope(),
            }
            compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)

@@ -610,9 +613,8 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="test",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(save_directory=tmp_dir, calibration_dataset=calibration_dataset, ov_config=ov_config)

            # Test that inference on quantized model works
            model = OVModelForQuestionAnswering.from_pretrained(tmp_dir)
@@ -629,7 +631,7 @@ def preprocess_function(examples, tokenizer):

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_ovmodel_static_quantization(self, model_name):
@@ -649,9 +651,8 @@ def preprocess_function(examples, tokenizer):
                num_samples=10,
                dataset_split="test",
            )
-            quantization_config = OVQuantizationConfig(dataset=calibration_dataset)
-            ov_config = OVConfig(quantization_config=quantization_config)
-            quantizer.quantize(save_directory=tmp_dir, ov_config=ov_config)
+            ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+            quantizer.quantize(save_directory=tmp_dir, calibration_dataset=calibration_dataset, ov_config=ov_config)

            # Test that inference on quantized model works
            model = OVModelForQuestionAnswering.from_pretrained(tmp_dir)
@@ -668,7 +669,7 @@ def preprocess_function(examples, tokenizer):

            # Verify that the configuration is correctly saved and loaded
            loaded_config = OVConfig.from_pretrained(tmp_dir)
-            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config)
+            self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())


class OVTrainerTest(unittest.TestCase):
@@ -719,24 +720,13 @@ def compute_metrics(p):

class OVQuantizationConfigTest(unittest.TestCase):
    QUANTIZATION_CONFIGS = (
-        (
-            None,
-            [],
-        ),
-        (OVWeightQuantizationConfig(), []),
+        (None,),
+        (OVWeightQuantizationConfig(),),
        (
            OVWeightQuantizationConfig(
                bits=8,
                sym=True,
            ),
-            [],
-        ),
-        (
-            {
-                "bits": 8,
-                "sym": True,
-            },
-            [],
        ),
        (
            OVWeightQuantizationConfig(
@@ -752,43 +742,82 @@ class OVQuantizationConfigTest(unittest.TestCase):
                num_samples=100,
                quant_method=OVQuantizationMethod.DEFAULT,
            ),
-            ["ignored_scope"],
        ),
-        (OVWeightQuantizationConfig(dataset=["wikitext", "c4"]), []),
-        (OVWeightQuantizationConfig(dataset=load_dataset("wikitext", "wikitext-2-raw-v1", split="test")), ["dataset"]),
-        (OVWeightQuantizationConfig(dataset=nncf.Dataset([np.zeros((1, 10))])), ["dataset"]),
+        (OVWeightQuantizationConfig(dataset=["hello world", "i'm alive"]),),
        (
-            OVWeightQuantizationConfig(tokenizer=AutoTokenizer.from_pretrained("dbmdz/bert-base-german-cased")),
-            ["tokenizer"],
+            OVQuantizationConfig(
+                ignored_scope={"names": ["op_name"]},
+                num_samples=100,
+                preset=nncf.QuantizationPreset.MIXED,
+                model_type=nncf.ModelType.TRANSFORMER,
+                fast_bias_correction=True,
+                overflow_fix=OverflowFix.DISABLE,
+            ),
        ),
-        (OVWeightQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])), ["ignored_scope"]),
-        (OVQuantizationConfig(dataset="wikitext"), []),
-        ({"dataset": "wikitext"}, []),
+        (OVQuantizationConfig(ignored_scope=nncf.IgnoredScope(names=["op_name"])),),
+    )
+
+    QUANTIZATION_CONFIG_DICTS = (
+        (dict(bits=8, sym=True), OVWeightQuantizationConfig, None),
        (
-            OVQuantizationConfig(
+            dict(
                dataset="wikitext",
+                bits=4,
+                ignored_scope={"names": ["op_name"]},
+                sym=False,
+                tokenizer="dbmdz/bert-base-german-cased",
+                ratio=1.0,
+                group_size=128,
+                all_layers=True,
+                sensitivity_metric="mean_activation_magnitude",
+                num_samples=100,
+                quant_method=OVQuantizationMethod.DEFAULT,
+            ),
+            OVWeightQuantizationConfig,
+            None,
+        ),
+        (dict(), OVWeightQuantizationConfig, "Can't determine type of OV quantization config"),
+        (
+            dict(ignored_scope={"names": ["op_name"]}),
+            OVWeightQuantizationConfig,
+            "Can't determine type of OV quantization config",
+        ),
+        (dict(num_samples=100), OVWeightQuantizationConfig, "Can't determine type of OV quantization config"),
+        (dict(abc="def"), OVWeightQuantizationConfig, "Can't determine type of OV quantization config"),
+        (
+            dict(bits=8, fast_bias_correction=True),
+            OVWeightQuantizationConfig,
+            "Can't determine type of OV quantization config",
+        ),
+        (dict(model_type=nncf.ModelType.TRANSFORMER), OVQuantizationConfig, None),
+        (
+            dict(
                ignored_scope={"names": ["op_name"]},
                num_samples=100,
                preset=nncf.QuantizationPreset.MIXED,
                model_type=nncf.ModelType.TRANSFORMER,
                fast_bias_correction=True,
                overflow_fix=OverflowFix.DISABLE,
            ),
-            ["ignored_scope"],
+            OVQuantizationConfig,
+            None,
        ),
-        (OVQuantizationConfig(dataset=["wikitext", "c4"]), []),
-        (OVQuantizationConfig(dataset=load_dataset("wikitext", "wikitext-2-raw-v1", split="test")), ["dataset"]),
-        (OVQuantizationConfig(dataset=nncf.Dataset([np.zeros((1, 10))])), ["dataset"]),
+        (dict(weight_only=True), OVWeightQuantizationConfig, None),
+        (dict(weight_only=False), OVQuantizationConfig, None),
+        (dict(abc="def", weight_only=False), OVQuantizationConfig, None),
+        (dict(abc="def", weight_only=True), OVWeightQuantizationConfig, None),
+        (dict(bits=8, fast_bias_correction=True, weight_only=True), OVWeightQuantizationConfig, None),
+        (dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
+        (dict(bits=8, sym=True, weight_only=False), OVWeightQuantizationConfig, "Please check your configuration"),
        (
-            OVQuantizationConfig(dataset=["wikitext", "c4"], ignored_scope=nncf.IgnoredScope(names=["op_name"])),
-            ["ignored_scope"],
+            dict(model_type=nncf.ModelType.TRANSFORMER, weight_only=True),
+            OVQuantizationConfig,
+            "Please check your configuration",
        ),
    )

    @parameterized.expand(QUANTIZATION_CONFIGS)
-    def test_config_serialization(
-        self, quantization_config: OVQuantizationConfigBase, non_equal_property_names: List[str]
-    ):
+    def test_config_serialization(self, quantization_config: OVQuantizationConfigBase):
        def str_to_enum(enum_cls, value):
            for k, v in enum_cls.__members__.items():
                if getattr(enum_cls, k).value == value:
@@ -803,12 +832,8 @@ def str_to_enum(enum_cls, value):
            if quantization_config is None:
                self.assertEqual(loaded_ov_config.quantization_config, None)
                return
-            for key, value in loaded_ov_config.quantization_config.items():
-                initial_value = (
-                    quantization_config[key]
-                    if isinstance(quantization_config, dict)
-                    else getattr(ov_config.quantization_config, key)
-                )
+            for key, value in loaded_ov_config.quantization_config.to_dict().items():
+                initial_value = getattr(ov_config.quantization_config, key)
                if key == "preset" or key == "overflow_fix":
                    # TODO: remove once NNCF is updated to 2.10
                    if getattr(quantization_config, key) is not None:
@@ -817,10 +842,24 @@ def str_to_enum(enum_cls, value):
                            value = str_to_enum(nncf.QuantizationPreset, value)
                        else:
                            value = str_to_enum(OverflowFix, value)
-                if key in non_equal_property_names:
-                    self.assertNotEqual(value, initial_value)
-                else:
-                    self.assertEqual(value, initial_value)
+                self.assertEqual(value, initial_value)
+
+    @parameterized.expand(QUANTIZATION_CONFIG_DICTS)
+    def test_config_from_dict(self, quantization_config: dict, config_type: type, warning_log: Union[str, None]):
+        from optimum.intel.openvino.configuration import logger as configuration_logger
+
+        if warning_log is not None:
+            with self.assertLogs(configuration_logger, logging.WARN) as cm:
+                ov_config = OVConfig(quantization_config=quantization_config)
+            self.assertTrue(any(warning_log in log for log in cm.output))
+        else:
+            ov_config = OVConfig(quantization_config=quantization_config)
+        self.assertIsInstance(ov_config.quantization_config, config_type)
+        for k, v in quantization_config.items():
+            if k == "weight_only" and warning_log == "Please check your configuration":
+                continue
+            if hasattr(ov_config.quantization_config, k):
+                self.assertEqual(getattr(ov_config.quantization_config, k), v)


class InferRequestWrapperTest(unittest.TestCase):