@@ -294,7 +294,8 @@ def __init__(
294
294
dataset : Optional [Union [str , List [str ]]] = None ,
295
295
tokenizer : Optional [str ] = None ,
296
296
processor : Optional [str ] = None ,
297
- trust_remote_code : bool = False ,
297
+ trust_remote_code : Optional [bool ] = False ,
298
+ init_kwargs : Optional [dict ] = None ,
298
299
** kwargs ,
299
300
):
300
301
"""
@@ -314,6 +315,8 @@ def __init__(
314
315
Allows to use custom code for the modeling hosted in the model repository. This option should only be
315
316
set for repositories you trust and in which you have read the code, as it will execute on your local
316
317
machine arbitrary code present in the model repository.
318
+ init_kwargs (`dict`, *optional*):
319
+ Additional parameters for NNCF calls. This explicit argument is needed for deserialization from dict.
317
320
"""
318
321
self .num_samples = num_samples
319
322
self .dataset = dataset
@@ -323,6 +326,7 @@ def __init__(
323
326
if isinstance (ignored_scope , nncf .IgnoredScope ):
324
327
ignored_scope = ignored_scope .__dict__
325
328
self .ignored_scope = ignored_scope
329
+ self .init_kwargs = (init_kwargs or {}) | kwargs
326
330
327
331
def post_init (self ):
328
332
try :
@@ -427,6 +431,9 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
427
431
retained in their original precision without any quantization.
428
432
- "int8_sym" stands for 8-bit integer symmetric quantization without zero point.
429
433
- "int8_asym" stands for 8-bit integer asymmetric quantization with zero points per each quantization group.
434
+ init_kwargs (`dict`, *optional*):
435
+ Additional parameters for nncf.compress_weights() call. This explicit argument is needed for deserialization from dict.
436
+ kwargs: Additional parameters for nncf.compress_weights() call.
430
437
"""
431
438
432
439
def __init__ (
@@ -449,15 +456,25 @@ def __init__(
449
456
processor : Optional [str ] = None ,
450
457
lora_correction : bool = None ,
451
458
backup_precision : Optional [str ] = None ,
459
+ init_kwargs : Optional [dict ] = None ,
452
460
** kwargs ,
453
461
):
462
+ weight_format = kwargs .pop ("weight_format" , None )
463
+ if weight_format is not None :
464
+ logger .warning (
465
+ "The `weight_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
466
+ "Please use `dtype` instead."
467
+ )
468
+ dtype = weight_format
454
469
super ().__init__ (
455
470
ignored_scope = ignored_scope ,
456
471
num_samples = num_samples ,
457
472
dataset = dataset ,
458
473
tokenizer = tokenizer ,
459
474
processor = processor ,
460
475
trust_remote_code = trust_remote_code ,
476
+ init_kwargs = init_kwargs ,
477
+ ** kwargs ,
461
478
)
462
479
self .bits = bits
463
480
self .sym = sym
@@ -470,12 +487,6 @@ def __init__(
470
487
self .gptq = gptq
471
488
self .lora_correction = lora_correction
472
489
self .backup_precision = backup_precision
473
- if kwargs .get ("weight_format" ) is not None :
474
- logger .warning (
475
- "The `weight_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
476
- "Please use `dtype` instead."
477
- )
478
- dtype = kwargs .get ("weight_format" )
479
490
self .dtype = dtype
480
491
self .post_init ()
481
492
@@ -624,6 +635,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
624
635
"gptq" : self .gptq ,
625
636
"lora_correction" : self .lora_correction ,
626
637
"backup_mode" : backup_mode ,
638
+ ** self .init_kwargs ,
627
639
}
628
640
return result
629
641
@@ -666,6 +678,7 @@ def __init__(
666
678
trust_remote_code : bool = False ,
667
679
smooth_quant_alpha : Optional [float ] = None ,
668
680
dtype : Optional [str ] = "int8" ,
681
+ init_kwargs : Optional [dict ] = None ,
669
682
** kwargs ,
670
683
):
671
684
"""
@@ -712,27 +725,33 @@ def __init__(
712
725
reduces quantization error.
713
726
dtype (`str`, defaults to "int8"):
714
727
Data type activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
728
+ init_kwargs (`dict`, *optional*):
729
+ Additional parameters for nncf.quantize() call. This explicit argument is needed for deserialization from dict.
730
+ kwargs: Additional parameters for nncf.quantize() call.
715
731
"""
732
+ activation_format = kwargs .pop ("activation_format" , None )
733
+ if activation_format is not None :
734
+ logger .warning (
735
+ "The `activation_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
736
+ "Please use `dtype` instead."
737
+ )
738
+ dtype = activation_format
716
739
super ().__init__ (
717
740
ignored_scope = ignored_scope ,
718
741
num_samples = num_samples ,
719
742
dataset = dataset ,
720
743
tokenizer = tokenizer ,
721
744
processor = processor ,
722
745
trust_remote_code = trust_remote_code ,
746
+ init_kwargs = init_kwargs ,
747
+ ** kwargs ,
723
748
)
724
749
self .bits = bits
725
750
self .sym = sym
726
751
self .model_type = model_type
727
752
self .fast_bias_correction = fast_bias_correction
728
753
self .overflow_fix = overflow_fix
729
754
self .smooth_quant_alpha = smooth_quant_alpha
730
- if kwargs .get ("activation_format" ) is not None :
731
- logger .warning (
732
- "The `activation_format` parameter is deprecated and will be removed in optimum-intel v1.24.0. "
733
- "Please use `dtype` instead."
734
- )
735
- dtype = kwargs .get ("activation_format" )
736
755
self .dtype = dtype
737
756
738
757
f8_dtypes = ["f8e4m3" , "f8e5m2" ]
@@ -769,23 +788,19 @@ def to_nncf_dict(self) -> Dict[str, Any]:
769
788
Returns a dictionary with the variables that are ready to use for nncf.quantize() call.
770
789
"""
771
790
772
- preset = "performance" if self .sym else "mixed"
773
- advanced_parameters_dict = {"overflow_fix" : self .overflow_fix }
791
+ # Merge advanced parameters from init_kwargs if they were provided
792
+ init_kwargs_copy = copy .deepcopy (self .init_kwargs )
793
+ advanced_parameters = init_kwargs_copy .pop ("advanced_parameters" , nncf .AdvancedQuantizationParameters ())
794
+ advanced_parameters .overflow_fix = nncf .OverflowFix (self .overflow_fix )
774
795
if self .smooth_quant_alpha :
775
- advanced_parameters_dict [ " smooth_quant_alphas" ] = { "matmul" : self .smooth_quant_alpha }
796
+ advanced_parameters . smooth_quant_alphas . matmul = self .smooth_quant_alpha
776
797
777
798
mode_map = {"f8e4m3" : "fp8_e4m3" , "f8e5m2" : "fp8_e5m2" }
778
799
mode = mode_map .get (self .dtype )
779
800
801
+ preset = "performance" if self .sym else "mixed"
780
802
preset = nncf .QuantizationPreset (preset )
781
803
model_type = nncf .ModelType (self .model_type )
782
- advanced_parameters = nncf .AdvancedQuantizationParameters (
783
- overflow_fix = advanced_parameters_dict ["overflow_fix" ],
784
- )
785
- if "smooth_quant_alphas" in advanced_parameters_dict :
786
- advanced_parameters .smooth_quant_alphas = nncf .AdvancedSmoothQuantParameters (
787
- ** advanced_parameters_dict ["smooth_quant_alphas" ]
788
- )
789
804
790
805
return {
791
806
"mode" : mode ,
@@ -795,6 +810,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
795
810
"model_type" : model_type ,
796
811
"ignored_scope" : self .get_ignored_scope_instance (),
797
812
"advanced_parameters" : advanced_parameters ,
813
+ ** init_kwargs_copy ,
798
814
}
799
815
800
816
@@ -930,7 +946,6 @@ def __init__(
930
946
Allows to use custom code for the modeling hosted in the model repository. This option should only be
931
947
set for repositories you trust and in which you have read the code, as it will execute on your local
932
948
machine arbitrary code present in the model repository.
933
- **kwargs:
934
949
"""
935
950
self .weight_quantization_config = self ._initialize_quantization_config (
936
951
weight_quantization_config , OVWeightQuantizationConfig
0 commit comments