@@ -379,8 +379,8 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
         scale_estimation (`bool`, *optional*):
             Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
             compressed layers. Providing a dataset is required to run scale estimation.
-        weight_format (`str`, *optional*):
-            Data format weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4'].
+        dtype (`str`, *optional*):
+            Data type weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4'].
         qptq (`bool`, *optional*):
             Whether to apply GPTQ algorithm. GPTQ optimizes compressed weights in a layer-wise fashion to minimize the
             difference between activations of a compressed and original layer. Dataset is required to run GPTQ.
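# Illustrative usage sketch (not part of the diff): after this rename, weight-only
# compression is configured with `dtype` instead of `weight_format`. The argument
# values below are placeholders.
from optimum.intel import OVWeightQuantizationConfig

wc_config = OVWeightQuantizationConfig(bits=4, dtype="int4", ratio=0.8)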
@@ -418,7 +418,7 @@ def __init__(
         num_samples: Optional[int] = None,
         quant_method: Union[str, OVQuantizationMethod] = OVQuantizationMethod.DEFAULT,
         scale_estimation: bool = None,
-        weight_format: Optional[str] = None,
+        dtype: Optional[str] = None,
         gptq: bool = None,
         processor: Optional[str] = None,
         lora_correction: bool = None,
@@ -444,7 +444,7 @@ def __init__(
         self.gptq = gptq
         self.lora_correction = lora_correction
         self.backup_precision = backup_precision
-        self.weight_format = weight_format
+        self.dtype = dtype
         self.post_init()

     def post_init(self):
@@ -486,7 +486,7 @@ def post_init(self):
         if self.bits not in [4, 8]:
             raise ValueError(f"Only support quantization to [4,8] bits but found {self.bits}")

-        if self.bits == 8 and self.weight_format:
+        if self.bits == 8 and self.dtype:
             if self.ratio != 1:
                 raise ValueError(
                     f"For 8-bit quantization, `ratio` is expected to be set to 1.0, but was set to {self.ratio}"
@@ -532,26 +532,26 @@ def post_init(self):
         if self.processor is not None and not isinstance(self.processor, str):
             raise ValueError(f"Processor is expected to be a string, but found {self.processor}")

-        if self.weight_format is None:
-            self.weight_format = "int4" if self.bits == 4 else "int8"
-        if self.weight_format not in ["int4", "int8", "mxfp4", "nf4"]:
+        if self.dtype is None:
+            self.dtype = "int4" if self.bits == 4 else "int8"
+        if self.dtype not in ["int4", "int8", "mxfp4", "nf4"]:
             raise ValueError(
-                f"Weight format must be one of the following: ['int4', 'int8', 'mxfp4', 'nf4'], but found: {self.weight_format}."
+                f"Weights quantization data type must be one of the following: ['int4', 'int8', 'mxfp4', 'nf4'], but found: {self.dtype}."
             )
-        if self.weight_format in ["mxfp4", "nf4"]:
+        if self.dtype in ["mxfp4", "nf4"]:
             if self.bits != 4:
                 raise ValueError(
-                    f"When applying weight compression with '{self.weight_format}' weight format, the `bits` parameter must be set to 4, but found {self.bits}"
+                    f"When applying weight compression with '{self.dtype}' data type, the `bits` parameter must be set to 4, but found {self.bits}"
                 )
-            if self.weight_format == "mxfp4":
+            if self.dtype == "mxfp4":
                 if self.quant_method == OVQuantizationMethod.AWQ:
-                    raise ValueError("The AWQ algorithm is not supported for 'mxpf4' weight format")
+                    raise ValueError("The AWQ algorithm is not supported for 'mxpf4' data type")
                 if self.scale_estimation:
-                    raise ValueError("The Scale Estimation algorithm is not supported for 'mxpf4' weight format")
+                    raise ValueError("The Scale Estimation algorithm is not supported for 'mxpf4' data type")
                 if self.gptq:
-                    raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' weight format")
+                    raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' data type")
                 if self.lora_correction:
-                    raise ValueError("The LoRA Correction algorithm is not supported for 'mxfp4' weight format")
+                    raise ValueError("The LoRA Correction algorithm is not supported for 'mxfp4' data type")
         if self.gptq and self.lora_correction:
             raise ValueError("The GPTQ and LoRA Correction algorithms can't be applied simultaneously")
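# Illustrative sketch (not part of the diff) of what the checks above accept and reject;
# values are placeholders. 'mxfp4' and 'nf4' require bits=4, and 'mxfp4' excludes AWQ,
# scale estimation, GPTQ and LoRA correction.
from optimum.intel import OVWeightQuantizationConfig

OVWeightQuantizationConfig(bits=4, dtype="nf4")  # passes post_init validation
# OVWeightQuantizationConfig(bits=8, dtype="nf4")                           # ValueError: bits must be 4
# OVWeightQuantizationConfig(bits=4, dtype="mxfp4", scale_estimation=True)  # ValueError: not supported for mxfp4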
@@ -561,7 +561,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
         """

         signed_bitness = {4: "int4", 8: "int8"}
-        mode = self.weight_format if self.weight_format else signed_bitness[self.bits]
+        mode = self.dtype if self.dtype else signed_bitness[self.bits]
         if mode in signed_bitness.values():
             mode += "_sym" if self.sym else "_asym"
         if mode == "mxfp4":
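# Illustrative sketch (not part of the diff): how the NNCF compression mode string is now
# derived from `dtype`, falling back to the bit width; inputs below are placeholder values.
signed_bitness = {4: "int4", 8: "int8"}
dtype, bits, sym = "int4", 4, True
mode = dtype if dtype else signed_bitness[bits]
if mode in signed_bitness.values():
    mode += "_sym" if sym else "_asym"
print(mode)  # prints "int4_sym"; "mxfp4" or "nf4" would pass through unchanged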
@@ -627,7 +627,7 @@ def __init__(
         processor: Optional[str] = None,
         trust_remote_code: bool = False,
         smooth_quant_alpha: Optional[float] = None,
-        activation_format: Optional[str] = "int8",
+        dtype: Optional[str] = "int8",
         **kwargs,
     ):
         """
@@ -672,8 +672,8 @@ def __init__(
             smooth_quant_alpha (`float`, *optional*):
                 SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
                 reduces quantization error.
-            activation_format (`str`, defaults to "int8"):
-                Data format activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
+            dtype (`str`, defaults to "int8"):
+                Data type activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
         """
         super().__init__(
             ignored_scope=ignored_scope,
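# Illustrative usage sketch (not part of the diff): full/activation quantization now uses the
# same `dtype` argument name, e.g. for an FP8 run. Argument values are placeholders.
from optimum.intel import OVQuantizationConfig

q_config = OVQuantizationConfig(bits=8, dtype="f8e4m3")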
@@ -689,11 +689,10 @@ def __init__(
         self.fast_bias_correction = fast_bias_correction
         self.overflow_fix = overflow_fix
         self.smooth_quant_alpha = smooth_quant_alpha
-        self.activation_format = activation_format
+        self.dtype = dtype

-        f8_formats = ["f8e4m3", "f8e5m2"]
-        if self.activation_format in f8_formats:
-            logger.info(f"{self.activation_format} for activations was found. A symmetrical scheme will be used.")
+        f8_dtypes = ["f8e4m3", "f8e5m2"]
+        if self.dtype in f8_dtypes:
             self.sym = True
         self.post_init()
@@ -732,7 +731,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
             advanced_parameters_dict["smooth_quant_alphas"] = {"matmul": self.smooth_quant_alpha}

         mode_map = {"f8e4m3": "fp8_e4m3", "f8e5m2": "fp8_e5m2"}
-        mode = mode_map.get(self.activation_format)
+        mode = mode_map.get(self.dtype)

         preset = nncf.QuantizationPreset(preset)
         model_type = nncf.ModelType(self.model_type)
@@ -778,14 +777,14 @@ def __init__(
             "compression", None
         )  # A field for backward-compatability of training-time compression parameters
         if self.quantization_config is not None:
-            if isinstance(self.quantization_config, OVWeightQuantizationConfig):
-                self.dtype = self.quantization_config.weight_format
-            elif isinstance(self.quantization_config, OVQuantizationConfig):
-                self.dtype = self.quantization_config.activation_format
+            if isinstance(self.quantization_config, OVWeightQuantizationConfig) or isinstance(
+                self.quantization_config, OVQuantizationConfig
+            ):
+                self.dtype = self.quantization_config.dtype
             elif isinstance(self.quantization_config, OVMixedQuantizationConfig):
-                weight_format = self.quantization_config.weight_quantization_config.weight_format
-                activation_format = self.quantization_config.full_quantization_config.activation_format
-                self.dtype = f"{weight_format}_{activation_format}"
+                wc_dtype = self.quantization_config.weight_quantization_config.dtype
+                q_dtype = self.quantization_config.full_quantization_config.dtype
+                self.dtype = f"{wc_dtype}_{q_dtype}"
             else:
                 raise ValueError(f"Unsupported type of quantization config: {type(self.quantization_config)}")
         else:
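# Illustrative sketch (not part of the diff): OVConfig.dtype now reads `dtype` uniformly from
# either config, and a mixed config is reported as "<weight dtype>_<activation dtype>".
# Values below are placeholders.
wc_dtype = "int4"    # weight_quantization_config.dtype
q_dtype = "f8e4m3"   # full_quantization_config.dtype
print(f"{wc_dtype}_{q_dtype}")  # "int4_f8e4m3"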