@@ -266,6 +266,7 @@ def __init__(
266
266
tokenizer : Optional [str ] = None ,
267
267
processor : Optional [str ] = None ,
268
268
trust_remote_code : bool = False ,
269
+ weight_format : Optional [str ] = None ,
269
270
** kwargs ,
270
271
):
271
272
"""
@@ -279,6 +280,18 @@ def __init__(
279
280
entries provided via this argument are used to create an instance of `nncf.IgnoredScope` class.
280
281
num_samples (`int`, *optional*):
281
282
The maximum number of samples composing the calibration dataset.
283
+ dataset (`str` or `List[str]`, *optional*):
284
+ The dataset used for data-aware optimization with NNCF.
285
+ tokenizer (`str`, *optional*):
286
+ The tokenizer used to process the dataset.
287
+ processor (`str`, *optional*):
288
+ A transformers processor used to process the dataset inputs.
289
+ trust_remote_code (`bool`, defaults to `False`):
290
+ Allows to use custom code for the modeling hosted in the model repository. This option should only be
291
+ set for repositories you trust and in which you have read the code, as it will execute on your local
292
+ machine arbitrary code present in the model repository.
293
+ weight_format (`str`, *optional*):
294
+ Data format weights are compressed to.
282
295
"""
283
296
self .bits = bits
284
297
self .sym = sym
@@ -287,6 +300,7 @@ def __init__(
287
300
self .tokenizer = tokenizer
288
301
self .processor = processor
289
302
self .trust_remote_code = trust_remote_code
303
+ self .weight_format = weight_format
290
304
291
305
if isinstance (ignored_scope , nncf .IgnoredScope ):
292
306
ignored_scope = ignored_scope .__dict__
@@ -370,7 +384,7 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
370
384
scale_estimation (`bool`, *optional*):
371
385
Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
372
386
compressed layers. Providing a dataset is required to run scale estimation.
373
- weight_format (`str`, defaults to 'int' ):
387
+ weight_format (`str`, *optional* ):
374
388
Data format weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4'].
375
389
gptq (`bool`, *optional*):
376
390
Whether to apply GPTQ algorithm. GPTQ optimizes compressed weights in a layer-wise fashion to minimize the
@@ -425,14 +439,14 @@ def __init__(
425
439
tokenizer = tokenizer ,
426
440
processor = processor ,
427
441
trust_remote_code = trust_remote_code ,
442
+ weight_format = weight_format ,
428
443
)
429
444
self .group_size = group_size or (- 1 if bits == 8 else 128 )
430
445
self .ratio = ratio
431
446
self .all_layers = all_layers
432
447
self .sensitivity_metric = sensitivity_metric
433
448
self .quant_method = OVQuantizationMethod (quant_method ) if isinstance (quant_method , str ) else quant_method
434
449
self .scale_estimation = scale_estimation
435
- self .weight_format = weight_format
436
450
self .gptq = gptq
437
451
self .lora_correction = lora_correction
438
452
self .backup_precision = backup_precision
@@ -578,6 +592,8 @@ def __init__(
578
592
processor : Optional [str ] = None ,
579
593
trust_remote_code : bool = False ,
580
594
smooth_quant_alpha : Optional [float ] = None ,
595
+ weight_format : Optional [str ] = "int8" ,
596
+ activation_format : Optional [str ] = "int8" ,
581
597
** kwargs ,
582
598
):
583
599
"""
@@ -621,6 +637,10 @@ def __init__(
621
637
smooth_quant_alpha (`float`, *optional*):
622
638
SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
623
639
reduces quantization error.
640
+ weight_format (`str`, defaults to "int8"):
641
+ Data format weights are quantized to. Possible values: ['int8'].
642
+ activation_format (`str`, defaults to "int8"):
643
+ Data format activations are quantized to. Possible values: ['int8'].
624
644
"""
625
645
super ().__init__ (
626
646
bits = bits ,
@@ -631,11 +651,13 @@ def __init__(
631
651
tokenizer = tokenizer ,
632
652
processor = processor ,
633
653
trust_remote_code = trust_remote_code ,
654
+ weight_format = weight_format ,
634
655
)
635
656
self .model_type = model_type
636
657
self .fast_bias_correction = fast_bias_correction
637
658
self .overflow_fix = overflow_fix
638
659
self .smooth_quant_alpha = smooth_quant_alpha
660
+ self .activation_format = activation_format
639
661
self .post_init ()
640
662
641
663
def post_init (self ):
@@ -659,6 +681,12 @@ def post_init(self):
659
681
f"SmoothQuant alpha parameter must be in range [0, 1], but found { self .smooth_quant_alpha } "
660
682
)
661
683
684
+ if self .weight_format != "int8" :
685
+ raise ValueError ("Only 'int8' weight format is currently supported." )
686
+
687
+ if self .activation_format != "int8" :
688
+ raise ValueError ("Only 'int8' activation format is currently supported." )
689
+
662
690
663
691
class OVConfig (BaseConfig ):
664
692
CONFIG_NAME = "openvino_config.json"
0 commit comments