Commit 50c77bf

Rename weight & activation format to dtype
1 parent 4524149 commit 50c77bf

File tree: 3 files changed (+45, -45 lines)

optimum/commands/export/openvino.py (+4, -4)

@@ -361,10 +361,10 @@ def run(self):

        if self.args.quant_mode == "nf4_f8e4m3":
            wc_config = prepare_wc_config(self.args, _DEFAULT_4BIT_CONFIG)
-           wc_config["weight_format"] = "nf4"
+           wc_config["dtype"] = "nf4"

            q_config = prepare_q_config(self.args)
-           q_config["activation_format"] = "f8e4m3"
+           q_config["dtype"] = "f8e4m3"

            quantization_config = {
                "weight_quantization_config": wc_config,
@@ -490,14 +490,14 @@ def prepare_wc_config(args, default_configs):
        "scale_estimation": args.scale_estimation,
        "gptq": args.gptq,
        "lora_correction": args.lora_correction,
-       "weight_format": args.weight_format,
+       "dtype": args.weight_format,
        "backup_precision": args.backup_precision,
    }


def prepare_q_config(args):
    return {
-       "activation_format": args.quant_mode,
+       "dtype": args.quant_mode,
        "bits": 8,
        "sym": args.sym or False,
        "dataset": args.dataset,

optimum/intel/openvino/configuration.py (+31, -32)

@@ -379,8 +379,8 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
        scale_estimation (`bool`, *optional*):
            Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
            compressed layers. Providing a dataset is required to run scale estimation.
-       weight_format (`str`, *optional*):
-           Data format weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4'].
+       dtype (`str`, *optional*):
+           Data type weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4'].
        qptq (`bool`, *optional*):
            Whether to apply GPTQ algorithm. GPTQ optimizes compressed weights in a layer-wise fashion to minimize the
            difference between activations of a compressed and original layer. Dataset is required to run GPTQ.
@@ -418,7 +418,7 @@ def __init__(
        num_samples: Optional[int] = None,
        quant_method: Union[str, OVQuantizationMethod] = OVQuantizationMethod.DEFAULT,
        scale_estimation: bool = None,
-       weight_format: Optional[str] = None,
+       dtype: Optional[str] = None,
        gptq: bool = None,
        processor: Optional[str] = None,
        lora_correction: bool = None,
@@ -444,7 +444,7 @@ def __init__(
        self.gptq = gptq
        self.lora_correction = lora_correction
        self.backup_precision = backup_precision
-       self.weight_format = weight_format
+       self.dtype = dtype
        self.post_init()

    def post_init(self):
@@ -486,7 +486,7 @@ def post_init(self):
        if self.bits not in [4, 8]:
            raise ValueError(f"Only support quantization to [4,8] bits but found {self.bits}")

-       if self.bits == 8 and self.weight_format:
+       if self.bits == 8 and self.dtype:
            if self.ratio != 1:
                raise ValueError(
                    f"For 8-bit quantization, `ratio` is expected to be set to 1.0, but was set to {self.ratio}"
@@ -532,26 +532,26 @@ def post_init(self):
        if self.processor is not None and not isinstance(self.processor, str):
            raise ValueError(f"Processor is expected to be a string, but found {self.processor}")

-       if self.weight_format is None:
-           self.weight_format = "int4" if self.bits == 4 else "int8"
-       if self.weight_format not in ["int4", "int8", "mxfp4", "nf4"]:
+       if self.dtype is None:
+           self.dtype = "int4" if self.bits == 4 else "int8"
+       if self.dtype not in ["int4", "int8", "mxfp4", "nf4"]:
            raise ValueError(
-               f"Weight format must be one of the following: ['int4', 'int8', 'mxfp4', 'nf4'], but found: {self.weight_format}."
+               f"Weights quantization data type must be one of the following: ['int4', 'int8', 'mxfp4', 'nf4'], but found: {self.dtype}."
            )
-       if self.weight_format in ["mxfp4", "nf4"]:
+       if self.dtype in ["mxfp4", "nf4"]:
            if self.bits != 4:
                raise ValueError(
-                   f"When applying weight compression with '{self.weight_format}' weight format, the `bits` parameter must be set to 4, but found {self.bits}"
+                   f"When applying weight compression with '{self.dtype}' data type, the `bits` parameter must be set to 4, but found {self.bits}"
                )
-           if self.weight_format == "mxfp4":
+           if self.dtype == "mxfp4":
                if self.quant_method == OVQuantizationMethod.AWQ:
-                   raise ValueError("The AWQ algorithm is not supported for 'mxpf4' weight format")
+                   raise ValueError("The AWQ algorithm is not supported for 'mxpf4' data type")
                if self.scale_estimation:
-                   raise ValueError("The Scale Estimation algorithm is not supported for 'mxpf4' weight format")
+                   raise ValueError("The Scale Estimation algorithm is not supported for 'mxpf4' data type")
                if self.gptq:
-                   raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' weight format")
+                   raise ValueError("The GPTQ algorithm is not supported for 'mxfp4' data type")
                if self.lora_correction:
-                   raise ValueError("The LoRA Correction algorithm is not supported for 'mxfp4' weight format")
+                   raise ValueError("The LoRA Correction algorithm is not supported for 'mxfp4' data type")
        if self.gptq and self.lora_correction:
            raise ValueError("The GPTQ and LoRA Correction algorithms can't be applied simultaneously")

@@ -561,7 +561,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
        """

        signed_bitness = {4: "int4", 8: "int8"}
-       mode = self.weight_format if self.weight_format else signed_bitness[self.bits]
+       mode = self.dtype if self.dtype else signed_bitness[self.bits]
        if mode in signed_bitness.values():
            mode += "_sym" if self.sym else "_asym"
        if mode == "mxfp4":
@@ -627,7 +627,7 @@ def __init__(
        processor: Optional[str] = None,
        trust_remote_code: bool = False,
        smooth_quant_alpha: Optional[float] = None,
-       activation_format: Optional[str] = "int8",
+       dtype: Optional[str] = "int8",
        **kwargs,
    ):
        """
@@ -672,8 +672,8 @@ def __init__(
            smooth_quant_alpha (`float`, *optional*):
                SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
                reduces quantization error.
-           activation_format (`str`, defaults to "int8"):
-               Data format activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
+           dtype (`str`, defaults to "int8"):
+               Data type activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
        """
        super().__init__(
            ignored_scope=ignored_scope,
@@ -689,11 +689,10 @@ def __init__(
        self.fast_bias_correction = fast_bias_correction
        self.overflow_fix = overflow_fix
        self.smooth_quant_alpha = smooth_quant_alpha
-       self.activation_format = activation_format
+       self.dtype = dtype

-       f8_formats = ["f8e4m3", "f8e5m2"]
-       if self.activation_format in f8_formats:
-           logger.info(f"{self.activation_format} for activations was found. A symmetrical scheme will be used.")
+       f8_dtypes = ["f8e4m3", "f8e5m2"]
+       if self.dtype in f8_dtypes:
            self.sym = True
        self.post_init()

@@ -732,7 +731,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
        advanced_parameters_dict["smooth_quant_alphas"] = {"matmul": self.smooth_quant_alpha}

        mode_map = {"f8e4m3": "fp8_e4m3", "f8e5m2": "fp8_e5m2"}
-       mode = mode_map.get(self.activation_format)
+       mode = mode_map.get(self.dtype)

        preset = nncf.QuantizationPreset(preset)
        model_type = nncf.ModelType(self.model_type)
@@ -778,14 +777,14 @@ def __init__(
            "compression", None
        )  # A field for backward-compatability of training-time compression parameters
        if self.quantization_config is not None:
-           if isinstance(self.quantization_config, OVWeightQuantizationConfig):
-               self.dtype = self.quantization_config.weight_format
-           elif isinstance(self.quantization_config, OVQuantizationConfig):
-               self.dtype = self.quantization_config.activation_format
+           if isinstance(self.quantization_config, OVWeightQuantizationConfig) or isinstance(
+               self.quantization_config, OVQuantizationConfig
+           ):
+               self.dtype = self.quantization_config.dtype
            elif isinstance(self.quantization_config, OVMixedQuantizationConfig):
-               weight_format = self.quantization_config.weight_quantization_config.weight_format
-               activation_format = self.quantization_config.full_quantization_config.activation_format
-               self.dtype = f"{weight_format}_{activation_format}"
+               wc_dtype = self.quantization_config.weight_quantization_config.dtype
+               q_dtype = self.quantization_config.full_quantization_config.dtype
+               self.dtype = f"{wc_dtype}_{q_dtype}"
            else:
                raise ValueError(f"Unsupported type of quantization config: {type(self.quantization_config)}")
        else:
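
On the Python API side, the renamed keyword means user code now passes `dtype` where it previously passed `weight_format` or `activation_format`. A short usage sketch, assuming the config classes are importable from `optimum.intel` and borrowing argument values from the test cases below:

# Usage sketch, not part of the commit. Import path is assumed; argument values
# mirror tests/openvino/test_quantization.py below.
from optimum.intel import (
    OVMixedQuantizationConfig,
    OVQuantizationConfig,
    OVWeightQuantizationConfig,
)

weight_config = OVWeightQuantizationConfig(bits=4, dtype="nf4", group_size=16)
full_config = OVQuantizationConfig(dtype="f8e4m3")
mixed_config = OVMixedQuantizationConfig(
    weight_quantization_config=weight_config,
    full_quantization_config=full_config,
)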

tests/openvino/test_quantization.py (+10, -9)

@@ -124,7 +124,8 @@ class OVQuantizerTest(unittest.TestCase):
            dict(
                dataset="wikitext2",
                num_samples=1,
-               activation_format="f8e4m3",
+               dtype="f8e4m3",
+               weight_only=False,
            ),
            [
                13,
@@ -137,8 +138,8 @@ class OVQuantizerTest(unittest.TestCase):
            OVModelForCausalLM,
            "llama",
            dict(
-               weight_quantization_config=dict(bits=4, weight_format="nf4", group_size=16),
-               full_quantization_config=dict(activation_format="f8e4m3"),
+               weight_quantization_config=dict(bits=4, dtype="nf4", group_size=16, weight_only=True),
+               full_quantization_config=dict(dtype="f8e4m3", weight_only=False),
                dataset="wikitext2",
                num_samples=1,
            ),
@@ -155,12 +156,12 @@ class OVQuantizerTest(unittest.TestCase):
            OVMixedQuantizationConfig(
                weight_quantization_config=OVWeightQuantizationConfig(
                    bits=4,
-                   weight_format="nf4",
+                   dtype="nf4",
                    group_size=16,
                    ignored_scope={"patterns": ["^__module.model.layers.0.self_attn"]},
                ),
                full_quantization_config=OVQuantizationConfig(
-                   activation_format="f8e4m3", ignored_scope={"patterns": ["^__module.model.layers.0.mlp"]}
+                   dtype="f8e4m3", ignored_scope={"patterns": ["^__module.model.layers.0.mlp"]}
                ),
                ignored_scope={"patterns": ["^__module.model.layers.1.self_attn"]},
                dataset="wikitext2",
@@ -335,14 +336,14 @@ class OVWeightCompressionTest(unittest.TestCase):
            OVModelForCausalLM,
            "gpt2",
            False,
-           dict(bits=4, weight_format="mxfp4", group_size=32),
+           dict(bits=4, dtype="mxfp4", group_size=32),
            [{"int8": 4, "f4e2m1": 20, "f8e8m0": 20}],
        ),
        (
            OVModelForCausalLM,
            "gpt2",
            False,
-           dict(bits=4, weight_format="nf4", group_size=32),
+           dict(bits=4, dtype="nf4", group_size=32),
            [
                {
                    "int8": 4,
@@ -905,7 +906,7 @@ def test_ovmodel_4bit_auto_compression_with_config(

            openvino_config = OVConfig.from_pretrained(tmp_dir)
            self.assertEqual(openvino_config.quantization_config.bits, 4)
-           self.assertEqual(openvino_config.dtype, quantization_config.weight_format)
+           self.assertEqual(openvino_config.dtype, quantization_config.dtype)

    @parameterized.expand(((OVModelForCausalLM, "gpt2"),))
    def test_ovmodel_stateful_load_with_compressed_weights(self, model_cls, model_type):
@@ -1062,7 +1063,7 @@ def test_ovmodel_4bit_dynamic_with_config(
            model.save_pretrained(tmp_dir)
            openvino_config = OVConfig.from_pretrained(tmp_dir)
            self.assertEqual(openvino_config.quantization_config.bits, 4)
-           self.assertEqual(openvino_config.dtype, quantization_config.weight_format)
+           self.assertEqual(openvino_config.dtype, quantization_config.dtype)


class OVQuantizerQATest(unittest.TestCase):
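
These tests also cover the mixed case, where `OVConfig.dtype` is now composed from the two renamed fields. A tiny illustration with the nf4/f8e4m3 values used above:

# Illustration only: OVConfig joins the two renamed fields (see the
# configuration.py hunk above) into a single dtype string.
wc_dtype = "nf4"     # weight_quantization_config.dtype
q_dtype = "f8e4m3"   # full_quantization_config.dtype
assert f"{wc_dtype}_{q_dtype}" == "nf4_f8e4m3"  # same value the CLI checks via args.quant_mode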
