Commit b54abf1

Added test
1 parent 44f11a7 commit b54abf1

6 files changed: +39 -29 lines

docs/source/openvino/export.mdx (+2 -2)

@@ -31,7 +31,7 @@ Check out the help for more options:
 
 ```text
 usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
-                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,fp8_e4m3,fp8_e5m2}]
+                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,f8e4m3,f8e5m2}]
                                    [--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
                                    [--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
                                    [--group-size GROUP_SIZE] [--backup-precision {none,int8_sym,int8_asym}]
@@ -67,7 +67,7 @@ Optional arguments:
                         on your local machine arbitrary code present in the model repository.
   --weight-format {fp32,fp16,int8,int4,mxfp4,nf4}
                         The weight format of the exported model.
-  --quant-mode {int8,fp8_e4m3,fp8_e5m2}
+  --quant-mode {int8,f8e4m3,f8e5m2}
                         Quantization precision mode. This is used for applying full model quantization including
                         activations.
   --library {transformers,diffusers,timm,sentence_transformers,open_clip}

optimum/commands/export/openvino.py (+1 -1)

@@ -78,7 +78,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
     optional_group.add_argument(
         "--quant-mode",
         type=str,
-        choices=["int8", "fp8_e4m3", "fp8_e5m2"],
+        choices=["int8", "f8e4m3", "f8e5m2"],
         default=None,
         help=(
             "Quantization precision mode. This is used for applying full model quantization including activations. "

optimum/intel/openvino/configuration.py (+3 -3)

@@ -26,7 +26,7 @@
 from optimum.configuration_utils import BaseConfig
 
 from ..utils.import_utils import is_nncf_available
-from .utils import PREDEFINED_SD_DATASETS, PREDEFINED_SPEECH_TO_TEXT_DATASETS, PREDEFINED_VISUAL_LM_DATASETS
+from .utils import PREDEFINED_SD_DATASETS, PREDEFINED_VISUAL_LM_DATASETS
 
 
 if is_nncf_available():
@@ -638,9 +638,9 @@ def __init__(
             SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
             reduces quantization error.
         weight_format (`str`, defaults to "int8"):
-            Data format weights are quantized to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
+            Data format weights are quantized to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
         activation_format (`str`, defaults to "int8"):
-            Data format activations are compressed to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
+            Data format activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
         """
         super().__init__(
             bits=bits,

optimum/intel/openvino/quantization.py (+2 -2)

@@ -1067,8 +1067,8 @@ def _full_quantization(
     )
 
     q_mode_map = {
-        "fp8_e4m3": nncf.QuantizationMode.FP8_E4M3,
-        "fp8_e5m2": nncf.QuantizationMode.FP8_E5M2,
+        "f8e4m3": nncf.QuantizationMode.FP8_E4M3,
+        "f8e5m2": nncf.QuantizationMode.FP8_E5M2,
     }
 
     if quantization_config.activation_format in q_mode_map:
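The dispatch is a plain dictionary lookup from the config's string-valued activation format to an NNCF enum member. A self-contained sketch of the same pattern; the `resolve_mode` helper is hypothetical, not part of this commit:

```python
import nncf

# Same lookup pattern as q_mode_map in _full_quantization: map the
# string-valued activation format to an nncf.QuantizationMode member.
q_mode_map = {
    "f8e4m3": nncf.QuantizationMode.FP8_E4M3,
    "f8e5m2": nncf.QuantizationMode.FP8_E5M2,
}

def resolve_mode(activation_format: str):
    # Hypothetical helper: returns None for formats (e.g. "int8") that
    # should fall through to NNCF's default quantization path.
    return q_mode_map.get(activation_format)
```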

tests/openvino/test_exporters_cli.py (+15 -5)

@@ -114,7 +114,16 @@ class OVCLIExportTestCase(unittest.TestCase):
         (
             "automatic-speech-recognition",
             "whisper",
-            "--quant-mode int8 --dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
+            "int8",
+            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
+            (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
+            (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
+        ),
+        (
+            "automatic-speech-recognition",
+            "whisper",
+            "f8e4m3",
+            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
             (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
             (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
         ),
@@ -407,13 +416,14 @@ def test_exporters_cli_full_quantization(
         self,
         task: str,
         model_type: str,
+        quant_mode: str,
         option: str,
         expected_num_fq_nodes_per_model: Tuple[int],
         expected_num_weight_nodes_per_model: Tuple[int],
     ):
         with TemporaryDirectory() as tmpdir:
             subprocess.run(
-                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} {option} {tmpdir}",
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --quant-mode {quant_mode} {option} {tmpdir}",
                 shell=True,
                 check=True,
             )
@@ -424,9 +434,9 @@ def test_exporters_cli_full_quantization(
             submodels = [model.encoder, model.decoder, model.decoder_with_past]
             self.assertEqual(len(expected_num_fq_nodes_per_model), len(submodels))
             for i, model in enumerate(submodels):
-                actual_num_fq_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
-                self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_fq_nodes)
-                self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes["int8"])
+                actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
+                self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes)
+                self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode])
 
     def test_exporters_cli_int4_with_local_model_and_default_config(self):
         with TemporaryDirectory() as tmpdir:

tests/openvino/utils_tests.py (+16 -16)

@@ -203,29 +203,29 @@
 
 def get_num_quantized_nodes(model):
     num_fake_quantize = 0
-    num_weight_nodes = {
-        "int8": 0,
-        "int4": 0,
-        "f4e2m1": 0,
-        "f8e8m0": 0,
-        "nf4": 0,
+    types_map = {
+        "i8": "int8",
+        "u8": "int8",
+        "i4": "int4",
+        "u4": "int4",
+        "f4e2m1": "f4e2m1",
+        "f8e8m0": "f8e8m0",
+        "nf4": "nf4",
+        "f8e4m3": "f8e4m3",
+        "f8e5m2": "f8e5m2",
     }
+    num_weight_nodes = {n: 0 for n in types_map.values()}
     ov_model = model if isinstance(model, ov.Model) else model.model
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
             num_fake_quantize += 1
+        elif "FakeConvert" in elem.name:
+            num_fake_quantize += 1
         for i in range(elem.get_output_size()):
             type_name = elem.get_output_element_type(i).get_type_name()
-            if type_name in ["i8", "u8"]:
-                num_weight_nodes["int8"] += 1
-            if type_name in ["i4", "u4"]:
-                num_weight_nodes["int4"] += 1
-            if type_name == "f4e2m1":
-                num_weight_nodes["f4e2m1"] += 1
-            if type_name == "f8e8m0":
-                num_weight_nodes["f8e8m0"] += 1
-            if type_name == "nf4":
-                num_weight_nodes["nf4"] += 1
+            if type_name in types_map:
+                name = types_map[type_name]
+                num_weight_nodes[name] += 1
     return num_fake_quantize, num_weight_nodes
 
 
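For context, the refactored helper could be exercised like this sketch; the exported-model path is an assumption, and the import assumes the repo root is on sys.path:

```python
# Sketch: using the refactored helper on an exported seq2seq model. The model
# path is illustrative; the import assumes the repo root is importable.
from optimum.intel import OVModelForSpeechSeq2Seq
from tests.openvino.utils_tests import get_num_quantized_nodes

model = OVModelForSpeechSeq2Seq.from_pretrained("whisper_f8e4m3_ov")
num_f_nodes, num_weight_nodes = get_num_quantized_nodes(model.encoder)
# num_f_nodes now counts FakeConvert ops (inserted for f8 quantization)
# alongside FakeQuantize ops; num_weight_nodes gained "f8e4m3"/"f8e5m2" keys.
print(num_f_nodes, num_weight_nodes["f8e4m3"])
```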