Commit b54abf1

Added test
1 parent 44f11a7 commit b54abf1

6 files changed: +39 -29 lines

docs/source/openvino/export.mdx (+2 -2)

@@ -31,7 +31,7 @@ Check out the help for more options:
 
 ```text
 usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
-                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,fp8_e4m3,fp8_e5m2}]
+                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,f8e4m3,f8e5m2}]
                                    [--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
                                    [--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
                                    [--group-size GROUP_SIZE] [--backup-precision {none,int8_sym,int8_asym}]
@@ -67,7 +67,7 @@ Optional arguments:
                         on your local machine arbitrary code present in the model repository.
   --weight-format {fp32,fp16,int8,int4,mxfp4,nf4}
                         The weight format of the exported model.
-  --quant-mode {int8,fp8_e4m3,fp8_e5m2}
+  --quant-mode {int8,f8e4m3,f8e5m2}
                         Quantization precision mode. This is used for applying full model quantization including
                         activations.
   --library {transformers,diffusers,timm,sentence_transformers,open_clip}

optimum/commands/export/openvino.py (+1 -1)

@@ -78,7 +78,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
     optional_group.add_argument(
         "--quant-mode",
         type=str,
-        choices=["int8", "fp8_e4m3", "fp8_e5m2"],
+        choices=["int8", "f8e4m3", "f8e5m2"],
         default=None,
         help=(
             "Quantization precision mode. This is used for applying full model quantization including activations. "

optimum/intel/openvino/configuration.py (+3 -3)

@@ -26,7 +26,7 @@
 from optimum.configuration_utils import BaseConfig
 
 from ..utils.import_utils import is_nncf_available
-from .utils import PREDEFINED_SD_DATASETS, PREDEFINED_SPEECH_TO_TEXT_DATASETS, PREDEFINED_VISUAL_LM_DATASETS
+from .utils import PREDEFINED_SD_DATASETS, PREDEFINED_VISUAL_LM_DATASETS
 
 
 if is_nncf_available():
@@ -638,9 +638,9 @@ def __init__(
             SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
             reduces quantization error.
         weight_format (`str`, defaults to "int8"):
-            Data format weights are quantized to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
+            Data format weights are quantized to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
         activation_format (`str`, defaults to "int8"):
-            Data format activations are compressed to. Possible values: ['int8', 'fp8_e4m3', 'fp8_e5m2'].
+            Data format activations are compressed to. Possible values: ['int8', 'f8e4m3', 'f8e5m2'].
         """
         super().__init__(
             bits=bits,

optimum/intel/openvino/quantization.py (+2 -2)

@@ -1067,8 +1067,8 @@ def _full_quantization(
     )
 
     q_mode_map = {
-        "fp8_e4m3": nncf.QuantizationMode.FP8_E4M3,
-        "fp8_e5m2": nncf.QuantizationMode.FP8_E5M2,
+        "f8e4m3": nncf.QuantizationMode.FP8_E4M3,
+        "f8e5m2": nncf.QuantizationMode.FP8_E5M2,
     }
 
     if quantization_config.activation_format in q_mode_map:
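The dispatch is a plain dictionary lookup from the config's string-valued activation format to an NNCF enum member. A self-contained sketch of the same pattern; the `resolve_mode` helper is hypothetical, not part of this commit:

```python
import nncf

# Same lookup pattern as q_mode_map in _full_quantization: map the
# string-valued activation format to an nncf.QuantizationMode member.
q_mode_map = {
    "f8e4m3": nncf.QuantizationMode.FP8_E4M3,
    "f8e5m2": nncf.QuantizationMode.FP8_E5M2,
}

def resolve_mode(activation_format: str):
    # Hypothetical helper: returns None for formats (e.g. "int8") that
    # should fall through to NNCF's default quantization path.
    return q_mode_map.get(activation_format)
```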

tests/openvino/test_exporters_cli.py (+15 -5)

@@ -114,7 +114,16 @@ class OVCLIExportTestCase(unittest.TestCase):
         (
             "automatic-speech-recognition",
             "whisper",
-            "--quant-mode int8 --dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
+            "int8",
+            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
+            (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
+            (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
+        ),
+        (
+            "automatic-speech-recognition",
+            "whisper",
+            "f8e4m3",
+            "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code --sym",
             (14, 22, 21) if is_transformers_version("<=", "4.36.0") else (14, 22, 25),
             (14, 21, 17) if is_transformers_version("<=", "4.36.0") else (14, 22, 18),
         ),
@@ -407,13 +416,14 @@ def test_exporters_cli_full_quantization(
         self,
         task: str,
         model_type: str,
+        quant_mode: str,
         option: str,
         expected_num_fq_nodes_per_model: Tuple[int],
         expected_num_weight_nodes_per_model: Tuple[int],
     ):
         with TemporaryDirectory() as tmpdir:
             subprocess.run(
-                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} {option} {tmpdir}",
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --quant-mode {quant_mode} {option} {tmpdir}",
                 shell=True,
                 check=True,
             )
@@ -424,9 +434,9 @@ def test_exporters_cli_full_quantization(
             submodels = [model.encoder, model.decoder, model.decoder_with_past]
             self.assertEqual(len(expected_num_fq_nodes_per_model), len(submodels))
             for i, model in enumerate(submodels):
-                actual_num_fq_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
-                self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_fq_nodes)
-                self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes["int8"])
+                actual_num_f_nodes, actual_num_weight_nodes = get_num_quantized_nodes(model)
+                self.assertEqual(expected_num_fq_nodes_per_model[i], actual_num_f_nodes)
+                self.assertEqual(expected_num_weight_nodes_per_model[i], actual_num_weight_nodes[quant_mode])
 
     def test_exporters_cli_int4_with_local_model_and_default_config(self):
         with TemporaryDirectory() as tmpdir:

tests/openvino/utils_tests.py (+16 -16)

@@ -203,29 +203,29 @@
 
 def get_num_quantized_nodes(model):
     num_fake_quantize = 0
-    num_weight_nodes = {
-        "int8": 0,
-        "int4": 0,
-        "f4e2m1": 0,
-        "f8e8m0": 0,
-        "nf4": 0,
+    types_map = {
+        "i8": "int8",
+        "u8": "int8",
+        "i4": "int4",
+        "u4": "int4",
+        "f4e2m1": "f4e2m1",
+        "f8e8m0": "f8e8m0",
+        "nf4": "nf4",
+        "f8e4m3": "f8e4m3",
+        "f8e5m2": "f8e5m2",
     }
+    num_weight_nodes = {n: 0 for n in types_map.values()}
     ov_model = model if isinstance(model, ov.Model) else model.model
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
             num_fake_quantize += 1
+        elif "FakeConvert" in elem.name:
+            num_fake_quantize += 1
         for i in range(elem.get_output_size()):
             type_name = elem.get_output_element_type(i).get_type_name()
-            if type_name in ["i8", "u8"]:
-                num_weight_nodes["int8"] += 1
-            if type_name in ["i4", "u4"]:
-                num_weight_nodes["int4"] += 1
-            if type_name == "f4e2m1":
-                num_weight_nodes["f4e2m1"] += 1
-            if type_name == "f8e8m0":
-                num_weight_nodes["f8e8m0"] += 1
-            if type_name == "nf4":
-                num_weight_nodes["nf4"] += 1
+            if type_name in types_map:
+                name = types_map[type_name]
+                num_weight_nodes[name] += 1
     return num_fake_quantize, num_weight_nodes
 
 
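For context, the refactored helper could be exercised like this sketch; the exported-model path is an assumption, and the import assumes the repo root is on sys.path:

```python
# Sketch: using the refactored helper on an exported seq2seq model. The model
# path is illustrative; the import assumes the repo root is importable.
from optimum.intel import OVModelForSpeechSeq2Seq
from tests.openvino.utils_tests import get_num_quantized_nodes

model = OVModelForSpeechSeq2Seq.from_pretrained("whisper_f8e4m3_ov")
num_f_nodes, num_weight_nodes = get_num_quantized_nodes(model.encoder)
# num_f_nodes now counts FakeConvert ops (inserted for f8 quantization)
# alongside FakeQuantize ops; num_weight_nodes gained "f8e4m3"/"f8e5m2" keys.
print(num_f_nodes, num_weight_nodes["f8e4m3"])
```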