Skip to content

Commit 8c95cae

Browse files
authored
Fix default int8 quantization for CLI (#592)
1 parent 81e180f commit 8c95cae

File tree

2 files changed

+11
-25
lines changed

2 files changed

+11
-25
lines changed

optimum/commands/export/openvino.py

+9-10
Original file line number | Diff line number | Diff line change
@@ -157,13 +157,12 @@ def run(self):
157157
)
158158
self.args.weight_format = "int8"
159159

160-
weight_format = self.args.weight_format or "fp32"
161-
162-
ov_config = None
163-
if weight_format in {"fp16", "fp32"}:
164-
ov_config = OVConfig(dtype=weight_format)
160+
if self.args.weight_format is None:
161+
ov_config = None
162+
elif self.args.weight_format in {"fp16", "fp32"}:
163+
ov_config = OVConfig(dtype=self.args.weight_format)
165164
else:
166-
is_int8 = weight_format == "int8"
165+
is_int8 = self.args.weight_format == "int8"
167166

168167
# For int4 quantization, if no parameter is provided, use the default config if one exists
169168
if (
@@ -182,12 +181,12 @@ def run(self):
182181
"group_size": -1 if is_int8 else self.args.group_size,
183182
}
184183

185-
if weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
184+
if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
186185
logger.warning(
187-
f"--weight-format {weight_format} is deprecated, possible choices are fp32, fp16, int8, int4"
186+
f"--weight-format {self.args.weight_format} is deprecated, possible choices are fp32, fp16, int8, int4"
188187
)
189-
quantization_config["sym"] = "asym" not in weight_format
190-
quantization_config["group_size"] = 128 if "128" in weight_format else 64
188+
quantization_config["sym"] = "asym" not in self.args.weight_format
189+
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
191190
ov_config = OVConfig(quantization_config=quantization_config)
192191

193192
# TODO : add input shapes

optimum/exporters/openvino/__main__.py

+2-15
Original file line number | Diff line number | Diff line change
@@ -21,26 +21,13 @@
2121

2222
from optimum.exporters import TasksManager
2323
from optimum.exporters.onnx.base import OnnxConfig
24+
from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
2425
from optimum.utils.save_utils import maybe_load_preprocessors
2526

26-
from ...intel.utils.import_utils import (
27-
is_openvino_tokenizers_available,
28-
is_optimum_version,
29-
is_transformers_version,
30-
)
27+
from ...intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
3128
from .convert import export_from_model, export_tokenizer
3229

3330

34-
if is_optimum_version(">=", "1.16.0"):
35-
from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
36-
else:
37-
# Copied from https://github.com/huggingface/optimum/blob/main/optimum/exporters/onnx/constants.py
38-
SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [
39-
"bart",
40-
"whisper",
41-
]
42-
43-
4431
if TYPE_CHECKING:
4532
from optimum.intel.openvino.configuration import OVConfig
4633

0 commit comments

Comments
 (0)