Skip to content

Commit 3b1f28e

Browse files
committed
Update CLI argument
1 parent e9984e1 commit 3b1f28e

File tree

5 files changed

+61
-10
lines changed

5 files changed

+61
-10
lines changed

optimum/commands/export/openvino.py

+48-4
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
7777
optional_group.add_argument(
7878
"--weight-format",
7979
type=str,
80-
choices=["fp32", "fp16", "int8", "int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"],
80+
choices=["fp32", "fp16", "int8", "int4", "int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"],
8181
default=None,
8282
help=(
8383
"The weight format of the exporting model, e.g. f32 stands for float32 weights, f16 - for float16 weights, i8 - INT8 weights, int4_* - for INT4 compressed weights."
@@ -86,12 +86,25 @@ def parse_args_openvino(parser: "ArgumentParser"):
8686
optional_group.add_argument(
8787
"--ratio",
8888
type=float,
89-
default=0.8,
89+
default=None,
9090
help=(
9191
"Compression ratio between primary and backup precision. In the case of INT4, NNCF evaluates layer sensitivity and keeps the most impactful layers in INT8"
9292
"precision (by default 20%% in INT8). This helps to achieve better accuracy after weight compression."
9393
),
9494
)
95+
optional_group.add_argument(
96+
"--sym",
97+
type=bool,
98+
default=None,
99+
help=("Whether to apply symmetric quantization"),
100+
)
101+
102+
optional_group.add_argument(
103+
"--group-size",
104+
type=int,
105+
default=None,
106+
help=("The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization."),
107+
)
95108
optional_group.add_argument(
96109
"--disable-stateful",
97110
action="store_true",
@@ -132,6 +145,7 @@ def parse_args(parser: "ArgumentParser"):
132145

133146
def run(self):
134147
from ...exporters.openvino.__main__ import main_export
148+
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
135149

136150
if self.args.fp16:
137151
logger.warning(
@@ -144,6 +158,37 @@ def run(self):
144158
)
145159
self.args.weight_format = "int8"
146160

161+
ov_config = None
162+
if self.args.weight_format in {"fp16", "fp32"}:
163+
ov_config = OVConfig(dtype=self.args.weight_format)
164+
else:
165+
is_int8 = self.args.weight_format == "int8"
166+
167+
# For int4 quantization, if no parameter is provided, use the model's default config if one exists
168+
if (
169+
not is_int8
170+
and self.args.ratio is None
171+
and self.args.group_size is None
172+
and self.args.sym is None
173+
and self.args.model in _DEFAULT_4BIT_CONFIGS
174+
):
175+
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
176+
else:
177+
quantization_config = {
178+
"bits": 8 if is_int8 else 4,
179+
"ratio": 1 if is_int8 else (self.args.ratio or 0.8),
180+
"sym": self.args.sym or False,
181+
"group_size": -1 if is_int8 else self.args.group_size,
182+
}
183+
184+
if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
185+
logger.warning(
186+
f"--weight-format {self.args.weight_format} is deprecated, possible choices are fp32, fp16, int8, int4"
187+
)
188+
quantization_config["sym"] = "asym" not in self.args.weight_format
189+
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
190+
ov_config = OVConfig(quantization_config=quantization_config)
191+
147192
# TODO : add input shapes
148193
main_export(
149194
model_name_or_path=self.args.model,
@@ -153,8 +198,7 @@ def run(self):
153198
cache_dir=self.args.cache_dir,
154199
trust_remote_code=self.args.trust_remote_code,
155200
pad_token_id=self.args.pad_token_id,
156-
compression_option=self.args.weight_format,
157-
compression_ratio=self.args.ratio,
201+
ov_config=ov_config,
158202
stateful=not self.args.disable_stateful,
159203
convert_tokenizer=self.args.convert_tokenizer,
160204
# **input_shapes,

optimum/exporters/openvino/__main__.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import logging
1616
from pathlib import Path
17-
from typing import Any, Callable, Dict, Optional, Union
17+
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
1818

1919
from requests.exceptions import ConnectionError as RequestsConnectionError
2020
from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
@@ -41,6 +41,9 @@
4141
]
4242

4343

44+
if TYPE_CHECKING:
45+
from optimum.intel.openvino.configuration import OVConfig
46+
4447
_COMPRESSION_OPTIONS = {
4548
"int8": {"bits": 8},
4649
"int4_sym_g128": {"bits": 4, "sym": True, "group_size": 128},

optimum/exporters/openvino/convert.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import logging
1919
import os
2020
from pathlib import Path
21-
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
21+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
2222

2323
from transformers import T5Tokenizer, T5TokenizerFast
2424
from transformers.utils import is_tf_available, is_torch_available
@@ -71,6 +71,10 @@
7171
from transformers.modeling_tf_utils import TFPreTrainedModel
7272

7373

74+
if TYPE_CHECKING:
75+
from optimum.intel.openvino.configuration import OVConfig
76+
77+
7478
def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None):
7579
compress_to_fp16 = False
7680
if ov_config is not None:

optimum/intel/openvino/configuration.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@
7777
}
7878

7979

80-
DEFAULT_4BIT_CONFIGS = {
80+
_DEFAULT_4BIT_CONFIGS = {
8181
"databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.5},
8282
"EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64},
8383
"facebook/opt-6.7b": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8},
@@ -241,4 +241,4 @@ def post_init(self):
241241

242242

243243
def _check_default_4bit_configs(config: PretrainedConfig):
244-
return DEFAULT_4BIT_CONFIGS.get(config.name_or_path, None)
244+
return _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, None)

optimum/intel/utils/dummy_openvino_objects.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def __init__(self, *args, **kwargs):
177177

178178
@classmethod
179179
def from_pretrained(cls, *args, **kwargs):
180-
requires_backends(self, ["openvino"])
180+
requires_backends(cls, ["openvino"])
181181

182182

183183
class OVWeightQuantizationConfig(metaclass=DummyObject):
@@ -188,4 +188,4 @@ def __init__(self, *args, **kwargs):
188188

189189
@classmethod
190190
def from_pretrained(cls, *args, **kwargs):
191-
requires_backends(self, ["openvino"])
191+
requires_backends(cls, ["openvino"])

0 commit comments

Comments
 (0)