Skip to content

Commit bc5051f

Browse files
Add --all-layers argument to openvino CLI (#713)
* Add --all-layers argument to CLI
* Update description
1 parent 60d5bf6 commit bc5051f

File tree

3 files changed

+20
-14
lines changed

3 files changed

+20
-14
lines changed

optimum/commands/export/openvino.py

+11
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,15 @@ def parse_args_openvino(parser: "ArgumentParser"):
119119
"or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
120120
),
121121
)
122+
optional_group.add_argument(
123+
"--all-layers",
124+
action="store_true",
125+
default=None,
126+
help=(
127+
"Whether embeddings and last MatMul layers should be compressed to INT4. If not provided and weight "
128+
"compression is applied, they are compressed to INT8."
129+
),
130+
)
122131
optional_group.add_argument(
123132
"--disable-stateful",
124133
action="store_true",
@@ -198,6 +207,7 @@ def run(self):
198207
and self.args.ratio is None
199208
and self.args.group_size is None
200209
and self.args.sym is None
210+
and self.args.all_layers is None
201211
and self.args.model in _DEFAULT_4BIT_CONFIGS
202212
):
203213
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
@@ -207,6 +217,7 @@ def run(self):
207217
"ratio": 1 if is_int8 else (self.args.ratio or 0.8),
208218
"sym": self.args.sym or False,
209219
"group_size": -1 if is_int8 else self.args.group_size,
220+
"all_layers": None if is_int8 else self.args.all_layers,
210221
}
211222

212223
if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:

tests/openvino/test_exporters_cli.py

+9-12
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818

1919
from parameterized import parameterized
2020
from utils_tests import (
21-
_ARCHITECTURES_TO_EXPECTED_INT4_INT8,
2221
_ARCHITECTURES_TO_EXPECTED_INT8,
2322
MODEL_NAMES,
2423
get_num_quantized_nodes,
@@ -84,14 +83,13 @@ class OVCLIExportTestCase(unittest.TestCase):
8483
("latent-consistency", 50, 135),
8584
)
8685

87-
SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)
88-
89-
SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]
90-
91-
TEST_4BIT_CONFIGURATONS = []
92-
for arch in SUPPORTED_4BIT_ARCHITECTURES:
93-
for option in SUPPORTED_4BIT_OPTIONS:
94-
TEST_4BIT_CONFIGURATONS.append([arch[0], arch[1], option])
86+
TEST_4BIT_CONFIGURATONS = [
87+
("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86),
88+
("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86),
89+
("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86),
90+
("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86),
91+
("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32),
92+
]
9593

9694
def _openvino_export(
9795
self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None
@@ -197,17 +195,16 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: in
197195
self.assertEqual(exp_num_fq, num_fq)
198196

199197
@parameterized.expand(TEST_4BIT_CONFIGURATONS)
200-
def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
198+
def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expected_int8: int, expected_int4: int):
201199
with TemporaryDirectory() as tmpdir:
202200
subprocess.run(
203-
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
201+
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
204202
shell=True,
205203
check=True,
206204
)
207205
model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
208206
model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)
209207

210-
expected_int8, expected_int4 = _ARCHITECTURES_TO_EXPECTED_INT4_INT8[model_type]
211208
_, num_int8, num_int4 = get_num_quantized_nodes(model)
212209
self.assertEqual(expected_int8, num_int8)
213210
self.assertEqual(expected_int4, num_int4)

tests/openvino/utils_tests.py

-2
Original file line number | Diff line number | Diff line change
@@ -149,8 +149,6 @@
149149
"stable-diffusion-xl-refiner": (366, 34, 42, 66),
150150
}
151151

152-
_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 86)}
153-
154152

155153
def get_num_quantized_nodes(ov_model):
156154
num_fake_quantize = 0

0 commit comments

Comments
 (0)