[OV] Add --all-layers argument to CLI #713

Merged · 2 commits · May 17, 2024

11 changes: 11 additions & 0 deletions optimum/commands/export/openvino.py
@@ -119,6 +119,15 @@ def parse_args_openvino(parser: "ArgumentParser"):
"or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
),
)
optional_group.add_argument(
"--all-layers",
action="store_true",
default=None,
help=(
"Whether embeddings and last MatMul layers should be compressed to INT4. If not provided an weight "
"compression is applied, they are compressed to INT8."
),
)
optional_group.add_argument(
"--disable-stateful",
action="store_true",
@@ -198,6 +207,7 @@ def run(self):
and self.args.ratio is None
and self.args.group_size is None
and self.args.sym is None
and self.args.all_layers is None
and self.args.model in _DEFAULT_4BIT_CONFIGS
):
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
@@ -207,6 +217,7 @@
"ratio": 1 if is_int8 else (self.args.ratio or 0.8),
"sym": self.args.sym or False,
"group_size": -1 if is_int8 else self.args.group_size,
"all_layers": None if is_int8 else self.args.all_layers,
}

if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
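For reference, here is a minimal sketch of how the new flag feeds the weight-compression settings assembled in run() above. It simply mirrors the mapping shown in the diff; the standalone function name is illustrative and not part of the PR:

def build_weight_compression_kwargs(weight_format, ratio=None, sym=None, group_size=None, all_layers=None):
    # Mirrors the logic in run(): INT8 ignores the INT4-only options,
    # while INT4 forwards --all-layers to the weight-compression config.
    is_int8 = weight_format == "int8"
    return {
        "ratio": 1 if is_int8 else (ratio or 0.8),
        "sym": sym or False,
        "group_size": -1 if is_int8 else group_size,
        "all_layers": None if is_int8 else all_layers,
    }

On the command line, the flag is combined with an INT4 weight format, for example (model id and output directory are placeholders):

optimum-cli export openvino --model <model_id> --weight-format int4 --all-layers <output_dir>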
21 changes: 9 additions & 12 deletions tests/openvino/test_exporters_cli.py
@@ -18,7 +18,6 @@

from parameterized import parameterized
from utils_tests import (
_ARCHITECTURES_TO_EXPECTED_INT4_INT8,
_ARCHITECTURES_TO_EXPECTED_INT8,
MODEL_NAMES,
get_num_quantized_nodes,
@@ -84,14 +83,13 @@ class OVCLIExportTestCase(unittest.TestCase):
("latent-consistency", 50, 135),
)

SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)

SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]

TEST_4BIT_CONFIGURATONS = []
for arch in SUPPORTED_4BIT_ARCHITECTURES:
for option in SUPPORTED_4BIT_OPTIONS:
TEST_4BIT_CONFIGURATONS.append([arch[0], arch[1], option])
TEST_4BIT_CONFIGURATONS = [
("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86),
("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86),
("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86),
("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86),
("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32),
]

def _openvino_export(
self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None
@@ -197,17 +195,16 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int):
self.assertEqual(exp_num_fq, num_fq)

@parameterized.expand(TEST_4BIT_CONFIGURATONS)
def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expected_int8: int, expected_int4: int):
with TemporaryDirectory() as tmpdir:
subprocess.run(
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
shell=True,
check=True,
)
model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)

expected_int8, expected_int4 = _ARCHITECTURES_TO_EXPECTED_INT4_INT8[model_type]
_, num_int8, num_int4 = get_num_quantized_nodes(model)
self.assertEqual(expected_int8, num_int8)
self.assertEqual(expected_int4, num_int4)
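As a concrete example, the new llama_awq entry in TEST_4BIT_CONFIGURATONS expands, via the f-string in test_exporters_cli_int4 above, to a command of the following form (the model id behind MODEL_NAMES["llama_awq"] and the output directory are left symbolic here):

optimum-cli export openvino --model <MODEL_NAMES["llama_awq"]> --task text-generation-with-past --weight-format int4 --ratio 1.0 --sym --group-size 16 --all-layers <tmpdir>

For this case the test expects 0 INT8 and 32 INT4 weight nodes, i.e. with --all-layers the embedding and last MatMul weights are compressed to INT4 rather than kept at INT8.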
2 changes: 0 additions & 2 deletions tests/openvino/utils_tests.py
@@ -145,8 +145,6 @@
"stable-diffusion-xl-refiner": (366, 34, 42, 66),
}

_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (62, 86)}


def get_num_quantized_nodes(ov_model):
num_fake_quantize = 0
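The body of get_num_quantized_nodes is truncated in the diff above. Below is a rough sketch of how such a helper can be written with the OpenVINO Python API; it is an assumption about its behaviour (counting FakeQuantize ops and INT8/INT4 output types), not code taken from this PR:

def get_num_quantized_nodes(ov_model):
    # Assumed sketch: ov_model is an exported OVModel*, whose .model attribute
    # is an openvino.runtime.Model whose operations can be inspected.
    num_fake_quantize = 0
    num_int8 = 0
    num_int4 = 0
    for op in ov_model.model.get_ops():
        if op.get_type_name() == "FakeQuantize":
            num_fake_quantize += 1
        for i in range(op.get_output_size()):
            type_name = op.get_output_element_type(i).get_type_name()
            if type_name in ("i8", "u8"):
                num_int8 += 1
            elif type_name in ("i4", "u4"):
                num_int4 += 1
    return num_fake_quantize, num_int8, num_int4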