|
18 | 18 |
|
19 | 19 | from parameterized import parameterized
|
20 | 20 | from utils_tests import (
|
21 |
| - _ARCHITECTURES_TO_EXPECTED_INT4_INT8, |
22 | 21 | _ARCHITECTURES_TO_EXPECTED_INT8,
|
23 | 22 | MODEL_NAMES,
|
24 | 23 | get_num_quantized_nodes,
|
@@ -84,14 +83,13 @@ class OVCLIExportTestCase(unittest.TestCase):
|
84 | 83 | ("latent-consistency", 50, 135),
|
85 | 84 | )
|
86 | 85 |
|
87 |
| - SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),) |
88 |
| - |
89 |
| - SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"] |
90 |
| - |
91 |
| - TEST_4BIT_CONFIGURATONS = [] |
92 |
| - for arch in SUPPORTED_4BIT_ARCHITECTURES: |
93 |
| - for option in SUPPORTED_4BIT_OPTIONS: |
94 |
| - TEST_4BIT_CONFIGURATONS.append([arch[0], arch[1], option]) |
| 86 | + TEST_4BIT_CONFIGURATONS = [ |
| 87 | + ("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86), |
| 88 | + ("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86), |
| 89 | + ("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86), |
| 90 | + ("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86), |
| 91 | + ("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32), |
| 92 | + ] |
95 | 93 |
|
96 | 94 | def _openvino_export(
|
97 | 95 | self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None
|
@@ -197,17 +195,16 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: in
|
197 | 195 | self.assertEqual(exp_num_fq, num_fq)
|
198 | 196 |
|
199 | 197 | @parameterized.expand(TEST_4BIT_CONFIGURATONS)
|
200 |
| - def test_exporters_cli_int4(self, task: str, model_type: str, option: str): |
| 198 | + def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expected_int8: int, expected_int4: int): |
201 | 199 | with TemporaryDirectory() as tmpdir:
|
202 | 200 | subprocess.run(
|
203 |
| - f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}", |
| 201 | + f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}", |
204 | 202 | shell=True,
|
205 | 203 | check=True,
|
206 | 204 | )
|
207 | 205 | model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {}
|
208 | 206 | model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs)
|
209 | 207 |
|
210 |
| - expected_int8, expected_int4 = _ARCHITECTURES_TO_EXPECTED_INT4_INT8[model_type] |
211 | 208 | _, num_int8, num_int4 = get_num_quantized_nodes(model)
|
212 | 209 | self.assertEqual(expected_int8, num_int8)
|
213 | 210 | self.assertEqual(expected_int4, num_int4)
|
|
0 commit comments