14
14
import subprocess
15
15
import unittest
16
16
from pathlib import Path
17
+ from typing import Dict , List
17
18
18
19
from parameterized import parameterized
19
20
from transformers import AutoModelForCausalLM
20
21
from utils_tests import (
21
22
_ARCHITECTURES_TO_EXPECTED_INT8 ,
22
23
MODEL_NAMES ,
24
+ compare_num_quantized_nodes_per_model ,
23
25
get_num_quantized_nodes ,
24
26
)
25
27
@@ -108,37 +110,47 @@ class OVCLIExportTestCase(unittest.TestCase):
108
110
SUPPORTED_SD_HYBRID_ARCHITECTURES .append (("flux" , 7 , 56 ))
109
111
110
112
TEST_4BIT_CONFIGURATIONS = [
111
- ("text-generation-with-past" , "opt125m" , "int4 --sym --group-size 128" , {"int8" : 4 , "int4" : 72 }),
112
- ("text-generation-with-past" , "opt125m" , "int4 --group-size 64" , {"int8" : 4 , "int4" : 144 }),
113
- ("text-generation-with-past" , "opt125m" , "mxfp4" , {"int8" : 4 , "f4e2m1" : 72 , "f8e8m0" : 72 }),
114
- ("text-generation-with-past" , "opt125m" , "nf4" , {"int8" : 4 , "nf4" : 72 }),
115
- ("text-generation-with-past" , "llama_awq" , "int4 --ratio 1.0 --sym --group-size 8 --all-layers" , {"int4" : 16 }),
113
+ ("text-generation-with-past" , "opt125m" , "int4 --sym --group-size 128" , [{"int8" : 4 , "int4" : 72 }]),
114
+ ("text-generation-with-past" , "opt125m" , "int4 --group-size 64" , [{"int8" : 4 , "int4" : 144 }]),
115
+ ("text-generation-with-past" , "opt125m" , "mxfp4" , [{"int8" : 4 , "f4e2m1" : 72 , "f8e8m0" : 72 }]),
116
+ ("text-generation-with-past" , "opt125m" , "nf4" , [{"int8" : 4 , "nf4" : 72 }]),
117
+ (
118
+ "text-generation-with-past" ,
119
+ "llama_awq" ,
120
+ "int4 --ratio 1.0 --sym --group-size 8 --all-layers" ,
121
+ [{"int4" : 16 }],
122
+ ),
116
123
(
117
124
"text-generation-with-past" ,
118
125
"llama_awq" ,
119
126
"int4 --ratio 1.0 --sym --group-size 16 --awq --dataset wikitext2 --num-samples 100 "
120
127
"--sensitivity-metric max_activation_variance" ,
121
- {"int8" : 4 , "int4" : 14 },
128
+ [ {"int8" : 4 , "int4" : 14 }] ,
122
129
),
123
130
(
124
131
"text-generation-with-past" ,
125
132
"llama_awq" ,
126
133
"int4 --ratio 1.0 --sym --group-size 16 --scale-estimation --dataset wikitext2 --num-samples 100 " ,
127
- {"int8" : 4 , "int4" : 14 },
134
+ [ {"int8" : 4 , "int4" : 14 }] ,
128
135
),
129
136
(
130
137
"text-generation-with-past" ,
131
138
"llama_awq" ,
132
139
"int4 --ratio 1.0 --sym --group-size 16 --gptq --dataset wikitext2 --num-samples 100 " ,
133
- {"int8" : 4 , "int4" : 14 },
140
+ [ {"int8" : 4 , "int4" : 14 }] ,
134
141
),
135
142
(
136
143
"text-generation-with-past" ,
137
144
"llama_awq" ,
138
145
"int4 --ratio 1.0 --sym --group-size 16 --lora-correction --dataset auto --num-samples 16" ,
139
- {"int8" : 60 , "int4" : 14 },
146
+ [{"int8" : 60 , "int4" : 14 }],
147
+ ),
148
+ (
149
+ "text-generation-with-past" ,
150
+ "llama_awq" ,
151
+ "int4 --group-size 16 --backup-precision none --ratio 0.5" ,
152
+ [{"int4" : 6 }],
140
153
),
141
- ("text-generation-with-past" , "llama_awq" , "int4 --group-size 16 --backup-precision none" , {"int4" : 28 }),
142
154
]
143
155
144
156
if is_transformers_version (">=" , "4.40.0" ):
@@ -147,36 +159,73 @@ class OVCLIExportTestCase(unittest.TestCase):
147
159
(
148
160
"image-text-to-text" ,
149
161
"llava_next" ,
150
- 'int4 --group-size 16 --ratio 0.9 --sensitivity-metric "mean_activation_magnitude" '
162
+ "int4 --group-size 16 --ratio 0.8" ,
163
+ [{"int8" : 14 , "int4" : 16 }, {"int8" : 9 }, {"int8" : 1 }],
164
+ ),
165
+ (
166
+ "image-text-to-text" ,
167
+ "llava_next" ,
168
+ 'int4 --group-size 16 --ratio 0.8 --sensitivity-metric "hessian_input_activation" '
151
169
"--dataset contextual --num-samples 1" ,
152
- {"int8" : 8 , "int4" : 22 },
170
+ [{"int8" : 6 , "int4" : 24 }, {"int8" : 9 }, {"int8" : 1 }],
171
+ ),
172
+ (
173
+ "image-text-to-text" ,
174
+ "nanollava" ,
175
+ "int4 --group-size 8 --ratio 0.8 --trust-remote-code" ,
176
+ [{"int8" : 16 , "int4" : 14 }, {"int8" : 15 }, {"int8" : 1 }],
153
177
),
154
178
(
155
179
"image-text-to-text" ,
156
180
"nanollava" ,
157
- 'int4 --group-size 8 --ratio 0.9 --sensitivity-metric "mean_activation_variance" '
181
+ 'int4 --group-size 8 --ratio 0.8 --sensitivity-metric "mean_activation_variance" '
158
182
"--dataset contextual --num-samples 1 --trust-remote-code" ,
159
- {"int8" : 12 , "int4" : 18 } ,
183
+ [ {"int8" : 16 , "int4" : 14 }, { "int8" : 15 }, { "int8" : 1 }] ,
160
184
),
161
185
]
162
186
)
163
187
164
188
if is_transformers_version (">=" , "4.45.0" ):
165
189
TEST_4BIT_CONFIGURATIONS .extend (
166
190
[
191
+ (
192
+ "image-text-to-text" ,
193
+ "minicpmv" ,
194
+ "int4 --group-size 4 --ratio 0.8 --trust-remote-code" ,
195
+ [{"int8" : 10 , "int4" : 20 }, {"int8" : 26 }, {"int8" : 1 }, {"int8" : 6 }],
196
+ ),
197
+ (
198
+ "image-text-to-text" ,
199
+ "minicpmv" ,
200
+ 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
201
+ "--dataset contextual --num-samples 1 --trust-remote-code" ,
202
+ [{"int8" : 8 , "int4" : 22 }, {"int8" : 26 }, {"int8" : 1 }, {"int8" : 6 }],
203
+ ),
204
+ (
205
+ "image-text-to-text" ,
206
+ "internvl2" ,
207
+ "int4 --group-size 4 --ratio 0.8 --trust-remote-code" ,
208
+ [{"int8" : 8 , "int4" : 22 }, {"int8" : 11 }, {"int8" : 1 }],
209
+ ),
167
210
(
168
211
"image-text-to-text" ,
169
212
"internvl2" ,
170
- 'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation " '
213
+ 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude " '
171
214
"--dataset contextual --num-samples 1 --trust-remote-code" ,
172
- {"int8" : 6 , "int4" : 24 },
215
+ [{"int8" : 8 , "int4" : 22 }, {"int8" : 11 }, {"int8" : 1 }],
216
+ ),
217
+ (
218
+ "image-text-to-text" ,
219
+ "phi3_v" ,
220
+ "int4 --group-size 4 --ratio 0.8 --trust-remote-code" ,
221
+ [{"int8" : 8 , "int4" : 10 }, {"int8" : 7 }, {"int8" : 1 }, {"int8" : 2 }],
173
222
),
174
223
(
175
224
"image-text-to-text" ,
176
225
"phi3_v" ,
177
- 'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "mean_activation_magnitude" '
226
+ 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
178
227
"--dataset contextual --num-samples 1 --trust-remote-code" ,
179
- {"int8" : 4 , "int4" : 14 },
228
+ [ {"int8" : 4 , "int4" : 14 }, { "int8" : 7 }, { "int8" : 1 }, { "int8" : 2 }] ,
180
229
),
181
230
]
182
231
)
@@ -300,7 +349,9 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: in
300
349
self .assertEqual (exp_num_fq , num_fq )
301
350
302
351
@parameterized .expand (TEST_4BIT_CONFIGURATIONS )
303
- def test_exporters_cli_4bit (self , task : str , model_type : str , option : str , expected_num_weight_nodes : dict ):
352
+ def test_exporters_cli_4bit (
353
+ self , task : str , model_type : str , option : str , expected_num_weight_nodes_per_model : List [Dict ]
354
+ ):
304
355
with TemporaryDirectory () as tmpdir :
305
356
result = subprocess .run (
306
357
f"optimum-cli export openvino --model { MODEL_NAMES [model_type ]} --task { task } --weight-format { option } { tmpdir } " ,
@@ -317,11 +368,15 @@ def test_exporters_cli_4bit(self, task: str, model_type: str, option: str, expec
317
368
else _HEAD_TO_AUTOMODELS [model_type .replace ("-refiner" , "" )]
318
369
).from_pretrained (tmpdir , ** model_kwargs )
319
370
320
- ov_model = model .lm_model if task == "image-text-to-text" else model .model
371
+ submodels = []
372
+ if task == "text-generation-with-past" :
373
+ submodels = [model ]
374
+ elif task == "image-text-to-text" :
375
+ submodels = [model .lm_model , model .vision_embeddings_model , model .text_embeddings_model ]
376
+ submodels += [getattr (model , part ) for part in model .additional_parts ]
377
+
378
+ compare_num_quantized_nodes_per_model (self , submodels , expected_num_weight_nodes_per_model )
321
379
322
- _ , num_weight_nodes = get_num_quantized_nodes (ov_model )
323
- expected_num_weight_nodes .update ({k : 0 for k in set (num_weight_nodes ) - set (expected_num_weight_nodes )})
324
- self .assertEqual (expected_num_weight_nodes , num_weight_nodes )
325
380
self .assertTrue ("--awq" not in option or b"Applying AWQ" in result .stdout )
326
381
self .assertTrue ("--scale-estimation" not in option or b"Applying Scale Estimation" in result .stdout )
327
382
self .assertTrue ("--gptq" not in option or b"Applying GPTQ" in result .stdout )
0 commit comments