@@ -21,7 +21,7 @@
 from utils_tests import (
     _ARCHITECTURES_TO_EXPECTED_INT8,
     MODEL_NAMES,
-    compare_num_quantized_nodes_per_model,
+    check_compression_state_per_model,
     get_num_quantized_nodes,
 )
 
@@ -192,27 +192,27 @@ class OVCLIExportTestCase(unittest.TestCase):
                     "image-text-to-text",
                     "llava_next",
                     "int4 --group-size 16 --ratio 0.8",
-                    [{"int8": 14, "int4": 16}, {"int8": 9}, {"int8": 1}],
+                    [{"int8": 14, "int4": 16}, {"int8": 1}, {"int8": 9}],
                 ),
                 (
                     "image-text-to-text",
                     "llava_next",
                     'int4 --group-size 16 --ratio 0.8 --sensitivity-metric "hessian_input_activation" '
                     "--dataset contextual --num-samples 1",
-                    [{"int8": 6, "int4": 24}, {"int8": 9}, {"int8": 1}],
+                    [{"int8": 6, "int4": 24}, {"int8": 1}, {"int8": 9}],
                 ),
                 (
                     "image-text-to-text",
                     "nanollava",
                     "int4 --group-size 8 --ratio 0.8 --trust-remote-code",
-                    [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
+                    [{"int8": 16, "int4": 14}, {"int8": 1}, {"int8": 15}],
                 ),
                 (
                     "image-text-to-text",
                     "nanollava",
                     'int4 --group-size 8 --ratio 0.8 --sensitivity-metric "mean_activation_variance" '
                     "--dataset contextual --num-samples 1 --trust-remote-code",
-                    [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
+                    [{"int8": 16, "int4": 14}, {"int8": 1}, {"int8": 15}],
                 ),
             ]
         )
@@ -224,40 +224,40 @@ class OVCLIExportTestCase(unittest.TestCase):
                     "image-text-to-text",
                     "minicpmv",
                     "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
-                    [{"int8": 10, "int4": 20}, {"int8": 26}, {"int8": 1}, {"int8": 6}],
+                    [{"int8": 10, "int4": 20}, {"int8": 1}, {"int8": 26}, {"int8": 6}],
                 ),
                 (
                     "image-text-to-text",
                     "minicpmv",
                     'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
                     "--dataset contextual --num-samples 1 --trust-remote-code",
-                    [{"int8": 8, "int4": 22}, {"int8": 26}, {"int8": 1}, {"int8": 6}],
+                    [{"int8": 8, "int4": 22}, {"int8": 1}, {"int8": 26}, {"int8": 6}],
                 ),
                 (
                     "image-text-to-text",
                     "internvl2",
                     "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
-                    [{"int8": 8, "int4": 22}, {"int8": 11}, {"int8": 1}],
+                    [{"int8": 8, "int4": 22}, {"int8": 1}, {"int8": 11}],
                 ),
                 (
                     "image-text-to-text",
                     "internvl2",
                     'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
                     "--dataset contextual --num-samples 1 --trust-remote-code",
-                    [{"int8": 8, "int4": 22}, {"int8": 11}, {"int8": 1}],
+                    [{"int8": 8, "int4": 22}, {"int8": 1}, {"int8": 11}],
                 ),
                 (
                     "image-text-to-text",
                     "phi3_v",
                     "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
-                    [{"int8": 8, "int4": 10}, {"int8": 7}, {"int8": 1}, {"int8": 2}],
+                    [{"int8": 8, "int4": 10}, {"int8": 1}, {"int8": 7}, {"int8": 2}],
                 ),
                 (
                     "image-text-to-text",
                     "phi3_v",
                     'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
                     "--dataset contextual --num-samples 1 --trust-remote-code",
-                    [{"int8": 4, "int4": 14}, {"int8": 7}, {"int8": 1}, {"int8": 2}],
+                    [{"int8": 4, "int4": 14}, {"int8": 1}, {"int8": 7}, {"int8": 2}],
                 ),
                 (
                     "image-text-to-text",
@@ -369,14 +369,15 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
                 model.text_encoder if model_type in ["stable-diffusion", "sana"] else model.text_encoder_2
             )
         elif task.startswith("image-text-to-text"):
-            models = [model.language_model, model.vision_embeddings]
+            models = list(model.submodels.values())
         else:
             models = [model]
 
         expected_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
-        for i, model in enumerate(models):
-            _, num_weight_nodes = get_num_quantized_nodes(model)
-            self.assertEqual(expected_int8[i], num_weight_nodes["int8"])
+        expected_int8 = [{"int8": it} for it in expected_int8]
+        if task.startswith("text2text-generation") and (not task.endswith("with-past") or model.decoder.stateful):
+            expected_int8 = expected_int8[:2]
+        check_compression_state_per_model(self, models, expected_int8)
 
     @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
     def test_exporters_cli_hybrid_quantization(
@@ -389,11 +390,11 @@ def test_exporters_cli_hybrid_quantization(
             check=True,
         )
         model = eval(_HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]).from_pretrained(tmpdir)
-        num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(
-            model.unet if model.unet is not None else model.transformer
-        )
+        vision_model = model.unet.model if model.unet is not None else model.transformer.model
+        num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(vision_model)
         self.assertEqual(expected_int8_nodes, num_weight_nodes["int8"])
         self.assertEqual(expected_fake_nodes, num_fake_nodes)
+        self.assertFalse(vision_model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]))
 
     @parameterized.expand(TEST_4BIT_CONFIGURATIONS)
     def test_exporters_cli_4bit(
@@ -419,10 +420,9 @@ def test_exporters_cli_4bit(
         if task == "text-generation-with-past":
             submodels = [model]
         elif task == "image-text-to-text":
-            submodels = [model.lm_model, model.vision_embeddings_model, model.text_embeddings_model]
-            submodels += [getattr(model, part) for part in model.additional_parts]
+            submodels = list(model.submodels.values())
 
-        compare_num_quantized_nodes_per_model(self, submodels, expected_num_weight_nodes_per_model)
+        check_compression_state_per_model(self, submodels, expected_num_weight_nodes_per_model)
 
         self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)
         self.assertTrue("--scale-estimation" not in option or b"Applying Scale Estimation" in result.stdout)