@@ -192,27 +192,27 @@ class OVCLIExportTestCase(unittest.TestCase):
                 "image-text-to-text",
                 "llava_next",
                 "int4 --group-size 16 --ratio 0.8",
-                [{"int8": 14, "int4": 16}, {"int8": 9}, {"int8": 1}],
+                [{"int8": 14, "int4": 16}, {"int8": 1}, {"int8": 9}],
             ),
             (
                 "image-text-to-text",
                 "llava_next",
                 'int4 --group-size 16 --ratio 0.8 --sensitivity-metric "hessian_input_activation" '
                 "--dataset contextual --num-samples 1",
-                [{"int8": 6, "int4": 24}, {"int8": 9}, {"int8": 1}],
+                [{"int8": 6, "int4": 24}, {"int8": 1}, {"int8": 9}],
             ),
             (
                 "image-text-to-text",
                 "nanollava",
                 "int4 --group-size 8 --ratio 0.8 --trust-remote-code",
-                [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
+                [{"int8": 16, "int4": 14}, {"int8": 1}, {"int8": 15}],
             ),
             (
                 "image-text-to-text",
                 "nanollava",
                 'int4 --group-size 8 --ratio 0.8 --sensitivity-metric "mean_activation_variance" '
                 "--dataset contextual --num-samples 1 --trust-remote-code",
-                [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
+                [{"int8": 16, "int4": 14}, {"int8": 1}, {"int8": 15}],
             ),
         ]
     )
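Note on the reordered expectation lists: the test body (see the int8 hunk further down) now collects submodels via model.submodels.values() instead of a hand-picked list, so the expected per-submodel weight counts are swapped to match the new iteration order. A minimal sketch of how each expectation pairs with a submodel, assuming the wrapper exposes submodels as an ordered mapping; the import path and helper name follow these tests, not a documented public API:

from utils_tests import get_num_quantized_nodes  # test helper; import path illustrative

def check_expected_weight_counts(model, expected):
    # Pair each expected node-count dict with a submodel, in the order
    # model.submodels yields them (order assumed here, not guaranteed).
    for expectation, submodel in zip(expected, model.submodels.values()):
        _, num_weight_nodes = get_num_quantized_nodes(submodel)
        for dtype, count in expectation.items():
            assert num_weight_nodes[dtype] == count, (dtype, count)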
@@ -224,40 +224,40 @@ class OVCLIExportTestCase(unittest.TestCase):
                 "image-text-to-text",
                 "minicpmv",
                 "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
-                [{"int8": 10, "int4": 20}, {"int8": 26}, {"int8": 1}, {"int8": 6}],
+                [{"int8": 10, "int4": 20}, {"int8": 1}, {"int8": 26}, {"int8": 6}],
             ),
             (
                 "image-text-to-text",
                 "minicpmv",
                 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
                 "--dataset contextual --num-samples 1 --trust-remote-code",
-                [{"int8": 8, "int4": 22}, {"int8": 26}, {"int8": 1}, {"int8": 6}],
+                [{"int8": 8, "int4": 22}, {"int8": 1}, {"int8": 26}, {"int8": 6}],
             ),
             (
                 "image-text-to-text",
                 "internvl2",
                 "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
-                [{"int8": 8, "int4": 22}, {"int8": 11}, {"int8": 1}],
+                [{"int8": 8, "int4": 22}, {"int8": 1}, {"int8": 11}],
             ),
             (
                 "image-text-to-text",
                 "internvl2",
                 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
                 "--dataset contextual --num-samples 1 --trust-remote-code",
-                [{"int8": 8, "int4": 22}, {"int8": 11}, {"int8": 1}],
+                [{"int8": 8, "int4": 22}, {"int8": 1}, {"int8": 11}],
             ),
             (
                 "image-text-to-text",
                 "phi3_v",
                 "int4 --group-size 4 --ratio 0.8 --trust-remote-code",
-                [{"int8": 8, "int4": 10}, {"int8": 7}, {"int8": 1}, {"int8": 2}],
+                [{"int8": 8, "int4": 10}, {"int8": 1}, {"int8": 7}, {"int8": 2}],
             ),
             (
                 "image-text-to-text",
                 "phi3_v",
                 'int4 --group-size 4 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
                 "--dataset contextual --num-samples 1 --trust-remote-code",
-                [{"int8": 4, "int4": 14}, {"int8": 7}, {"int8": 1}, {"int8": 2}],
+                [{"int8": 4, "int4": 14}, {"int8": 1}, {"int8": 7}, {"int8": 2}],
             ),
             (
                 "image-text-to-text",
@@ -356,27 +356,31 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
         ).from_pretrained(tmpdir, **model_kwargs)
 
         if task.startswith("text2text-generation"):
-            models = [model.encoder, model.decoder]
+            models = [model.encoder.model, model.decoder.model]
             if task.endswith("with-past") and not model.decoder.stateful:
-                models.append(model.decoder_with_past)
+                models.append(model.decoder_with_past.model)
         elif (
             model_type.startswith("stable-diffusion")
             or model_type.startswith("flux")
             or model_type.startswith("sana")
         ):
-            models = [model.unet or model.transformer, model.vae_encoder, model.vae_decoder]
+            vision_model = model.unet.model if model.unet is not None else model.transformer.model
+            models = [vision_model, model.vae_encoder.model, model.vae_decoder.model]
             models.append(
-                model.text_encoder if model_type in ["stable-diffusion", "sana"] else model.text_encoder_2
+                model.text_encoder.model
+                if model_type in ["stable-diffusion", "sana"]
+                else model.text_encoder_2.model
             )
         elif task.startswith("image-text-to-text"):
-            models = [model.language_model, model.vision_embeddings]
+            models = list(model.submodels.values())
         else:
-            models = [model]
+            models = [model.model]
 
         expected_int8 = _ARCHITECTURES_TO_EXPECTED_INT8[model_type]
         for i, model in enumerate(models):
             _, num_weight_nodes = get_num_quantized_nodes(model)
             self.assertEqual(expected_int8[i], num_weight_nodes["int8"])
+            self.assertFalse(model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]))
 
     @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
     def test_exporters_cli_hybrid_quantization(
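The pattern in this hunk and the next: exported pipeline components are now wrapper objects, so the underlying openvino.Model is reached through their .model attribute before node counting, and each graph is additionally asserted to carry no pinned KV-cache precision in its runtime info. A minimal standalone sketch of that runtime-info check, assuming an exported IR on disk (the path is illustrative):

import openvino as ov

core = ov.Core()
ov_model = core.read_model("exported/openvino_language_model.xml")  # illustrative path

# rt_info is a nested map; has_rt_info/get_rt_info take the key path as a list.
path = ["runtime_options", "KV_CACHE_PRECISION"]
if ov_model.has_rt_info(path):
    print("KV cache precision pinned to:", ov_model.get_rt_info(path))
else:
    print("no KV_CACHE_PRECISION runtime option set")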
@@ -389,11 +393,11 @@ def test_exporters_cli_hybrid_quantization(
             check=True,
         )
         model = eval(_HEAD_TO_AUTOMODELS[model_type.replace("-refiner", "")]).from_pretrained(tmpdir)
-        num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(
-            model.unet if model.unet is not None else model.transformer
-        )
+        vision_model = model.unet.model if model.unet is not None else model.transformer.model
+        num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(vision_model)
         self.assertEqual(expected_int8_nodes, num_weight_nodes["int8"])
         self.assertEqual(expected_fake_nodes, num_fake_nodes)
+        self.assertFalse(vision_model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]))
 
     @parameterized.expand(TEST_4BIT_CONFIGURATIONS)
     def test_exporters_cli_4bit(
@@ -417,10 +421,11 @@ def test_exporters_cli_4bit(
 
         submodels = []
         if task == "text-generation-with-past":
-            submodels = [model]
+            submodels = [model.model]
         elif task == "image-text-to-text":
-            submodels = [model.lm_model, model.vision_embeddings_model, model.text_embeddings_model]
-            submodels += [getattr(model, part) for part in model.additional_parts]
+            submodels = list(model.submodels.values())
+        for submodel in submodels:
+            self.assertFalse(submodel.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]))
 
         compare_num_quantized_nodes_per_model(self, submodels, expected_num_weight_nodes_per_model)
 
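As in the int8 test, the 4-bit test now walks every exported component through model.submodels rather than naming lm_model, vision_embeddings_model, text_embeddings_model and the additional parts explicitly, which keeps the rt_info check in sync with whatever the wrapper actually exports. A hedged sketch of that iteration, assuming submodels maps submodel names to openvino.Model instances (attribute shape inferred from this diff, not from library docs):

def assert_no_kv_cache_precision(model):
    # Iterate every exported component; .items() assumed available on the
    # submodels mapping, mirroring the .values() call used in the test.
    for name, submodel in model.submodels.items():
        assert not submodel.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]), name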