@@ -466,13 +466,7 @@ def test_ovmodel_load_large_model_with_default_compressed_weights(self):
                     _ = OVModelForCausalLM.from_pretrained(
                         MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
                     )
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "int8",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    save_model_patch.assert_called_with(unittest.mock.ANY, unittest.mock.ANY, ov_config=None)
 
     def test_ovmodel_load_large_model_with_uncompressed_weights(self):
         with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
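Side note on the removed assertions in these hunks: `aasert_called_with` is a misspelling of `assert_called_with`, and on a `Mock` it never fails, because accessing an unknown attribute just auto-creates a child mock. The misspelling also escapes the `assert*`/`assret*` prefix check that `unittest.mock` uses to catch such typos, so the old checks were silent no-ops. A minimal standalone demonstration (not part of this diff):

    import unittest.mock

    m = unittest.mock.Mock()
    m(1, 2, ov_config=None)

    # Typo: this only auto-creates and calls a child mock; nothing is verified.
    m.aasert_called_with("anything at all")

    # Real assertion: checks the call recorded above and passes.
    m.assert_called_with(1, 2, ov_config=None)

    # m.assert_called_with(3)  # a wrong expectation would raise AssertionError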
@@ -482,34 +476,37 @@ def test_ovmodel_load_large_model_with_uncompressed_weights(self):
                     _ = OVModelForCausalLM.from_pretrained(
                         MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
                     )
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "fp32",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    save_model_patch.assert_called_with(
+                        unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                    )
 
     def test_ovmodel_load_large_model_with_additional_quantization_config(self):
         with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
             model_mixin_patch.num_parameters.return_value = 2e9
             with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                 with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
-                    _ = OVModelForCausalLM.from_pretrained(
-                        MODEL_NAMES["llama"],
-                        export=True,
-                        compile=False,
-                        use_cache=False,
-                        quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
-                    )
-                    # quantization will be performed later, using load_model
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "fp32",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch:
+                        _ = OVModelForCausalLM.from_pretrained(
+                            MODEL_NAMES["llama"],
+                            export=True,
+                            compile=False,
+                            use_cache=False,
+                            quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+                        )
+                        # quantization will be performed later, using load_model
+                        save_model_patch.assert_called_with(
+                            unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                        )
+                        compression_params = {
+                            "mode": nncf.CompressWeightsMode.INT4_SYM,
+                            "ratio": 0.8,
+                            "group_size": -1,
+                            "all_layers": None,
+                            "sensitivity_metric": None,
+                            "dataset": None,
+                            "ignored_scope": None,
+                        }
+                        compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
 
 
 class OVQuantizerQATest(unittest.TestCase):
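For context on the new `compress_weights_patch` assertion: the test pins down how a 4-bit symmetric `OVWeightQuantizationConfig` is expected to surface as `nncf.compress_weights` keyword arguments. A rough sketch of that mapping, assuming the four integer modes of `nncf.CompressWeightsMode` (an illustration with a hypothetical helper name, not the actual optimum-intel implementation):

    import nncf

    def to_compress_weights_kwargs(bits, sym, group_size, ratio):
        # (bits, sym) pairs select an nncf.CompressWeightsMode member;
        # bits=4, sym=True gives INT4_SYM, as asserted in the test above.
        mode = {
            (4, True): nncf.CompressWeightsMode.INT4_SYM,
            (4, False): nncf.CompressWeightsMode.INT4_ASYM,
            (8, True): nncf.CompressWeightsMode.INT8_SYM,
            (8, False): nncf.CompressWeightsMode.INT8_ASYM,
        }[(bits, sym)]
        return {"mode": mode, "ratio": ratio, "group_size": group_size}

    # group_size=-1 requests per-channel quantization; ratio=0.8 asks NNCF to
    # keep roughly 20% of the layers at 8-bit precision for accuracy.
    print(to_compress_weights_kwargs(4, True, -1, 0.8))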