@@ -459,57 +459,56 @@ def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):
         self.assertEqual(0, num_int8)
 
     def test_ovmodel_load_large_model_with_default_compressed_weights(self):
-        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
-            model_mixin_patch.num_parameters.return_value = 2e9
+        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
+            model_parameters.return_value = [torch.ones((2000, 1000, 1000), requires_grad=True)]
             with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                 with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
                     _ = OVModelForCausalLM.from_pretrained(
                         MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
                     )
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "int8",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    save_model_patch.assert_called_with(
+                        unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(quantization_config={"bits": 8})
+                    )
 
     def test_ovmodel_load_large_model_with_uncompressed_weights(self):
-        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
-            model_mixin_patch.num_parameters.return_value = 2e9
+        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
+            model_parameters.return_value = [torch.ones((2000, 1000, 1000), requires_grad=True)]
             with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                 with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
                     _ = OVModelForCausalLM.from_pretrained(
                         MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
                     )
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "fp32",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    save_model_patch.assert_called_with(
+                        unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                    )
 
     def test_ovmodel_load_large_model_with_additional_quantization_config(self):
-        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
-            model_mixin_patch.num_parameters.return_value = 2e9
+        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
+            model_parameters.return_value = [torch.ones((2000, 1000, 1000), requires_grad=True)]
             with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                 with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
-                    _ = OVModelForCausalLM.from_pretrained(
-                        MODEL_NAMES["llama"],
-                        export=True,
-                        compile=False,
-                        use_cache=False,
-                        quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
-                    )
-                    # quantization will be performed later, using load_model
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "fp32",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch:
+                        _ = OVModelForCausalLM.from_pretrained(
+                            MODEL_NAMES["llama"],
+                            export=True,
+                            compile=False,
+                            use_cache=False,
+                            quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+                        )
+                        # quantization will be performed later, using load_model
+                        save_model_patch.assert_called_with(
+                            unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                        )
+                        compression_params = {
+                            "mode": nncf.CompressWeightsMode.INT4_SYM,
+                            "ratio": 0.8,
+                            "group_size": -1,
+                            "all_layers": None,
+                            "sensitivity_metric": None,
+                            "dataset": None,
+                            "ignored_scope": None,
+                        }
+                        compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
 
 
 class OVQuantizerQATest(unittest.TestCase):
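For context on the first change in the hunk: the updated tests no longer mock transformers' ModuleUtilsMixin; they patch torch.nn.Module.parameters directly, so the size heuristic that enables default 8-bit weight compression for large models sees roughly 2e9 parameters even though the fixture model is tiny. Below is a minimal, self-contained sketch of that trick; the count_parameters helper and the 1e9 threshold are illustrative stand-ins for the library's internal check, not optimum-intel's actual code, and a Mock replaces the ~8 GB torch.ones buffer used in the test so the sketch stays cheap to run.

import unittest.mock

import torch


def count_parameters(model: torch.nn.Module) -> int:
    # Illustrative stand-in for the internal "is this model large?" check.
    return sum(p.numel() for p in model.parameters())


with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
    # A Mock whose numel() reports 2e9 elements, instead of allocating the
    # real 2000 x 1000 x 1000 float tensor (~8 GB) that the test constructs.
    fake_param = unittest.mock.Mock()
    fake_param.numel.return_value = 2_000_000_000
    model_parameters.return_value = [fake_param]

    tiny_model = torch.nn.Linear(2, 2)
    n_params = count_parameters(tiny_model)
    print(n_params > int(1e9))  # True: the tiny model is treated as "large"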
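The last test pins down the contract asserted between OVWeightQuantizationConfig and nncf.compress_weights: bits=4 with sym=True must arrive as nncf.CompressWeightsMode.INT4_SYM, with ratio and group_size forwarded unchanged. Here is a hedged sketch of that mapping, where config_to_nncf_kwargs is a hypothetical helper (not optimum-intel API) covering only the int4/int8 modes relevant here.

import nncf


def config_to_nncf_kwargs(bits: int, sym: bool, group_size: int, ratio: float) -> dict:
    # Map (bits, sym) onto an NNCF weight-compression mode; modes beyond
    # int4/int8 (e.g. NF4) are out of scope for this sketch.
    mode = {
        (8, True): nncf.CompressWeightsMode.INT8_SYM,
        (8, False): nncf.CompressWeightsMode.INT8_ASYM,
        (4, True): nncf.CompressWeightsMode.INT4_SYM,
        (4, False): nncf.CompressWeightsMode.INT4_ASYM,
    }[(bits, sym)]
    return {"mode": mode, "ratio": ratio, "group_size": group_size}


# The configuration used in the test maps to the asserted INT4_SYM call:
print(config_to_nncf_kwargs(bits=4, sym=True, group_size=-1, ratio=0.8))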