Skip to content

Commit 978035a

Browse files
committed
rewrite mock tests
1 parent 01c8117 commit 978035a

File tree

1 file changed

+26
-29
lines changed

1 file changed

+26
-29
lines changed

tests/openvino/test_quantization.py

+26-29
Original file line numberDiff line numberDiff line change
@@ -466,13 +466,7 @@ def test_ovmodel_load_large_model_with_default_compressed_weights(self):
466466
_ = OVModelForCausalLM.from_pretrained(
467467
MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
468468
)
469-
saving_params = {
470-
"model": unittest.mock.ANY,
471-
"path": unittest.mock.ANY,
472-
"compression_option": "int8",
473-
"compression_ratio": None,
474-
}
475-
save_model_patch.aasert_called_with(saving_params)
469+
save_model_patch.assert_called_with(unittest.mock.ANY, unittest.mock.ANY, ov_config=None)
476470

477471
def test_ovmodel_load_large_model_with_uncompressed_weights(self):
478472
with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
@@ -482,34 +476,37 @@ def test_ovmodel_load_large_model_with_uncompressed_weights(self):
482476
_ = OVModelForCausalLM.from_pretrained(
483477
MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
484478
)
485-
saving_params = {
486-
"model": unittest.mock.ANY,
487-
"path": unittest.mock.ANY,
488-
"compression_option": "fp32",
489-
"compression_ratio": None,
490-
}
491-
save_model_patch.aasert_called_with(saving_params)
479+
save_model_patch.assert_called_with(
480+
unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
481+
)
492482

493483
def test_ovmodel_load_large_model_with_additional_quantization_config(self):
494484
with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
495485
model_mixin_patch.num_parameters.return_value = 2e9
496486
with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
497487
with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
498-
_ = OVModelForCausalLM.from_pretrained(
499-
MODEL_NAMES["llama"],
500-
export=True,
501-
compile=False,
502-
use_cache=False,
503-
quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
504-
)
505-
# quantization will be performed later, using load_model
506-
saving_params = {
507-
"model": unittest.mock.ANY,
508-
"path": unittest.mock.ANY,
509-
"compression_option": "fp32",
510-
"compression_ratio": None,
511-
}
512-
save_model_patch.aasert_called_with(saving_params)
488+
with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch:
489+
_ = OVModelForCausalLM.from_pretrained(
490+
MODEL_NAMES["llama"],
491+
export=True,
492+
compile=False,
493+
use_cache=False,
494+
quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
495+
)
496+
# quantization will be performed later, using load_model
497+
save_model_patch.assert_called_with(
498+
unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
499+
)
500+
compression_params = {
501+
"mode": nncf.CompressWeightsMode.INT4_SYM,
502+
"ratio": 0.8,
503+
"group_size": -1,
504+
"all_layers": None,
505+
"sensitivity_metric": None,
506+
"dataset": None,
507+
"ignored_scope": None,
508+
}
509+
compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
513510

514511

515512
class OVQuantizerQATest(unittest.TestCase):

0 commit comments

Comments
 (0)