@@ -459,57 +459,73 @@ def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):
        self.assertEqual(0, num_int8)

    def test_ovmodel_load_large_model_with_default_compressed_weights(self):
-        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
-            model_mixin_patch.num_parameters.return_value = 2e9
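+        # Fake a 2e9-parameter model: above the size threshold, weights are
+        # compressed to 8 bit by default.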
+        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
+            mock_tensor = unittest.mock.Mock()
+            mock_tensor.numel = lambda: 2000000000
+            mock_tensor.requires_grad = True
+            model_parameters.return_value = [mock_tensor]
            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
                    _ = OVModelForCausalLM.from_pretrained(
                        MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
                    )
-            saving_params = {
-                "model": unittest.mock.ANY,
-                "path": unittest.mock.ANY,
-                "compression_option": "int8",
-                "compression_ratio": None,
-            }
-            save_model_patch.aasert_called_with(saving_params)
+            save_model_patch.assert_called_with(
+                unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(quantization_config={"bits": 8})
+            )

    def test_ovmodel_load_large_model_with_uncompressed_weights(self):
-        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
-            model_mixin_patch.num_parameters.return_value = 2e9
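+        # Same 2e9-parameter mock, but load_in_8bit=False below should keep
+        # the exported weights in fp32.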
+        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
+            mock_tensor = unittest.mock.Mock()
+            mock_tensor.numel = lambda: 2000000000
+            mock_tensor.requires_grad = True
+            model_parameters.return_value = [mock_tensor]
            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
                    _ = OVModelForCausalLM.from_pretrained(
                        MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
                    )
-            saving_params = {
-                "model": unittest.mock.ANY,
-                "path": unittest.mock.ANY,
-                "compression_option": "fp32",
-                "compression_ratio": None,
-            }
-            save_model_patch.aasert_called_with(saving_params)
+            save_model_patch.assert_called_with(
+                unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+            )

    def test_ovmodel_load_large_model_with_additional_quantization_config(self):
-        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
-            model_mixin_patch.num_parameters.return_value = 2e9
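+        # 2e9-parameter mock again; the explicit 4-bit quantization_config
+        # below takes precedence over the default 8-bit compression.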
+        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
+            mock_tensor = unittest.mock.Mock()
+            mock_tensor.numel = lambda: 2000000000
+            mock_tensor.requires_grad = True
+            model_parameters.return_value = [mock_tensor]
            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
-                    _ = OVModelForCausalLM.from_pretrained(
-                        MODEL_NAMES["llama"],
-                        export=True,
-                        compile=False,
-                        use_cache=False,
-                        quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
-                    )
-                    # quantization will be performed later, using load_model
-                    saving_params = {
-                        "model": unittest.mock.ANY,
-                        "path": unittest.mock.ANY,
-                        "compression_option": "fp32",
-                        "compression_ratio": None,
-                    }
-                    save_model_patch.aasert_called_with(saving_params)
+                    with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch:
+                        _ = OVModelForCausalLM.from_pretrained(
+                            MODEL_NAMES["llama"],
+                            export=True,
+                            compile=False,
+                            use_cache=False,
+                            quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+                        )
+                        # quantization will be performed later, using load_model
+                        save_model_patch.assert_called_with(
+                            unittest.mock.ANY, unittest.mock.ANY, ov_config=OVConfig(dtype="fp32")
+                        )
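+                        # bits=4 with sym=True corresponds to NNCF's INT4_SYM
+                        # mode; ratio and group_size are forwarded unchanged.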
+                        compression_params = {
+                            "mode": nncf.CompressWeightsMode.INT4_SYM,
+                            "ratio": 0.8,
+                            "group_size": -1,
+                            "all_layers": None,
+                            "sensitivity_metric": None,
+                            "dataset": None,
+                            "ignored_scope": None,
+                        }
+                        compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)


class OVQuantizerQATest(unittest.TestCase):