Commit 194a997

Adopt tests
1 parent 7789c55 commit 194a997

File tree

1 file changed: +72 -66 lines

tests/openvino/test_quantization.py

+72 -66
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import inspect
 
 # ruff: noqa
 
@@ -22,6 +23,7 @@
 from enum import Enum
 from functools import partial
 from typing import Union
+
 import pytest
 import evaluate
 import numpy as np
@@ -538,76 +540,80 @@ def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):
         self.assertEqual(0, num_int8)
 
     def test_ovmodel_load_large_model_with_default_compressed_weights(self):
-        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
-            mock_tensor = unittest.mock.Mock()
-            mock_tensor.numel = lambda: 2000000000
-            mock_tensor.requires_grad = True
-            model_parameters.return_value = [mock_tensor]
-            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
-                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
-                    _ = OVModelForCausalLM.from_pretrained(
-                        MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
-                    )
-                    save_model_patch.assert_called_with(
-                        unittest.mock.ANY,
-                        unittest.mock.ANY,
-                        ov_config=OVConfig(quantization_config={"bits": 8}),
-                        library_name="transformers",
-                    )
+        def main_export_in_stacktrace(*args, **kwargs):
+            # Compression was called from `main_export`
+            self.assertTrue(inspect.stack()[5].function == "main_export")
+
+        with unittest.mock.patch(
+            "openvino.runtime.op.Constant.shape", new_callable=unittest.mock.PropertyMock
+        ) as ov_constant_shape:
+            ov_constant_shape.return_value = (2000000000,)
+            with unittest.mock.patch(
+                "nncf.compress_weights", side_effect=main_export_in_stacktrace
+            ) as compress_weights_patch:
+                _ = OVModelForCausalLM.from_pretrained(
+                    MODEL_NAMES["llama"], export=True, compile=False, use_cache=False
+                )
+                compression_params = {
+                    "mode": nncf.CompressWeightsMode.INT8_ASYM,
+                    "ratio": 1.0,
+                    "group_size": -1,
+                    "all_layers": None,
+                    "sensitivity_metric": None,
+                    "dataset": None,
+                    "ignored_scope": nncf.IgnoredScope(),
+                    "awq": None,
+                    "subset_size": 128,
+                    "scale_estimation": None,
+                }
+                compress_weights_patch.assert_called_with(
+                    unittest.mock.ANY,
+                    **compression_params,
+                )
 
     def test_ovmodel_load_large_model_with_uncompressed_weights(self):
-        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
-            mock_tensor = unittest.mock.Mock()
-            mock_tensor.numel = lambda: 2000000000
-            mock_tensor.requires_grad = True
-            model_parameters.return_value = [mock_tensor]
-            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
-                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
-                    _ = OVModelForCausalLM.from_pretrained(
-                        MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
-                    )
-                    save_model_patch.assert_called_with(
-                        unittest.mock.ANY,
-                        unittest.mock.ANY,
-                        ov_config=OVConfig(dtype="auto"),
-                        library_name="transformers",
-                    )
+        with unittest.mock.patch(
+            "openvino.runtime.op.Constant.shape", new_callable=unittest.mock.PropertyMock
+        ) as ov_constant_shape:
+            ov_constant_shape.return_value = (2000000000,)
+            with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch:
+                _ = OVModelForCausalLM.from_pretrained(
+                    MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False
+                )
+                compress_weights_patch.assert_not_called()
 
     def test_ovmodel_load_large_model_with_additional_quantization_config(self):
-        with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters:
-            mock_tensor = unittest.mock.Mock()
-            mock_tensor.numel = lambda: 2000000000
-            mock_tensor.requires_grad = True
-            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
-                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
-                    with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch:
-                        _ = OVModelForCausalLM.from_pretrained(
-                            MODEL_NAMES["llama"],
-                            export=True,
-                            compile=False,
-                            use_cache=False,
-                            quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
-                        )
-                        # quantization will be performed later, using load_model
-                        save_model_patch.assert_called_with(
-                            unittest.mock.ANY,
-                            unittest.mock.ANY,
-                            ov_config=OVConfig(dtype="auto"),
-                            library_name="transformers",
-                        )
-                        compression_params = {
-                            "mode": nncf.CompressWeightsMode.INT4_SYM,
-                            "ratio": 0.8,
-                            "group_size": -1,
-                            "all_layers": None,
-                            "sensitivity_metric": None,
-                            "dataset": None,
-                            "ignored_scope": nncf.IgnoredScope(),
-                            "awq": None,
-                            "subset_size": 128,
-                            "scale_estimation": None,
-                        }
-                        compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
+        def main_export_not_in_stacktrace(*args, **kwargs):
+            # Compression was not called from `main_export`
+            self.assertTrue(all(frame_info.function != "main_export" for frame_info in inspect.stack()))
+
+        with unittest.mock.patch(
+            "openvino.runtime.op.Constant.shape", new_callable=unittest.mock.PropertyMock
+        ) as ov_constant_shape:
+            ov_constant_shape.return_value = (2000000000,)
+            with unittest.mock.patch(
+                "nncf.compress_weights", side_effect=main_export_not_in_stacktrace
+            ) as compress_weights_patch:
+                _ = OVModelForCausalLM.from_pretrained(
+                    MODEL_NAMES["llama"],
+                    export=True,
+                    compile=False,
+                    use_cache=False,
+                    quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+                )
+                compression_params = {
+                    "mode": nncf.CompressWeightsMode.INT4_SYM,
+                    "ratio": 0.8,
+                    "group_size": -1,
+                    "all_layers": None,
+                    "sensitivity_metric": None,
+                    "dataset": None,
+                    "ignored_scope": nncf.IgnoredScope(),
+                    "awq": None,
+                    "subset_size": 128,
+                    "scale_estimation": None,
+                }
+                compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params)
 
     @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
     def test_ovmodel_4bit_dynamic_with_config(self, model_cls, model_name, quantization_config, expected_ov_int4):
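The adopted tests no longer fabricate torch parameters; they make the exported OpenVINO graph itself report a huge weight by patching the shape property of openvino.runtime.op.Constant. Below is a minimal, self-contained sketch of that PropertyMock technique, using only the standard library; FakeConstant and model_is_large are hypothetical stand-ins for illustration, not optimum or openvino APIs.

import unittest.mock


class FakeConstant:
    # Hypothetical stand-in for openvino.runtime.op.Constant.
    @property
    def shape(self):
        return (10,)


def model_is_large(constants, threshold=10**9):
    # Hypothetical size heuristic: total element count across all constants.
    total = 0
    for constant in constants:
        count = 1
        for dim in constant.shape:
            count *= dim
        total += count
    return total > threshold


# Patching the property on the class makes every instance report a
# two-billion-element shape, so the size heuristic sees a "large" model
# without allocating any real weights.
with unittest.mock.patch.object(
    FakeConstant, "shape", new_callable=unittest.mock.PropertyMock
) as shape_mock:
    shape_mock.return_value = (2_000_000_000,)
    assert model_is_large([FakeConstant()])

This presumably motivates the change: the old mock of torch.nn.Module.parameters only covered the PyTorch side, while patching Constant.shape exercises the size check on the already-converted OpenVINO model.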

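The other pattern worth noting is the side_effect that asserts where compression was invoked from: the patched nncf.compress_weights inspects the call stack for a main_export frame. Here is a small sketch of the same stack-inspection idea with hypothetical names; exporter and compress stand in for main_export and nncf.compress_weights and are not real optimum APIs.

import inspect
import unittest.mock


def compress(model):
    return model


def exporter(model):
    # Code path under test: compression is reached through `exporter`.
    return compress(model)


def called_from_exporter(*args, **kwargs):
    # Runs in place of `compress`; fails unless `exporter` is on the stack.
    assert any(frame.function == "exporter" for frame in inspect.stack())


with unittest.mock.patch(f"{__name__}.compress", side_effect=called_from_exporter) as patched:
    exporter("model")
    patched.assert_called_once_with("model")

Scanning the whole stack, as main_export_not_in_stacktrace does, is more robust than indexing a fixed frame like inspect.stack()[5], which silently depends on the mock machinery's call depth.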