|
11 | 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 | 12 | # See the License for the specific language governing permissions and
|
13 | 13 | # limitations under the License.
|
| 14 | +import inspect |
14 | 15 |
|
15 | 16 | # ruff: noqa
|
16 | 17 |
|
|
22 | 23 | from enum import Enum
|
23 | 24 | from functools import partial
|
24 | 25 | from typing import Union
|
| 26 | + |
25 | 27 | import pytest
|
26 | 28 | import evaluate
|
27 | 29 | import numpy as np
|
@@ -538,76 +540,80 @@ def test_ovmodel_load_with_uncompressed_weights(self, model_cls, model_type):
|
538 | 540 | self.assertEqual(0, num_int8)
|
539 | 541 |
|
540 | 542 | def test_ovmodel_load_large_model_with_default_compressed_weights(self):
|
541 |
| - with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters: |
542 |
| - mock_tensor = unittest.mock.Mock() |
543 |
| - mock_tensor.numel = lambda: 2000000000 |
544 |
| - mock_tensor.requires_grad = True |
545 |
| - model_parameters.return_value = [mock_tensor] |
546 |
| - with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch: |
547 |
| - with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch: |
548 |
| - _ = OVModelForCausalLM.from_pretrained( |
549 |
| - MODEL_NAMES["llama"], export=True, compile=False, use_cache=False |
550 |
| - ) |
551 |
| - save_model_patch.assert_called_with( |
552 |
| - unittest.mock.ANY, |
553 |
| - unittest.mock.ANY, |
554 |
| - ov_config=OVConfig(quantization_config={"bits": 8}), |
555 |
| - library_name="transformers", |
556 |
| - ) |
| 543 | + def main_export_in_stacktrace(*args, **kwargs): |
| 544 | + # Compression was called from `main_export` |
| 545 | + self.assertTrue(inspect.stack()[5].function == "main_export") |
| 546 | + |
| 547 | + with unittest.mock.patch( |
| 548 | + "openvino.runtime.op.Constant.shape", new_callable=unittest.mock.PropertyMock |
| 549 | + ) as ov_constant_shape: |
| 550 | + ov_constant_shape.return_value = (2000000000,) |
| 551 | + with unittest.mock.patch( |
| 552 | + "nncf.compress_weights", side_effect=main_export_in_stacktrace |
| 553 | + ) as compress_weights_patch: |
| 554 | + _ = OVModelForCausalLM.from_pretrained( |
| 555 | + MODEL_NAMES["llama"], export=True, compile=False, use_cache=False |
| 556 | + ) |
| 557 | + compression_params = { |
| 558 | + "mode": nncf.CompressWeightsMode.INT8_ASYM, |
| 559 | + "ratio": 1.0, |
| 560 | + "group_size": -1, |
| 561 | + "all_layers": None, |
| 562 | + "sensitivity_metric": None, |
| 563 | + "dataset": None, |
| 564 | + "ignored_scope": nncf.IgnoredScope(), |
| 565 | + "awq": None, |
| 566 | + "subset_size": 128, |
| 567 | + "scale_estimation": None, |
| 568 | + } |
| 569 | + compress_weights_patch.assert_called_with( |
| 570 | + unittest.mock.ANY, |
| 571 | + **compression_params, |
| 572 | + ) |
557 | 573 |
|
558 | 574 | def test_ovmodel_load_large_model_with_uncompressed_weights(self):
|
559 |
| - with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters: |
560 |
| - mock_tensor = unittest.mock.Mock() |
561 |
| - mock_tensor.numel = lambda: 2000000000 |
562 |
| - mock_tensor.requires_grad = True |
563 |
| - model_parameters.return_value = [mock_tensor] |
564 |
| - with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch: |
565 |
| - with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch: |
566 |
| - _ = OVModelForCausalLM.from_pretrained( |
567 |
| - MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False |
568 |
| - ) |
569 |
| - save_model_patch.assert_called_with( |
570 |
| - unittest.mock.ANY, |
571 |
| - unittest.mock.ANY, |
572 |
| - ov_config=OVConfig(dtype="auto"), |
573 |
| - library_name="transformers", |
574 |
| - ) |
| 575 | + with unittest.mock.patch( |
| 576 | + "openvino.runtime.op.Constant.shape", new_callable=unittest.mock.PropertyMock |
| 577 | + ) as ov_constant_shape: |
| 578 | + ov_constant_shape.return_value = (2000000000,) |
| 579 | + with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch: |
| 580 | + _ = OVModelForCausalLM.from_pretrained( |
| 581 | + MODEL_NAMES["llama"], export=True, load_in_8bit=False, compile=False, use_cache=False |
| 582 | + ) |
| 583 | + compress_weights_patch.assert_not_called() |
575 | 584 |
|
576 | 585 | def test_ovmodel_load_large_model_with_additional_quantization_config(self):
|
577 |
| - with unittest.mock.patch("torch.nn.Module.parameters") as model_parameters: |
578 |
| - mock_tensor = unittest.mock.Mock() |
579 |
| - mock_tensor.numel = lambda: 2000000000 |
580 |
| - mock_tensor.requires_grad = True |
581 |
| - with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch: |
582 |
| - with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch: |
583 |
| - with unittest.mock.patch("nncf.compress_weights") as compress_weights_patch: |
584 |
| - _ = OVModelForCausalLM.from_pretrained( |
585 |
| - MODEL_NAMES["llama"], |
586 |
| - export=True, |
587 |
| - compile=False, |
588 |
| - use_cache=False, |
589 |
| - quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8), |
590 |
| - ) |
591 |
| - # quantization will be performed later, using load_model |
592 |
| - save_model_patch.assert_called_with( |
593 |
| - unittest.mock.ANY, |
594 |
| - unittest.mock.ANY, |
595 |
| - ov_config=OVConfig(dtype="auto"), |
596 |
| - library_name="transformers", |
597 |
| - ) |
598 |
| - compression_params = { |
599 |
| - "mode": nncf.CompressWeightsMode.INT4_SYM, |
600 |
| - "ratio": 0.8, |
601 |
| - "group_size": -1, |
602 |
| - "all_layers": None, |
603 |
| - "sensitivity_metric": None, |
604 |
| - "dataset": None, |
605 |
| - "ignored_scope": nncf.IgnoredScope(), |
606 |
| - "awq": None, |
607 |
| - "subset_size": 128, |
608 |
| - "scale_estimation": None, |
609 |
| - } |
610 |
| - compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params) |
| 586 | + def main_export_not_in_stacktrace(*args, **kwargs): |
| 587 | + # Compression was not called from `main_export` |
| 588 | + self.assertTrue(all(frame_info.function != "main_export" for frame_info in inspect.stack())) |
| 589 | + |
| 590 | + with unittest.mock.patch( |
| 591 | + "openvino.runtime.op.Constant.shape", new_callable=unittest.mock.PropertyMock |
| 592 | + ) as ov_constant_shape: |
| 593 | + ov_constant_shape.return_value = (2000000000,) |
| 594 | + with unittest.mock.patch( |
| 595 | + "nncf.compress_weights", side_effect=main_export_not_in_stacktrace |
| 596 | + ) as compress_weights_patch: |
| 597 | + _ = OVModelForCausalLM.from_pretrained( |
| 598 | + MODEL_NAMES["llama"], |
| 599 | + export=True, |
| 600 | + compile=False, |
| 601 | + use_cache=False, |
| 602 | + quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8), |
| 603 | + ) |
| 604 | + compression_params = { |
| 605 | + "mode": nncf.CompressWeightsMode.INT4_SYM, |
| 606 | + "ratio": 0.8, |
| 607 | + "group_size": -1, |
| 608 | + "all_layers": None, |
| 609 | + "sensitivity_metric": None, |
| 610 | + "dataset": None, |
| 611 | + "ignored_scope": nncf.IgnoredScope(), |
| 612 | + "awq": None, |
| 613 | + "subset_size": 128, |
| 614 | + "scale_estimation": None, |
| 615 | + } |
| 616 | + compress_weights_patch.assert_called_with(unittest.mock.ANY, **compression_params) |
611 | 617 |
|
612 | 618 | @parameterized.expand(LOAD_IN_4_BITS_SCOPE)
|
613 | 619 | def test_ovmodel_4bit_dynamic_with_config(self, model_cls, model_name, quantization_config, expected_ov_int4):
|
|
0 commit comments