@@ -22,7 +22,7 @@
 from enum import Enum
 from functools import partial
 from typing import Union
-
+import pytest
 import evaluate
 import numpy as np
 import torch
@@ -37,6 +37,7 @@
     TrainingArguments,
     default_data_collator,
 )
+from transformers.testing_utils import slow
 from transformers.utils.quantization_config import QuantizationMethod
 
 from optimum.intel import (
@@ -173,15 +174,13 @@ def preprocess_function(examples, tokenizer):
 
 
 class OVWeightCompressionTest(unittest.TestCase):
-    # TODO : add models
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = (
         (OVModelForSequenceClassification, "bert", 70, 70),
         (OVModelForCausalLM, "gpt2", 44, 44),
     )
 
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 62, 86),)
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_4BIT_AUTOCOMPRESSED_MATMULS = ((OVModelForCausalLM, "opt125m", 0, 148),)
-
     SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS = ((OVModelForCausalLM, "gpt2", 44, 44),)
 
     LOAD_IN_4_BITS_SCOPE = (
@@ -347,7 +346,6 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i
         self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
-    @unittest.skipIf(not IS_SUPPORT_STATEFUL, "Stateful models supported only in 2023.3 and above")
     def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         task = model_cls.export_feature
         model_id = MODEL_NAMES[model_name]
@@ -473,7 +471,6 @@ def test_ovmodel_4bit_auto_compression_with_config(
         self.assertEqual(openvino_config.dtype, "int4")
 
     @parameterized.expand(((OVModelForCausalLM, "gpt2"),))
-    @unittest.skipIf(not IS_SUPPORT_STATEFUL, "Stateful models supported only in 2023.3 and above")
     def test_ovmodel_stateful_load_with_compressed_weights(self, model_cls, model_type):
         model = model_cls.from_pretrained(MODEL_NAMES[model_type], export=True, load_in_8bit=True, stateful=True)
         self.assertTrue(model.stateful)
@@ -588,9 +585,11 @@ def test_ovmodel_4bit_dynamic_with_config(self, model_cls, model_name, quantizat
 
 
 class OVQuantizerQATest(unittest.TestCase):
-    SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering",),)
+    SUPPORTED_ARCHITECTURES = ("hf-internal-testing/tiny-random-BertForQuestionAnswering",)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_automodel_static_quantization(self, model_name):
         def preprocess_function(examples, tokenizer):
             return tokenizer(
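Note on the new decorators: `@slow` here is `transformers.testing_utils.slow`, which wraps the test in `unittest.skipUnless` keyed off the `RUN_SLOW` environment variable, so these quantization tests only run when slow tests are explicitly enabled. `@pytest.mark.run_slow` is a custom marker; assuming it is registered in the project's pytest configuration, the same tests can be selected with `pytest -m run_slow`. A minimal sketch of the pattern (the helper and marker registration below are illustrative, not the project's exact code):

import os
import unittest

import pytest


def slow(test_case):
    # Mirrors transformers.testing_utils.slow: skip unless RUN_SLOW is set.
    return unittest.skipUnless(os.environ.get("RUN_SLOW"), "test is slow")(test_case)


class ExampleTest(unittest.TestCase):
    @pytest.mark.run_slow  # run with: RUN_SLOW=1 pytest -m run_slow
    @slow
    def test_expensive_quantization_path(self):
        self.assertTrue(True)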
@@ -630,6 +629,8 @@ def preprocess_function(examples, tokenizer):
         self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict())
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @pytest.mark.run_slow
+    @slow
     def test_ovmodel_static_quantization(self, model_name):
         def preprocess_function(examples, tokenizer):
             return tokenizer(
@@ -670,12 +671,13 @@ def preprocess_function(examples, tokenizer):
 
 
 class OVTrainerTest(unittest.TestCase):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 67, 38),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("albert", 65, 39),)
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_aware_training_quantization(self, model_name, expected_fake_quantize, expected_int8):
-        model = AutoModelForSequenceClassification.from_pretrained(model_name)
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model_id = MODEL_NAMES[model_name]
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         ov_config = OVConfig()
         dataset = load_dataset("glue", "sst2")
         dataset = dataset.map(