|
16 | 16 |
|
17 | 17 | import tempfile
|
18 | 18 | import unittest
|
| 19 | +from collections import defaultdict |
19 | 20 | from functools import partial
|
20 | 21 |
|
21 | 22 | import evaluate
|
22 | 23 | import numpy as np
|
| 24 | +import torch |
23 | 25 | from datasets import load_dataset
|
24 | 26 | from parameterized import parameterized
|
25 | 27 | import openvino.runtime as ov
|
|
30 | 32 | AutoModelForCausalLM,
|
31 | 33 | AutoModelForTokenClassification,
|
32 | 34 | AutoTokenizer,
|
| 35 | + AutoProcessor, |
33 | 36 | TrainingArguments,
|
34 | 37 | default_data_collator,
|
35 | 38 | )
|
|
45 | 48 | OVModelForSeq2SeqLM,
|
46 | 49 | OVModelForSequenceClassification,
|
47 | 50 | OVModelForTokenClassification,
|
| 51 | + OVModelForSpeechSeq2Seq, |
48 | 52 | OVStableDiffusionPipeline,
|
49 | 53 | OVStableDiffusionXLPipeline,
|
50 | 54 | OVQuantizer,
|
51 | 55 | OVTrainer,
|
52 | 56 | OVWeightQuantizationConfig,
|
53 | 57 | )
|
54 | 58 |
|
55 |
| - |
56 | 59 | from optimum.intel.openvino.configuration import INT8_WEIGHT_COMPRESSION_CONFIG, DEFAULT_QUANTIZATION_CONFIG
|
| 60 | +from optimum.intel.openvino.quantization import InferRequestWrapper |
57 | 61 | from optimum.intel.utils.import_utils import is_openvino_version
|
58 | 62 | from utils_tests import MODEL_NAMES, get_num_quantized_nodes, _ARCHITECTURES_TO_EXPECTED_INT8
|
59 | 63 |
|
@@ -601,3 +605,38 @@ def compute_metrics(p):
|
601 | 605 | tokens = tokenizer("This is a sample input", return_tensors="pt")
|
602 | 606 | outputs = model(**tokens)
|
603 | 607 | self.assertTrue("logits" in outputs)
|
| 608 | + |
| 609 | + |
class InferRequestWrapperTest(unittest.TestCase):
    """Tests for the calibration inputs gathered through ``InferRequestWrapper``."""

    # Model checkpoints the test below is parameterized over.
    MODEL_ID = ("openai/whisper-tiny.en",)

    @staticmethod
    def _generate_random_audio_data(processor):
        """Build processor input features from a sine wave with a random frequency.

        Args:
            processor: Callable invoked with the raw waveform,
                ``sampling_rate=16000`` and ``return_tensors="pt"``; its result
                must expose an ``input_features`` attribute.

        Returns:
            The ``input_features`` attribute of the processor output.
        """
        t = np.linspace(0, 1.0, 1000, endpoint=False)
        # The random term changes the frequency on every call, so two calls
        # yield different waveforms.
        audio_data = 0.5 * np.sin((2 + np.random.random()) * np.pi * t)
        return processor(
            audio_data,
            sampling_rate=16000,
            return_tensors="pt",
        ).input_features

    @parameterized.expand(MODEL_ID)
    def test_calibration_data_uniqueness(self, model_id):
        """Check that collected calibration inputs are not all identical per key.

        The ``decoder_with_past`` request is wrapped in ``InferRequestWrapper``
        together with an initially empty list, generation is run on two random
        audio samples, and the entries found in that list afterwards are
        hashed per input name.
        """
        ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True, compile=True)
        processor = AutoProcessor.from_pretrained(model_id)

        calibration_data = []
        ov_model.decoder_with_past.request = InferRequestWrapper(ov_model.decoder_with_past.request, calibration_data)
        for _ in range(2):
            input_features = self._generate_random_audio_data(processor)
            ov_model.generate(input_features)

        # Without this guard the per-key loop below would not execute on an
        # empty list and the test would pass without checking anything.
        self.assertTrue(calibration_data, "No calibration data was collected during generation")

        data_hashes_per_key = defaultdict(list)
        for inputs_dict in calibration_data:
            for k, v in inputs_dict.items():
                x = (v.numpy() if isinstance(v, torch.Tensor) else v).copy()
                data_hashes_per_key[k].append(hash(x.tobytes()))

        for k, data_hashes in data_hashes_per_key.items():
            # All hashes cannot be equal because the calibration dataset
            # contains at least 2 different samples.
            self.assertGreater(
                len(set(data_hashes)),
                1,
                f"All collected calibration inputs for {k!r} are identical",
            )
0 commit comments