Skip to content

Commit 1137b7a

Browse files
committed
merge main in branch
2 parents 3a71f42 + 652a15c commit 1137b7a

File tree

4 files changed

+50
-5
lines changed

4 files changed

+50
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
accelerate
22
diffusers
33
torch~=1.13
4-
nncf @ git+https://github.com/openvinotoolkit/nncf.git
4+
torchvision~=0.14
5+
nncf
56
tomesd @ git+https://github.com/AlexKoff88/tomesd.git@openvino

optimum/intel/openvino/configuration.py

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
"openlm-research/open_llama_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
9797
"tiiuae/falcon-7b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
9898
"psmathur/orca_mini_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
99+
"mistralai/Mixtral-8x7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
99100
}
100101

101102

optimum/intel/openvino/quantization.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import inspect
1617
import logging
1718
import os
@@ -87,11 +88,14 @@ def __init__(self, request, data_cache=None):
8788
self.data_cache = data_cache
8889

8990
def __call__(self, *args, **kwargs):
90-
self.data_cache.append(*args)
91+
# If __call__ is invoked then self.request must be an instance of CompiledModel
92+
signature = inspect.signature(self.request)
93+
bound_args = signature.bind(*args, **kwargs).arguments
94+
self.data_cache.append(copy.deepcopy(bound_args["inputs"]))
9195
return self.request(*args, **kwargs)
9296

9397
def infer(self, inputs: Any = None, share_inputs: bool = False):
94-
self.data_cache.append(inputs)
98+
self.data_cache.append(copy.deepcopy(inputs))
9599
return self.request.infer(inputs, share_inputs)
96100

97101
def start_async(
@@ -102,7 +106,7 @@ def start_async(
102106
*,
103107
shared_memory: Any = None,
104108
):
105-
self.data_cache.append(inputs)
109+
self.data_cache.append(copy.deepcopy(inputs))
106110
self.request.infer(inputs, share_inputs, share_outputs=True)
107111

108112
def wait(self):

tests/openvino/test_quantization.py

+40-1
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616

1717
import tempfile
1818
import unittest
19+
from collections import defaultdict
1920
from functools import partial
2021

2122
import evaluate
2223
import numpy as np
24+
import torch
2325
from datasets import load_dataset
2426
from parameterized import parameterized
2527
import openvino.runtime as ov
@@ -30,6 +32,7 @@
3032
AutoModelForCausalLM,
3133
AutoModelForTokenClassification,
3234
AutoTokenizer,
35+
AutoProcessor,
3336
TrainingArguments,
3437
default_data_collator,
3538
)
@@ -45,15 +48,16 @@
4548
OVModelForSeq2SeqLM,
4649
OVModelForSequenceClassification,
4750
OVModelForTokenClassification,
51+
OVModelForSpeechSeq2Seq,
4852
OVStableDiffusionPipeline,
4953
OVStableDiffusionXLPipeline,
5054
OVQuantizer,
5155
OVTrainer,
5256
OVWeightQuantizationConfig,
5357
)
5458

55-
5659
from optimum.intel.openvino.configuration import INT8_WEIGHT_COMPRESSION_CONFIG, DEFAULT_QUANTIZATION_CONFIG
60+
from optimum.intel.openvino.quantization import InferRequestWrapper
5761
from optimum.intel.utils.import_utils import is_openvino_version
5862
from utils_tests import MODEL_NAMES, get_num_quantized_nodes, _ARCHITECTURES_TO_EXPECTED_INT8
5963

@@ -601,3 +605,38 @@ def compute_metrics(p):
601605
tokens = tokenizer("This is a sample input", return_tensors="pt")
602606
outputs = model(**tokens)
603607
self.assertTrue("logits" in outputs)
608+
609+
610+
class InferRequestWrapperTest(unittest.TestCase):
611+
MODEL_ID = ("openai/whisper-tiny.en",)
612+
613+
@staticmethod
614+
def _generate_random_audio_data(processor):
615+
t = np.linspace(0, 1.0, int(1000), endpoint=False)
616+
audio_data = 0.5 * np.sin((2 + np.random.random()) * np.pi * t)
617+
input_features = processor(
618+
audio_data,
619+
sampling_rate=16000,
620+
return_tensors="pt",
621+
).input_features
622+
return input_features
623+
624+
@parameterized.expand(MODEL_ID)
625+
def test_calibration_data_uniqueness(self, model_id):
626+
ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True, compile=True)
627+
processor = AutoProcessor.from_pretrained(model_id)
628+
629+
calibration_data = []
630+
ov_model.decoder_with_past.request = InferRequestWrapper(ov_model.decoder_with_past.request, calibration_data)
631+
for _ in range(2):
632+
input_features = self._generate_random_audio_data(processor)
633+
ov_model.generate(input_features)
634+
635+
data_hashes_per_key = defaultdict(list)
636+
for inputs_dict in calibration_data:
637+
for k, v in inputs_dict.items():
638+
x = (v.numpy() if isinstance(v, torch.Tensor) else v).copy()
639+
data_hashes_per_key[k].append(hash(x.tobytes()))
640+
for k, data_hashes in data_hashes_per_key.items():
641+
# All hashes can not be equal because calibration dataset contains at least 2 different samples
642+
self.assertTrue(any(data_hashes[0] != it for it in data_hashes))

0 commit comments

Comments
 (0)