Commit 8821a04

add deepcopy_data

1 parent f601b8b · commit 8821a04

3 files changed (+55 -5 lines)

optimum/intel/openvino/quantization.py (+3 -2)
@@ -74,6 +74,7 @@
     PREDEFINED_SD_DATASETS,
     PREDEFINED_SPEECH_TO_TEXT_DATASETS,
     PREDEFINED_VISUAL_LM_DATASETS,
+    deepcopy_data,
 )
 
 
@@ -131,7 +132,7 @@ def __init__(
 
     def collect_inputs(self, inputs):
        if not self.apply_caching or not isinstance(inputs, dict):
-            self.collected_inputs.append(copy.deepcopy(inputs))
+            self.collected_inputs.append(deepcopy_data(inputs))
             return
 
         copied_inputs = {}
@@ -146,7 +147,7 @@ def collect_inputs(self, inputs):
             # Avoid data copying if tensor contains data encountered earlier
             self.tensor_cache.setdefault(k, {})
             if data_hash not in self.tensor_cache[k]:
-                self.tensor_cache[k][data_hash] = copy.deepcopy(v)
+                self.tensor_cache[k][data_hash] = deepcopy_data(v)
             copied_inputs[k] = self.tensor_cache[k][data_hash]
         self.collected_inputs.append(copied_inputs)

optimum/intel/openvino/utils.py (+21 -3)
@@ -19,21 +19,22 @@
 import stat
 import warnings
 import weakref
+from copy import deepcopy
 from glob import glob
 from pathlib import Path
 from tempfile import TemporaryDirectory as OrigTemporaryDirectory
 from tempfile import mkdtemp
-from typing import Tuple, Type, Union
+from typing import Tuple, Type, Union, Any
 
 import numpy as np
 import torch
 from huggingface_hub import model_info
-from openvino.runtime import Core, Model, properties
+from openvino.runtime import Core, Model, properties, Tensor
 from openvino.runtime import Type as OVType
 from packaging.version import Version
 from transformers import AutoTokenizer, CLIPTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 from transformers.onnx.utils import ParameterFormat, compute_serialized_parameters_size
-
+import openvino
 from optimum.intel.utils.import_utils import is_torch_version
 
 
@@ -586,3 +587,20 @@ def check_scale_available(model: Union[Model, str, Path]):
     if runtime_options is None:
         return False
     return runtime_options.find("ACTIVATIONS_SCALE_FACTOR") is not None
+
+
+def deepcopy_data(inputs: Any) -> Any:
+    if isinstance(inputs, dict):
+        new_inputs = {}
+        for k, v in inputs.items():
+            new_inputs[deepcopy_data(k)] = deepcopy_data(v)
+    elif isinstance(inputs, list):
+        new_inputs = [deepcopy_data(elem) for elem in inputs]
+    elif isinstance(inputs, tuple):
+        new_inputs = tuple(deepcopy_data(elem) for elem in inputs)
+    elif isinstance(inputs, openvino.Tensor):
+        new_inputs = openvino.Tensor(np.zeros(inputs.shape, dtype=inputs.element_type.to_dtype()))
+        new_inputs.copy_from(inputs)
+    else:
+        new_inputs = deepcopy(inputs)
+    return new_inputs
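
For reference, a minimal usage sketch of the new helper as defined in the hunk above. This is illustrative only: the input names, shapes, and dtypes are made up, and it assumes the commit is applied so that deepcopy_data is importable from optimum.intel.openvino.utils.

import numpy as np
import openvino as ov

from optimum.intel.openvino.utils import deepcopy_data

inputs = {"input_ids": ov.Tensor(np.ones((1, 4), dtype=np.int64)), "mask": np.ones((1, 4))}
copied = deepcopy_data(inputs)

# The openvino.Tensor is rebuilt and filled via copy_from, so the copy is a distinct object
# with identical contents; other values fall through to copy.deepcopy.
assert copied["input_ids"] is not inputs["input_ids"]
assert np.array_equal(copied["input_ids"].data, inputs["input_ids"].data)
assert copied["mask"] is not inputs["mask"]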

tests/openvino/test_quantization.py (+31 -0)
@@ -42,6 +42,9 @@
 from transformers.testing_utils import slow
 from transformers.utils.quantization_config import QuantizationMethod
 
+from optimum.intel.openvino.utils import deepcopy_data
+
+
 from optimum.intel import (
     OVConfig,
     OVFluxPipeline,
@@ -1354,6 +1357,34 @@ def test_calibration_data_uniqueness(self, model_name, apply_caching):
         # Without caching, encoder hidden states tensors will be unique for each collected input
         self.assertGreater(len(data_id_per_key["encoder_hidden_states"]), 2)
 
+    def test_deepcopy_data(self):
+        data = {
+            "a": torch.tensor([1, 2, 3]),
+            "b": np.array([1, 2, 3]),
+            "c": 1,
+            "d": "string",
+            "e": {"a": torch.tensor([1, 2, 3]), "b": np.array([1, 2, 3])},
+            "f": [ov.Tensor(np.ones((1, 2, 3))), ov.Tensor(np.ones((1, 2, 3)))],
+        }
+        copied_data = deepcopy_data(data)
+        assert copied_data["a"] is not data["a"]
+        assert copied_data["b"] is not data["b"]
+        assert copied_data["e"]["a"] is not data["e"]["a"]
+        assert copied_data["e"]["b"] is not data["e"]["b"]
+        assert copied_data["f"][0] is not data["f"][0]
+        assert copied_data["f"][1] is not data["f"][1]
+
+        assert torch.equal(copied_data["a"], data["a"])
+        assert np.array_equal(copied_data["b"], data["b"])
+        assert copied_data["c"] == data["c"]
+        assert copied_data["d"] == data["d"]
+        assert torch.equal(copied_data["e"]["a"], data["e"]["a"])
+        assert np.array_equal(copied_data["e"]["b"], data["e"]["b"])
+        assert np.array_equal(copied_data["f"][0].data, data["f"][0].data)
+        assert np.array_equal(copied_data["f"][1].data, data["f"][1].data)
+
+        assert copied_data is not data
+
 
 def check_optimization_not_applicable_to_optimized_model(model, quantization_config):
     quantizer = OVQuantizer(model)
