Skip to content

Commit 1067d49

Browse files
Follow up to #2727 (#3211)
### Changes

Follow up to #2727:

1. Do not use `infer_request.results`.
2. Replace `>=` with `opset.greater_equal()`.
3. Rename `ov_numeric.py` to `openvino_numeric.py`.

### Reason for changes

1. Improve int4 compression time by up to ~10%.
2. Avoid warning: `DeprecationWarning: greater_equal is deprecated and will be removed in version 2025.3. Use ops.greater_equal instead`.
3. Fix onnx install test.

### Related tickets

139047

### Tests

- https://github.com/openvinotoolkit/nncf/actions/runs/12947249537
- NNCF/job/manual/job/post_training_weight_compression/301/
- NNCF/job/nightly/job/test_examples/653/
1 parent 5414dd6 commit 1067d49

File tree

7 files changed

+23
-32
lines changed

7 files changed

+23
-32
lines changed

docs/api/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def collect_api_entities() -> APIInfo:
145145
"nncf.tensor.functions.torch_linalg",
146146
"nncf.tensor.functions.torch_io",
147147
"nncf.tensor.functions.numpy_io",
148-
"nncf.tensor.functions.ov_numeric",
148+
"nncf.tensor.functions.openvino_numeric",
149149
]
150150

151151
with mock(mock_modules):

nncf/openvino/optimized_functions/models.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
3030
from nncf.tensor import Tensor
3131
from nncf.tensor import TensorDataType
32-
from nncf.tensor.functions.ov_numeric import DTYPE_MAP as DTYPE_MAP_OV
32+
from nncf.tensor.functions.openvino_numeric import DTYPE_MAP as DTYPE_MAP_OV
3333

3434
TensorList = List[Tensor]
3535
ModelCallable = Callable[[TensorList], TensorList]
@@ -134,18 +134,17 @@ def _infer_ov_model(
134134
raise ValueError(f"Expected input '{input_name}' to be {expected_dtype}. But found: {actual_dtype}.")
135135

136136
# Infer the model
137-
# TODO (Nikita Savelyev): Investigate the approach when we always infer via infer request creation
137+
if compiled_model._infer_request is None:
138+
compiled_model._infer_request = compiled_model.create_infer_request()
139+
infer_request = compiled_model._infer_request
140+
138141
inputs = [inp.data for inp in inputs]
142+
outputs = infer_request.infer(
143+
inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs
144+
)
139145
if ov_model_params.return_ov_tensors:
140-
infer_request = compiled_model.create_infer_request()
141-
infer_request.infer(
142-
inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs
143-
)
144-
outputs = [infer_request.get_output_tensor(i) for i in range(len(infer_request.results))]
146+
outputs = [infer_request.get_output_tensor(i) for i in range(len(outputs))]
145147
else:
146-
outputs = compiled_model(
147-
inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs
148-
)
149148
outputs = [outputs[i] for i in range(len(outputs))]
150149
outputs = [Tensor(it) for it in outputs]
151150

@@ -367,7 +366,7 @@ def _build_compress_model(
367366
w_max = opset.reduce_max(weight, reduction_axes=reduction_axes, keep_dims=True)
368367
w_abs_min, w_max = opset.convert(w_abs_min, ov.Type.f32), opset.convert(w_max, ov.Type.f32)
369368

370-
scale = opset.select(w_abs_min >= w_max, w_abs_min, opset.negative(w_max))
369+
scale = opset.select(opset.greater_equal(w_abs_min, w_max), w_abs_min, opset.negative(w_max))
371370
scale = divide_op(scale, opset.constant(-level_low, ov.Type.f32))
372371
scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)
373372

nncf/quantization/algorithms/weight_compression/openvino_backend.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
from nncf.quantization.algorithms.weight_compression.weight_lowering import compress_weight
5656
from nncf.tensor import Tensor
5757
from nncf.tensor.definitions import TensorDataType
58-
from nncf.tensor.functions.ov_numeric import DTYPE_MAP_REV
58+
from nncf.tensor.functions.openvino_numeric import DTYPE_MAP_REV
5959

6060

6161
class OVWeightCompressionAlgoBackend(WeightCompressionAlgoBackend):
File renamed without changes.

tests/openvino/native/test_tensor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from nncf.tensor.definitions import TensorBackend
2121
from nncf.tensor.definitions import TensorDeviceType
2222
from nncf.tensor.functions.numpy_numeric import DTYPE_MAP as DTYPE_MAP_NP
23-
from nncf.tensor.functions.ov_numeric import DTYPE_MAP as DTYPE_MAP_OV
23+
from nncf.tensor.functions.openvino_numeric import DTYPE_MAP as DTYPE_MAP_OV
2424

2525

2626
class TestOVNNCFTensorOperators:

tests/openvino/optimized_functions/test_compression_functions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from nncf.tensor.definitions import TensorBackend
3232
from nncf.tensor.functions.numpy_numeric import DTYPE_MAP as DTYPE_MAP_NP
3333
from nncf.tensor.functions.numpy_numeric import DTYPE_MAP_REV as DTYPE_MAP_REV_NP
34-
from nncf.tensor.functions.ov_numeric import DTYPE_MAP as DTYPE_MAP_OV
34+
from nncf.tensor.functions.openvino_numeric import DTYPE_MAP as DTYPE_MAP_OV
3535

3636

3737
class ComputationBackend(Enum):

tests/openvino/optimized_functions/test_ov_model_parameters.py

+9-17
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,9 @@ def test_release_memory(mocker, release_memory):
240240
compiled_model.inputs = [input_mock]
241241

242242
output_mock = mocker.Mock()
243-
compiled_model.return_value = [output_mock]
243+
infer_request = mocker.Mock()
244+
infer_request.infer.return_value = [output_mock]
245+
compiled_model._infer_request = infer_request
244246

245247
ov_model_params = OVModelParameters(input_dtypes={"input": TensorDataType.float32}, release_memory=release_memory)
246248
input_tensor = mocker.Mock()
@@ -267,16 +269,11 @@ def test_share_inputs_outputs(mocker, share_inputs, share_outputs, return_ov_ten
267269

268270
output_mock = mocker.Mock()
269271

272+
infer_request = mocker.Mock()
273+
infer_request.infer.return_value = [output_mock]
270274
if return_ov_tensors:
271-
infer_request = mocker.Mock()
272-
compiled_model.create_infer_request.return_value = infer_request
273-
274-
infer_request.infer = mocker.Mock()
275-
infer_request.results = [output_mock]
276-
277275
infer_request.get_output_tensor.return_value = output_mock
278-
else:
279-
compiled_model.return_value = [output_mock]
276+
compiled_model._infer_request = infer_request
280277

281278
ov_model_params = OVModelParameters(
282279
input_dtypes={"input": TensorDataType.float32},
@@ -292,14 +289,9 @@ def test_share_inputs_outputs(mocker, share_inputs, share_outputs, return_ov_ten
292289

293290
_infer_ov_model(ov_model_params, compiled_model, inputs=inputs)
294291

295-
if return_ov_tensors:
296-
infer_request.infer.assert_called_once_with(
297-
[input_tensor.data], share_inputs=share_inputs, share_outputs=share_outputs
298-
)
299-
else:
300-
compiled_model.assert_called_once_with(
301-
[input_tensor.data], share_inputs=share_inputs, share_outputs=share_outputs
302-
)
292+
infer_request.infer.assert_called_once_with(
293+
[input_tensor.data], share_inputs=share_inputs, share_outputs=share_outputs
294+
)
303295

304296

305297
@pytest.mark.parametrize(

0 commit comments

Comments (0)