|
29 | 29 | from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
|
30 | 30 | from nncf.tensor import Tensor
|
31 | 31 | from nncf.tensor import TensorDataType
|
32 |
| -from nncf.tensor.functions.ov_numeric import DTYPE_MAP as DTYPE_MAP_OV |
| 32 | +from nncf.tensor.functions.openvino_numeric import DTYPE_MAP as DTYPE_MAP_OV |
33 | 33 |
|
34 | 34 | TensorList = List[Tensor]
|
35 | 35 | ModelCallable = Callable[[TensorList], TensorList]
|
@@ -134,18 +134,17 @@ def _infer_ov_model(
|
134 | 134 | raise ValueError(f"Expected input '{input_name}' to be {expected_dtype}. But found: {actual_dtype}.")
|
135 | 135 |
|
136 | 136 | # Infer the model
|
137 |
| - # TODO (Nikita Savelyev): Investigate the approach when we always infer via infer request creation |
| 137 | + if compiled_model._infer_request is None: |
| 138 | + compiled_model._infer_request = compiled_model.create_infer_request() |
| 139 | + infer_request = compiled_model._infer_request |
| 140 | + |
138 | 141 | inputs = [inp.data for inp in inputs]
|
| 142 | + outputs = infer_request.infer( |
| 143 | + inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs |
| 144 | + ) |
139 | 145 | if ov_model_params.return_ov_tensors:
|
140 |
| - infer_request = compiled_model.create_infer_request() |
141 |
| - infer_request.infer( |
142 |
| - inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs |
143 |
| - ) |
144 |
| - outputs = [infer_request.get_output_tensor(i) for i in range(len(infer_request.results))] |
| 146 | + outputs = [infer_request.get_output_tensor(i) for i in range(len(outputs))] |
145 | 147 | else:
|
146 |
| - outputs = compiled_model( |
147 |
| - inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs |
148 |
| - ) |
149 | 148 | outputs = [outputs[i] for i in range(len(outputs))]
|
150 | 149 | outputs = [Tensor(it) for it in outputs]
|
151 | 150 |
|
@@ -367,7 +366,7 @@ def _build_compress_model(
|
367 | 366 | w_max = opset.reduce_max(weight, reduction_axes=reduction_axes, keep_dims=True)
|
368 | 367 | w_abs_min, w_max = opset.convert(w_abs_min, ov.Type.f32), opset.convert(w_max, ov.Type.f32)
|
369 | 368 |
|
370 |
| - scale = opset.select(w_abs_min >= w_max, w_abs_min, opset.negative(w_max)) |
| 369 | + scale = opset.select(opset.greater_equal(w_abs_min, w_max), w_abs_min, opset.negative(w_max)) |
371 | 370 | scale = divide_op(scale, opset.constant(-level_low, ov.Type.f32))
|
372 | 371 | scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)
|
373 | 372 |
|
|
0 commit comments