22 | 22 | from openvino.runtime import Node
23 | 23 | from openvino.runtime import opset13 as opset
24 | 24 |
   | 25 | +from nncf.common.utils.backend import is_openvino_at_least
25 | 26 | from nncf.common.utils.caching import ResultsCache
26 | 27 | from nncf.common.utils.caching import cache_results
   | 28 | +from nncf.common.utils.cpu_info import is_lnl_cpu
   | 29 | +from nncf.common.utils.helpers import set_env_variable
27 | 30 | from nncf.openvino.graph.node_utils import convert_op
28 | 31 | from nncf.openvino.graph.node_utils import non_convertable_divide_op
29 | 32 | from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig

@@ -115,6 +118,16 @@ def clear_ov_model_cache():
115 | 118 |     OV_MODEL_CACHE.clear()
116 | 119 |
117 | 120 |
    | 121 | +def _compile_ov_model(model: ov.Model, device_name: str, config: Dict[str, str]) -> ov.CompiledModel:
    | 122 | +    if is_lnl_cpu() and not is_openvino_at_least("2025.1"):
    | 123 | +        with set_env_variable("DNNL_MAX_CPU_ISA", "AVX2_VNNI"):
    | 124 | +            compiled_model = ov.compile_model(model, device_name=device_name, config=config)
    | 125 | +    else:
    | 126 | +        compiled_model = ov.compile_model(model, device_name=device_name, config=config)
    | 127 | +
    | 128 | +    return compiled_model
    | 129 | +
    | 130 | +
118 | 131 | def _infer_ov_model(
119 | 132 |     ov_model_params: OVModelParameters, compiled_model: ov.CompiledModel, inputs: TensorList
120 | 133 | ) -> TensorList:

@@ -412,7 +425,7 @@ def _build_compress_model(
412 | 425 |         return ov_parameters, ov_results, ov_model_params
413 | 426 |
414 | 427 |     model = ov.Model(ov_results, ov_parameters)
415 |     | -    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
    | 428 | +    compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
416 | 429 |
417 | 430 |     return partial(_infer_ov_model, ov_model_params, compiled_model)
418 | 431 |

@@ -467,7 +480,7 @@ def _build_compress_decompress_model(
467 | 480 |
468 | 481 |     ov_results = [decompressed_weight] + ov_results if return_compressed_weight else [decompressed_weight]
469 | 482 |     model = ov.Model(ov_results, ov_parameters)
470 |     | -    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
    | 483 | +    compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
471 | 484 |
472 | 485 |     return partial(_infer_ov_model, ov_model_params, compiled_model)
473 | 486 |

@@ -509,6 +522,6 @@ def _build_astype_model(ov_model_params: OVModelParameters, arg_shape: Tuple) ->
509 | 522 |     arg = opset.parameter(arg_shape, dtype=DTYPE_MAP_OV[input_dtypes["input"]], name="input")
510 | 523 |     res = opset.convert(arg, DTYPE_MAP_OV[output_dtypes["output"]])
511 | 524 |     model = ov.Model([res], [arg])
512 |     | -    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
    | 525 | +    compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
513 | 526 |
514 | 527 |     return partial(_infer_ov_model, ov_model_params, compiled_model)
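
The `_compile_ov_model` helper above compiles with `DNNL_MAX_CPU_ISA` pinned to `AVX2_VNNI` on Lunar Lake CPUs when the installed OpenVINO is older than 2025.1, and compiles normally otherwise. It leans on the `set_env_variable` context manager imported from `nncf.common.utils.helpers`, which is not shown in this diff; a minimal sketch of such a helper (an assumption for illustration, not the actual nncf implementation) could look like:

import os
from contextlib import contextmanager

@contextmanager
def set_env_variable(name: str, value: str):
    # Hypothetical sketch: temporarily set an environment variable and
    # restore its previous value (or remove it) once the block exits.
    previous = os.environ.get(name)
    os.environ[name] = value
    try:
        yield
    finally:
        if previous is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = previous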