22 | 22 | from openvino.runtime import Node
23 | 23 | from openvino.runtime import opset13 as opset
24 | 24 |
   | 25 | +from nncf.common.utils.backend import is_openvino_at_least
25 | 26 | from nncf.common.utils.caching import ResultsCache
26 | 27 | from nncf.common.utils.caching import cache_results
   | 28 | +from nncf.common.utils.cpu_info import is_lnl_cpu
   | 29 | +from nncf.common.utils.helpers import set_env_variable
27 | 30 | from nncf.openvino.graph.node_utils import convert_op
28 | 31 | from nncf.openvino.graph.node_utils import non_convertable_divide_op
29 | 32 | from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig

@@ -115,6 +118,16 @@ def clear_ov_model_cache():
115 | 118 |     OV_MODEL_CACHE.clear()
116 | 119 |
117 | 120 |
    | 121 | +def _compile_ov_model(model: ov.Model, device_name: str, config: Dict[str, str]) -> ov.CompiledModel:
    | 122 | +    if is_lnl_cpu() and not is_openvino_at_least("2025.1"):
    | 123 | +        with set_env_variable("DNNL_MAX_CPU_ISA", "AVX2_VNNI"):
    | 124 | +            compiled_model = ov.compile_model(model, device_name=device_name, config=config)
    | 125 | +    else:
    | 126 | +        compiled_model = ov.compile_model(model, device_name=device_name, config=config)
    | 127 | +
    | 128 | +    return compiled_model
    | 129 | +
    | 130 | +
118 | 131 | def _infer_ov_model(
119 | 132 |     ov_model_params: OVModelParameters, compiled_model: ov.CompiledModel, inputs: TensorList
120 | 133 | ) -> TensorList:

@@ -412,7 +425,7 @@ def _build_compress_model(
412 | 425 |         return ov_parameters, ov_results, ov_model_params
413 | 426 |
414 | 427 |     model = ov.Model(ov_results, ov_parameters)
415 |     | -    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
    | 428 | +    compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
416 | 429 |
417 | 430 |     return partial(_infer_ov_model, ov_model_params, compiled_model)
418 | 431 |

@@ -467,7 +480,7 @@ def _build_compress_decompress_model(
467 | 480 |
468 | 481 |     ov_results = [decompressed_weight] + ov_results if return_compressed_weight else [decompressed_weight]
469 | 482 |     model = ov.Model(ov_results, ov_parameters)
470 |     | -    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
    | 483 | +    compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
471 | 484 |
472 | 485 |     return partial(_infer_ov_model, ov_model_params, compiled_model)
473 | 486 |

@@ -509,6 +522,6 @@ def _build_astype_model(ov_model_params: OVModelParameters, arg_shape: Tuple) ->
509 | 522 |     arg = opset.parameter(arg_shape, dtype=DTYPE_MAP_OV[input_dtypes["input"]], name="input")
510 | 523 |     res = opset.convert(arg, DTYPE_MAP_OV[output_dtypes["output"]])
511 | 524 |     model = ov.Model([res], [arg])
512 |     | -    compiled_model = ov.compile_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
    | 525 | +    compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
513 | 526 |
514 | 527 |     return partial(_infer_ov_model, ov_model_params, compiled_model)
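
The `_compile_ov_model` helper above compiles with `DNNL_MAX_CPU_ISA` pinned to `AVX2_VNNI` on Lunar Lake CPUs when the installed OpenVINO is older than 2025.1, and compiles normally otherwise. It leans on the `set_env_variable` context manager imported from `nncf.common.utils.helpers`, which is not shown in this diff; a minimal sketch of such a helper (an assumption for illustration, not the actual nncf implementation) could look like:

import os
from contextlib import contextmanager

@contextmanager
def set_env_variable(name: str, value: str):
    # Hypothetical sketch: temporarily set an environment variable and
    # restore its previous value (or remove it) once the block exits.
    previous = os.environ.get(name)
    os.environ[name] = value
    try:
        yield
    finally:
        if previous is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = previous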