Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable optimized compression on ARM CPUs until the next release #3366

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions nncf/openvino/cpu_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,34 @@

import openvino as ov

_IS_ARM_CPU = None
_IS_LNL_CPU = None


def _get_cpu_name() -> str:
    """
    Query the OpenVINO runtime for the full name of the CPU device.

    :return: The name of the CPU.
    """
    core = ov.Core()
    return core.get_property("CPU", ov.properties.device.full_name)


def is_arm_cpu() -> bool:
    """
    Checks whether current CPU is an ARM CPU or not.

    :return: True if current CPU is an ARM CPU, False otherwise.
    """
    global _IS_ARM_CPU
    if _IS_ARM_CPU is None:
        # Cache the result so the OpenVINO device property is queried at most once.
        cpu_name = _get_cpu_name()
        _IS_ARM_CPU = "arm" in cpu_name.lower()
    return _IS_ARM_CPU


def is_lnl_cpu() -> bool:
    """
    Checks whether current CPU is an Intel Lunar Lake generation or not.

    :return: True if current CPU is an Intel Lunar Lake generation, False otherwise.
    """
    global _IS_LNL_CPU
    if _IS_LNL_CPU is None:
        # Lunar Lake marketing names match "Ultra <digit> 2xx" (e.g. "Core Ultra 7 268V").
        # NOTE(review): pattern inferred from the regex only — confirm against the
        # full set of Lunar Lake SKU names.
        _IS_LNL_CPU = re.search(r"Ultra \d 2\d{2}", _get_cpu_name()) is not None
    return _IS_LNL_CPU
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import numpy as np

import nncf
from nncf.common.logging.logger import nncf_logger
from nncf.common.utils.backend import is_openvino_at_least
from nncf.common.utils.backend import is_openvino_available
from nncf.parameters import CompressWeightsMode
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
Expand Down Expand Up @@ -543,9 +544,15 @@ def quantize_dequantize_weight(


def _can_run_optimized(input_backend: TensorBackend) -> bool:
if input_backend in [TensorBackend.ov, TensorBackend.numpy]:
if (
input_backend in [TensorBackend.ov, TensorBackend.numpy]
and os.environ.get("NNCF_DISABLE_OPTIMIZED_COMPRESSION") is None
):
if is_openvino_available():
return True
from nncf.openvino.cpu_info import is_arm_cpu

# Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.
return not is_arm_cpu() or is_openvino_at_least("2025.2")
else:
nncf_logger.info_once(
"OpenVINO optimizations are disabled. Install OpenVINO to enable them and improve the performance."
Expand Down
22 changes: 22 additions & 0 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import inspect
import os
from typing import Callable, Dict, List
from unittest.mock import patch

import numpy as np
import openvino.runtime as ov
Expand All @@ -21,10 +22,12 @@
from openvino.runtime import opset13 as opset

import nncf
import nncf.openvino.optimized_functions as opt_fns
from nncf import CompressWeightsMode
from nncf import SensitivityMetric
from nncf.common.factory import NNCFGraphFactory
from nncf.common.utils.debug import nncf_debug
from nncf.common.utils.helpers import set_env_variable
from nncf.data.dataset import Dataset
from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase
from nncf.openvino.graph.model_transformer import OVModelTransformer
Expand Down Expand Up @@ -1487,6 +1490,25 @@ def test_compression_with_transposed_activations(kwargs):
)


@pytest.mark.parametrize("disabled", [False, True])
def test_disabled_optimized_compression(disabled):
    """Optimized compression must be bypassed when the disabling env variable is set."""
    model = LMLinearModel().ov_model

    def compress():
        compress_weights(model, mode=CompressWeightsMode.INT8)

    original_fn = opt_fns.do_int_quantization
    patch_path = f"nncf.openvino.optimized_functions.{original_fn.__name__}"
    # Wrap the optimized function with a spy that still delegates to the real one.
    with patch(patch_path, side_effect=original_fn) as mocked_fn:
        if not disabled:
            compress()
            mocked_fn.assert_called_once()
        else:
            with set_env_variable("NNCF_DISABLE_OPTIMIZED_COMPRESSION", "1"):
                compress()
            mocked_fn.assert_not_called()


class TestOVTemplateWeightCompression(TemplateWeightCompression):
@staticmethod
def get_matmul_model() -> ov.Model:
Expand Down