Skip to content

Commit 996b308

Browse files
Disable optimized compression on ARM CPUs until the next release (#3366)
### Changes
- Disabled optimized compression on ARM CPUs until the next OV 2025.2 release. The fix openvinotoolkit/openvino#29577 won't be merged in time.
- Added `NNCF_DISABLE_OPTIMIZED_COMPRESSION` environment variable flag to disable optimized compression if needed.

### Reason for changes
Enable weights compression on ARM CPUs.

### Related tickets
164135
1 parent dc8eb03 commit 996b308

File tree

3 files changed

+52
-5
lines changed

3 files changed

+52
-5
lines changed

nncf/openvino/cpu_info.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,34 @@
1313

1414
import openvino as ov
1515

16+
_IS_ARM_CPU = None
1617
_IS_LNL_CPU = None
1718

1819

20+
def _get_cpu_name() -> str:
    """
    Queries the OpenVINO runtime for the full name of the "CPU" device.

    :return: The name of the CPU.
    """
    core = ov.Core()
    return core.get_property("CPU", ov.properties.device.full_name)
25+
26+
27+
def is_arm_cpu() -> bool:
    """
    Checks whether current CPU is an ARM CPU or not.

    The check is a case-insensitive search for the substring "arm" in the CPU name.
    The result is computed on the first call and stored in the module-level
    `_IS_ARM_CPU` variable; subsequent calls return the stored value.

    :return: True if current CPU is an ARM CPU, False otherwise.
    """
    global _IS_ARM_CPU
    # Reuse the cached answer when the CPU name has already been inspected.
    if _IS_ARM_CPU is not None:
        return _IS_ARM_CPU
    lowercase_cpu_name = _get_cpu_name().lower()
    _IS_ARM_CPU = "arm" in lowercase_cpu_name
    return _IS_ARM_CPU
36+
37+
1938
def is_lnl_cpu() -> bool:
    """
    Checks whether current CPU is an Intel Lunar Lake generation or not.

    The check searches the CPU name for the pattern "Ultra <digit> 2<digit><digit>".
    The result is computed on the first call and stored in the module-level
    `_IS_LNL_CPU` variable; subsequent calls return the stored value.

    :return: True if current CPU is an Intel Lunar Lake generation, False otherwise.
    """
    global _IS_LNL_CPU
    # Reuse the cached answer when the CPU name has already been inspected.
    if _IS_LNL_CPU is not None:
        return _IS_LNL_CPU
    lnl_name_match = re.search(r"Ultra \d 2\d{2}", _get_cpu_name())
    _IS_LNL_CPU = lnl_name_match is not None
    return _IS_LNL_CPU

nncf/quantization/algorithms/weight_compression/weight_lowering.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@
88
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
99
# See the License for the specific language governing permissions and
1010
# limitations under the License.
11-
11+
import os
1212
from dataclasses import dataclass
1313
from typing import Optional, Tuple, Union
1414

1515
import numpy as np
1616

1717
import nncf
1818
from nncf.common.logging.logger import nncf_logger
19+
from nncf.common.utils.backend import is_openvino_at_least
1920
from nncf.common.utils.backend import is_openvino_available
2021
from nncf.parameters import CompressWeightsMode
2122
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
@@ -543,9 +544,15 @@ def quantize_dequantize_weight(
543544

544545

545546
def _can_run_optimized(input_backend: TensorBackend) -> bool:
546-
if input_backend in [TensorBackend.ov, TensorBackend.numpy]:
547+
if (
548+
input_backend in [TensorBackend.ov, TensorBackend.numpy]
549+
and os.environ.get("NNCF_DISABLE_OPTIMIZED_COMPRESSION") is None
550+
):
547551
if is_openvino_available():
548-
return True
552+
from nncf.openvino.cpu_info import is_arm_cpu
553+
554+
# Due to a bug in the CPU plugin, compression models can fail at compilation on ARM CPUs. Ticket: 164135.
555+
return not is_arm_cpu() or is_openvino_at_least("2025.2")
549556
else:
550557
nncf_logger.info_once(
551558
"OpenVINO optimizations are disabled. Install OpenVINO to enable them and improve the performance."

tests/openvino/native/quantization/test_weights_compression.py

+22
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import inspect
1313
import os
1414
from typing import Callable, Dict, List
15+
from unittest.mock import patch
1516

1617
import numpy as np
1718
import openvino.runtime as ov
@@ -21,10 +22,12 @@
2122
from openvino.runtime import opset13 as opset
2223

2324
import nncf
25+
import nncf.openvino.optimized_functions as opt_fns
2426
from nncf import CompressWeightsMode
2527
from nncf import SensitivityMetric
2628
from nncf.common.factory import NNCFGraphFactory
2729
from nncf.common.utils.debug import nncf_debug
30+
from nncf.common.utils.helpers import set_env_variable
2831
from nncf.data.dataset import Dataset
2932
from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase
3033
from nncf.openvino.graph.model_transformer import OVModelTransformer
@@ -1487,6 +1490,25 @@ def test_compression_with_transposed_activations(kwargs):
14871490
)
14881491

14891492

1493+
@pytest.mark.parametrize("disabled", [False, True])
def test_disabled_optimized_compression(disabled):
    """
    Checks that the NNCF_DISABLE_OPTIMIZED_COMPRESSION environment variable controls
    whether the optimized `do_int_quantization` function is used during INT8 weight compression.

    :param disabled: When True, compression runs with the environment variable set and the
        optimized function must not be called; when False, it must be called exactly once.
    """
    ov_model = LMLinearModel().ov_model

    original_fn = opt_fns.do_int_quantization
    patch_target = f"nncf.openvino.optimized_functions.{original_fn.__name__}"

    # The patch forwards every call to the original function, so compression still
    # behaves as usual while the mock records the calls.
    with patch(patch_target, side_effect=original_fn) as quantization_spy:
        if not disabled:
            compress_weights(ov_model, mode=CompressWeightsMode.INT8)
            quantization_spy.assert_called_once()
            return

        with set_env_variable("NNCF_DISABLE_OPTIMIZED_COMPRESSION", "1"):
            compress_weights(ov_model, mode=CompressWeightsMode.INT8)
            quantization_spy.assert_not_called()
1511+
14901512
class TestOVTemplateWeightCompression(TemplateWeightCompression):
14911513
@staticmethod
14921514
def get_matmul_model() -> ov.Model:

0 commit comments

Comments
 (0)