Commit 85cea14

Merge remote-tracking branch 'origin/develop' into pr-3331

2 parents: c9eef2e + d614c1e
26 files changed: +254 -50 lines

.github/workflows/api_changes_check.yml (+2 -2)

@@ -26,7 +26,7 @@ jobs:
       issues: write
     steps:
       - name: Download built HTML doc as artifact from previous step
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
         with:
           name: html_doc_artifact
       - run: |
@@ -63,7 +63,7 @@ jobs:
           echo '{"pr_number": "${{ github.event.pull_request.number }}", "action": "none"}' > api_status.json

       - name: Upload artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: api_status
           path: api_status.json

.github/workflows/build_and_publish_doc.yml (+2 -2)

@@ -28,7 +28,7 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

       - name: Download HTML doc build artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
         with:
           name: html_doc_artifact
       - name: Extract artifact
@@ -38,7 +38,7 @@ jobs:
           rm artifact.tar

       - name: Download schema doc build artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
         with:
           name: schema_doc_artifact
           path: html_build/html

.github/workflows/build_html_doc.yml (+1 -1)

@@ -29,7 +29,7 @@ jobs:
       - name: Archive built HTMLs
         shell: bash
         run: tar -czf artifact.tar html_build/html
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: html_doc_artifact
           path: artifact.tar

.github/workflows/build_schema_page.yml (+1 -1)

@@ -31,7 +31,7 @@ jobs:
         run: tar -czf artifact.tar schema

       - name: Upload result as artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: schema_doc_artifact
           path: artifact.tar

.github/workflows/conformance_weight_compression.yml (+1 -1)

@@ -62,7 +62,7 @@ jobs:
         run: column -s, -t < tmp/results.csv || echo "no file"
       - name: Upload artifact
         if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: wc_results_${{ matrix.group }}
           path: tmp/results.csv

.github/workflows/nightly.yml (+1 -1)

@@ -21,7 +21,7 @@ jobs:
     timeout-minutes: 10
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - uses: AlexanderDokuchaev/md-dead-link-check@c7210ef8a38c194a119834e39d212387d19b512c # v1.1.0
+      - uses: AlexanderDokuchaev/md-dead-link-check@d5a37e0b14e5918605d22b34562532762ccb2e47 # v1.2.0

   tensorflow:
     runs-on: ubuntu-latest-8-cores

.github/workflows/pre-commit-linters.yml (+1 -1)

@@ -24,5 +24,5 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - uses: AlexanderDokuchaev/md-dead-link-check@c7210ef8a38c194a119834e39d212387d19b512c # v1.1.0
+      - uses: AlexanderDokuchaev/md-dead-link-check@d5a37e0b14e5918605d22b34562532762ccb2e47 # v1.2.0
.github/workflows/sdl.yml (+5 -5)

@@ -42,11 +42,11 @@ jobs:
         with:
           lfs: true
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
+        uses: github/codeql-action/init@5f8171a638ada777af81d42b55959a643bb29017 # v3.28.12
         with:
          languages: python
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
+        uses: github/codeql-action/analyze@5f8171a638ada777af81d42b55959a643bb29017 # v3.28.12
         with:
           category: "/language:python"

@@ -66,7 +66,7 @@ jobs:
           mv "report.pdf" "codeql_nncf_report_${DATE}_${REF_NAME//\//-}_${{ github.sha }}.pdf"
       - name: Upload CodeQL Artifacts
         if: ${{ github.event_name != 'pull_request' }}
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: codeql-scan-results
           path: "./codeql*.pdf"
@@ -81,7 +81,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Run trivy
-        uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # v0.29.0
+        uses: aquasecurity/trivy-action@6c175e9c4083a92bbca2f9724c8a5e33bc2d97a5 # v0.30.0
         with:
           scan-type: "fs"
           scan-ref: .
@@ -102,7 +102,7 @@ jobs:
           mv "trivy_report.html" "trivy_report_${DATE}_${REF_NAME//\//-}_${{ github.sha }}.html"
       - name: Upload Scan Results
         if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 #v4.6.0
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: trivy-report
           path: "./trivy_report*.html"

nncf/common/utils/cpu_info.py -> nncf/openvino/cpu_info.py (renamed, +7 -2)

@@ -11,13 +11,18 @@

 import re

-import cpuinfo  # type: ignore
+import openvino as ov

 _IS_LNL_CPU = None


 def is_lnl_cpu() -> bool:
+    """
+    Checks whether current CPU is an Intel Lunar Lake generation or not.
+
+    :return: True if current CPU is an Intel Lunar Lake generation, False otherwise.
+    """
     global _IS_LNL_CPU
     if _IS_LNL_CPU is None:
-        _IS_LNL_CPU = re.search(r"Ultra \d 2\d{2}", cpuinfo.get_cpu_info()["brand_raw"]) is not None
+        cpu_name = ov.Core().get_property("CPU", ov.properties.device.full_name)
+        _IS_LNL_CPU = re.search(r"Ultra \d 2\d{2}", cpu_name) is not None
     return _IS_LNL_CPU
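Note: is_lnl_cpu() now derives the CPU name from the OpenVINO runtime instead of py-cpuinfo (which this commit drops from pyproject.toml). A minimal standalone sketch of the pattern itself; the CPU name strings below are assumed examples, not values queried from hardware:

import re

def _matches_lnl(cpu_name: str) -> bool:
    # Same pattern as is_lnl_cpu(): "Ultra <digit> 2xx" marks Lunar Lake parts.
    return re.search(r"Ultra \d 2\d{2}", cpu_name) is not None

print(_matches_lnl("Intel(R) Core(TM) Ultra 7 268V"))  # True  (Lunar Lake)
print(_matches_lnl("Intel(R) Core(TM) Ultra 7 155H"))  # False (Meteor Lake)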

nncf/openvino/graph/nncf_graph_builder.py (+3 -1)

@@ -44,11 +44,13 @@ def convert_to_nncf_dtype(ov_type: ov.Type) -> Dtype:
     """
     type_name = ov_type.get_type_name()
     conversion_map = {
+        "nf4": "float",
+        "f8e4m3": "float",
+        "f8e5m2": "float",
         "f16": "float",
         "bf16": "float",
         "f32": "float",
         "f64": "float",
-        "nf4": "float",
         "i4": "int",
         "i8": "int",
         "i16": "int",

nncf/openvino/optimized_functions/models.py (+1 -1)

@@ -25,8 +25,8 @@
 from nncf.common.utils.backend import is_openvino_at_least
 from nncf.common.utils.caching import ResultsCache
 from nncf.common.utils.caching import cache_results
-from nncf.common.utils.cpu_info import is_lnl_cpu
 from nncf.common.utils.helpers import set_env_variable
+from nncf.openvino.cpu_info import is_lnl_cpu
 from nncf.openvino.graph.node_utils import convert_op
 from nncf.openvino.graph.node_utils import non_convertable_divide_op
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig

nncf/quantization/algorithms/weight_compression/algorithm.py (+31 -5)

@@ -46,6 +46,7 @@
 from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig
 from nncf.scopes import IgnoredScope
 from nncf.scopes import get_ignored_node_names_from_ignored_scope
+from nncf.tensor.definitions import TensorDataType

 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
@@ -57,6 +58,12 @@
     CompressWeightsMode.NF4,
     CompressWeightsMode.E2M1,
 ]
+SUPPORTED_DATA_TYPES = [
+    TensorDataType.float16,
+    TensorDataType.bfloat16,
+    TensorDataType.float32,
+    TensorDataType.float64,
+]


 def get_weight_compression_configuration(
@@ -160,9 +167,27 @@ def check_user_compression_configuration(
         msg = f"The ratio should be between 0 and 1, but ratio={ratio} is specified."
         raise nncf.ValidationError(msg)

-    if subset_size <= 0:
-        msg = f"The subset_size value should be positive, but subset_size={subset_size} is given."
-        raise nncf.ValidationError(msg)
+    values_to_check = [subset_size]
+    ranks = []
+    if advanced_parameters:
+        values_to_check.extend(
+            [
+                advanced_parameters.awq_params.subset_size,
+                advanced_parameters.scale_estimation_params.subset_size,
+                advanced_parameters.gptq_params.subset_size,
+                advanced_parameters.lora_correction_params.subset_size,
+            ]
+        )
+        ranks = [advanced_parameters.lora_adapter_rank, advanced_parameters.lora_correction_params.adapter_rank]
+    for size in values_to_check:
+        if size <= 0:
+            msg = f"The subset_size value should be positive, but subset_size={size} is given."
+            raise nncf.ValidationError(msg)
+
+    for rank in ranks:
+        if rank <= 0:
+            msg = f"The lora adapter rank should be positive, but rank={rank} is given."
+            raise nncf.ValidationError(msg)

     if (
         ratio
@@ -498,7 +523,7 @@ def _get_ignored_scope_weight_statistics(self, model: TModel, graph: NNCFGraph)
                 continue
             for _, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph):
                 weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
-                if weight_dtype.is_float():
+                if weight_dtype in SUPPORTED_DATA_TYPES:
                     continue
                 weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
                 weight_size = reduce(operator.mul, weight_shape, 1)
@@ -544,7 +569,7 @@ def apply(
                 continue

             weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
-            if not weight_dtype.is_float():
+            if weight_dtype not in SUPPORTED_DATA_TYPES:
                 continue
             weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
             weight_size = reduce(operator.mul, weight_shape, 1)
@@ -656,6 +681,7 @@ def apply(
             zero_points,
             lora_correction_algo,
             self._compression_format,
+            self._advanced_parameters,
         )

         self._backend_entity.dump_parameters(
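Note: a condensed, self-contained sketch of the tightened validation. The exception class and helper below are stand-ins that mirror the diff, not the real NNCF API:

class ValidationError(Exception):
    """Stand-in for nncf.ValidationError."""

def check_positive(subset_sizes, ranks):
    # Mirrors the new loops: every subset_size (top-level plus the AWQ,
    # scale-estimation, GPTQ and LoRA-correction sub-configs) and every
    # LoRA adapter rank must be strictly positive.
    for size in subset_sizes:
        if size <= 0:
            raise ValidationError(f"The subset_size value should be positive, but subset_size={size} is given.")
    for rank in ranks:
        if rank <= 0:
            raise ValidationError(f"The lora adapter rank should be positive, but rank={rank} is given.")

check_positive(subset_sizes=[128, 64], ranks=[8, 16])  # passes silently
try:
    check_positive(subset_sizes=[128], ranks=[-8])
except ValidationError as e:
    print(e)  # The lora adapter rank should be positive, but rank=-8 is given.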

nncf/tensor/definitions.py (+12 -1)

@@ -39,6 +39,9 @@ class TensorDataType(Enum):
     bfloat16 = auto()
     float32 = auto()
     float64 = auto()
+    f8e4m3 = auto()
+    f8e5m2 = auto()
+    nf4 = auto()
     int8 = auto()
     int32 = auto()
     int64 = auto()
@@ -50,7 +53,15 @@ def is_float(self) -> bool:
         """
         :return: True if the tensor data type is a floating-point type, else False.
         """
-        return self in [TensorDataType.float16, TensorDataType.bfloat16, TensorDataType.float32, TensorDataType.float64]
+        return self in [
+            TensorDataType.float16,
+            TensorDataType.bfloat16,
+            TensorDataType.float32,
+            TensorDataType.float64,
+            TensorDataType.f8e4m3,
+            TensorDataType.f8e5m2,
+            TensorDataType.nf4,
+        ]


 class TensorDeviceType(Enum):
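Note: is_float() now also returns True for the nf4/f8 types, which is presumably why weight compression (algorithm.py above) switched to its own explicit SUPPORTED_DATA_TYPES list: already-low-precision weights should not be picked up as compression candidates. A quick check of the widened predicate, assuming an NNCF build containing this change:

from nncf.tensor.definitions import TensorDataType

assert TensorDataType.nf4.is_float()
assert TensorDataType.f8e4m3.is_float()
assert TensorDataType.f8e5m2.is_float()
assert not TensorDataType.int8.is_float()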

nncf/tensor/functions/openvino_numeric.py (+20 -5)

@@ -21,6 +21,9 @@
 from nncf.tensor.functions import numeric

 DTYPE_MAP: Dict[TensorDataType, ov.Type] = {
+    TensorDataType.nf4: ov.Type.nf4,
+    TensorDataType.f8e4m3: ov.Type.f8e4m3,
+    TensorDataType.f8e5m2: ov.Type.f8e5m2,
     TensorDataType.float16: ov.Type.f16,
     TensorDataType.bfloat16: ov.Type.bf16,
     TensorDataType.float32: ov.Type.f32,
@@ -48,12 +51,17 @@ def _(a: ov.Tensor) -> TensorBackend:

 @numeric.astype.register
 def _(a: ov.Tensor, dtype: TensorDataType) -> ov.Tensor:
-    if a.get_element_type() in [ov.Type.bf16, ov.Type.i4, ov.Type.u4] or dtype in [
+    ov_cast_types = [
         TensorDataType.bfloat16,
         TensorDataType.int4,
         TensorDataType.uint4,
-    ]:
-        # Cannot cast to/from bfloat16, uint4, int4 directly
+        TensorDataType.nf4,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+    ]
+    a_dtype = DTYPE_MAP_REV[a.get_element_type()]
+    if a_dtype in ov_cast_types or dtype in ov_cast_types:
+        # Cast using OpenVINO because the target or source dtype requires special handling
         return _astype_ov(a, dtype)
     return ov.Tensor(numeric.astype(a.data, dtype).data)

@@ -75,9 +83,16 @@ def _(a: ov.Tensor, shape: Union[int, Tuple[int, ...]]) -> ov.Tensor:

 @numeric.as_numpy_tensor.register
 def _(a: ov.Tensor) -> NDArray[Any]:
-    # Cannot convert bfloat16, uint4, int4 to numpy directly
+    # Cannot convert bfloat16, uint4, int4, nf4, f8e4m3, f8e5m2 to numpy directly
     a_dtype = DTYPE_MAP_REV[a.get_element_type()]
-    if a_dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
+    if a_dtype in [
+        TensorDataType.bfloat16,
+        TensorDataType.uint4,
+        TensorDataType.int4,
+        TensorDataType.nf4,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+    ]:
         dtype = TensorDataType.float32
         if a_dtype == TensorDataType.uint4:
             dtype = TensorDataType.uint8
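Note: a standalone sketch of the dispatch decision behind astype(). The helper name is hypothetical and the type set mirrors the diff; NumPy has no native representation for these element types, so any cast touching them must run through OpenVINO rather than a NumPy view. Assumes an OpenVINO build exposing nf4/f8 types:

import numpy as np
import openvino as ov

OV_ONLY_TYPES = {ov.Type.bf16, ov.Type.i4, ov.Type.u4,
                 ov.Type.nf4, ov.Type.f8e4m3, ov.Type.f8e5m2}

def needs_ov_cast(src: ov.Type, dst: ov.Type) -> bool:
    # Mirrors the new check: route through an OV model cast (_astype_ov)
    # whenever either endpoint has no NumPy equivalent.
    return src in OV_ONLY_TYPES or dst in OV_ONLY_TYPES

t = ov.Tensor(np.ones((2, 2), dtype=np.float32))
print(needs_ov_cast(t.get_element_type(), ov.Type.nf4))  # True: OV path
print(needs_ov_cast(t.get_element_type(), ov.Type.f16))  # False: NumPy path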

nncf/torch/quantization/layers.py (+23 -2)

@@ -768,6 +768,10 @@ def signed(self, signed: bool):
         self.set_levels()

     def quantize(self, x, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return symmetric_quantize(
             x, self.levels, self.level_low, self.level_high, self.scale, self.eps, skip=execute_traced_op_as_identity
         )
@@ -955,6 +959,10 @@ def set_levels(self):
         self.level_low, self.level_high = calculate_asymmetric_level_ranges(self.num_bits - scaled_num_bits)

     def quantize(self, x, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return asymmetric_quantize(
             x,
             self.levels,
@@ -1067,9 +1075,14 @@ class LoraMixin:

     def init_lora(self, lspec: PTLoraSpec):
         self._lspec = lspec
+        default_lora_dtype = torch.bfloat16
         out_features, in_features = lspec.orig_weight_shape
-        self.lora_A = torch.nn.Parameter(torch.ones((lspec.lora_rank, in_features), dtype=torch.bfloat16))
-        self.lora_B = torch.nn.Parameter(torch.zeros((out_features, lspec.lora_rank), dtype=torch.bfloat16))
+        rank = lspec.lora_rank
+        if rank > out_features or rank > in_features:
+            msg = f"Specified LoRA rank={rank} cannot exceed any dimension of the weight tensor"
+            raise nncf.ValidationError(msg)
+        self.lora_A = torch.nn.Parameter(torch.ones((rank, in_features), dtype=default_lora_dtype))
+        self.lora_B = torch.nn.Parameter(torch.zeros((out_features, rank), dtype=default_lora_dtype))

     def enable_gradients(self):
         self.lora_A.requires_grad = True
@@ -1097,6 +1110,10 @@ def __init__(self, qspec: PTQuantizerSpec, lspec: PTLoraSpec):
         self.init_lora(lspec)

     def quantize(self, x: torch.Tensor, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return asymmetric_quantize_lora(
             x,
             self._lspec.weight_shape,
@@ -1142,6 +1159,10 @@ def __init__(self, qspec: PTQuantizerSpec, lspec: PTLoraSpec):
         self.init_lora(lspec)

     def quantize(self, x, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return symmetric_quantize_lora(
             x,
             self._lspec.weight_shape,
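Note: a toy illustration of the new LoRA rank constraint, with arbitrary shapes. Since lora_A is (rank, in_features) and lora_B is (out_features, rank), the rank cannot exceed either weight dimension:

import torch

out_features, in_features, rank = 64, 128, 16
assert rank <= out_features and rank <= in_features  # the new init_lora() check

lora_A = torch.nn.Parameter(torch.ones((rank, in_features), dtype=torch.bfloat16))
lora_B = torch.nn.Parameter(torch.zeros((out_features, rank), dtype=torch.bfloat16))
# The low-rank update reconstructs the full weight's shape:
print((lora_B.float() @ lora_A.float()).shape)  # torch.Size([64, 128])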

nncf/torch/quantization/reference.py (+2 -2)

@@ -121,5 +121,5 @@ def tune_range(

 class ReferenceQuantizedFunctions:
     _executor = ReferenceQuantize(backend_type=ReferenceBackendType.TORCH)
-    Quantize_forward = torch.compile(_executor.forward)
-    Quantize_backward = torch.compile(_executor.backward)
+    Quantize_forward = _executor.forward
+    Quantize_backward = _executor.backward

pyproject.toml (-1)

@@ -43,7 +43,6 @@ dependencies = [
     "packaging>=20.0",
     "pandas>=1.1.5,<2.3",
     "psutil",
-    "py-cpuinfo>=9.0.0",
     "pydot>=1.4.1, <=3.0.4",
     "pymoo>=0.6.0.1",
     "rich>=13.5.2",
