Commit 85cea14

Merge remote-tracking branch 'origin/develop' into pr-3331

2 parents: c9eef2e + d614c1e
26 files changed: +254 -50 lines

.github/workflows/api_changes_check.yml (+2 -2)

@@ -26,7 +26,7 @@ jobs:
       issues: write
     steps:
       - name: Download built HTML doc as artifact from previous step
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
         with:
           name: html_doc_artifact
       - run: |
@@ -63,7 +63,7 @@ jobs:
           echo '{"pr_number": "${{ github.event.pull_request.number }}", "action": "none"}' > api_status.json

       - name: Upload artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: api_status
           path: api_status.json

.github/workflows/build_and_publish_doc.yml (+2 -2)

@@ -28,7 +28,7 @@ jobs:
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

       - name: Download HTML doc build artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
         with:
           name: html_doc_artifact
       - name: Extract artifact
@@ -38,7 +38,7 @@ jobs:
           rm artifact.tar

       - name: Download schema doc build artifact
-        uses: actions/download-artifact@cc203385981b70ca67e1cc392babf9cc229d5806 # v4.1.9
+        uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
         with:
           name: schema_doc_artifact
           path: html_build/html

.github/workflows/build_html_doc.yml (+1 -1)

@@ -29,7 +29,7 @@ jobs:
       - name: Archive built HTMLs
         shell: bash
         run: tar -czf artifact.tar html_build/html
-      - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: html_doc_artifact
           path: artifact.tar

.github/workflows/build_schema_page.yml (+1 -1)

@@ -31,7 +31,7 @@ jobs:
         run: tar -czf artifact.tar schema

       - name: Upload result as artifact
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: schema_doc_artifact
           path: artifact.tar

.github/workflows/conformance_weight_compression.yml (+1 -1)

@@ -62,7 +62,7 @@ jobs:
         run: column -s, -t < tmp/results.csv || echo "no file"
       - name: Upload artifact
         if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: wc_results_${{ matrix.group }}
           path: tmp/results.csv

.github/workflows/nightly.yml (+1 -1)

@@ -21,7 +21,7 @@ jobs:
     timeout-minutes: 10
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - uses: AlexanderDokuchaev/md-dead-link-check@c7210ef8a38c194a119834e39d212387d19b512c # v1.1.0
+      - uses: AlexanderDokuchaev/md-dead-link-check@d5a37e0b14e5918605d22b34562532762ccb2e47 # v1.2.0

   tensorflow:
     runs-on: ubuntu-latest-8-cores

.github/workflows/pre-commit-linters.yml (+1 -1)

@@ -24,5 +24,5 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - uses: AlexanderDokuchaev/md-dead-link-check@c7210ef8a38c194a119834e39d212387d19b512c # v1.1.0
+      - uses: AlexanderDokuchaev/md-dead-link-check@d5a37e0b14e5918605d22b34562532762ccb2e47 # v1.2.0
.github/workflows/sdl.yml (+5 -5)

@@ -42,11 +42,11 @@ jobs:
         with:
           lfs: true
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
+        uses: github/codeql-action/init@5f8171a638ada777af81d42b55959a643bb29017 # v3.28.12
         with:
          languages: python
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
+        uses: github/codeql-action/analyze@5f8171a638ada777af81d42b55959a643bb29017 # v3.28.12
         with:
           category: "/language:python"

@@ -66,7 +66,7 @@ jobs:
           mv "report.pdf" "codeql_nncf_report_${DATE}_${REF_NAME//\//-}_${{ github.sha }}.pdf"
       - name: Upload CodeQL Artifacts
         if: ${{ github.event_name != 'pull_request' }}
-        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 #v4.6.1
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: codeql-scan-results
           path: "./codeql*.pdf"
@@ -81,7 +81,7 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Run trivy
-        uses: aquasecurity/trivy-action@18f2510ee396bbf400402947b394f2dd8c87dbb0 # v0.29.0
+        uses: aquasecurity/trivy-action@6c175e9c4083a92bbca2f9724c8a5e33bc2d97a5 # v0.30.0
         with:
           scan-type: "fs"
           scan-ref: .
@@ -102,7 +102,7 @@ jobs:
           mv "trivy_report.html" "trivy_report_${DATE}_${REF_NAME//\//-}_${{ github.sha }}.html"
       - name: Upload Scan Results
         if: ${{ !cancelled() }}
-        uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 #v4.6.0
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 #v4.6.2
         with:
           name: trivy-report
           path: "./trivy_report*.html"

nncf/common/utils/cpu_info.py -> nncf/openvino/cpu_info.py (renamed, +7 -2)

@@ -11,13 +11,18 @@

 import re

-import cpuinfo  # type: ignore
+import openvino as ov

 _IS_LNL_CPU = None


 def is_lnl_cpu() -> bool:
+    """
+    Checks whether current CPU is an Intel Lunar Lake generation or not.
+
+    :return: True if current CPU is an Intel Lunar Lake generation, False otherwise.
+    """
     global _IS_LNL_CPU
     if _IS_LNL_CPU is None:
-        _IS_LNL_CPU = re.search(r"Ultra \d 2\d{2}", cpuinfo.get_cpu_info()["brand_raw"]) is not None
+        cpu_name = ov.Core().get_property("CPU", ov.properties.device.full_name)
+        _IS_LNL_CPU = re.search(r"Ultra \d 2\d{2}", cpu_name) is not None
     return _IS_LNL_CPU
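Note: is_lnl_cpu() now derives the CPU name from the OpenVINO runtime instead of py-cpuinfo (which this commit drops from pyproject.toml). A minimal standalone sketch of the pattern itself; the CPU name strings below are assumed examples, not values queried from hardware:

import re

def _matches_lnl(cpu_name: str) -> bool:
    # Same pattern as is_lnl_cpu(): "Ultra <digit> 2xx" marks Lunar Lake parts.
    return re.search(r"Ultra \d 2\d{2}", cpu_name) is not None

print(_matches_lnl("Intel(R) Core(TM) Ultra 7 268V"))  # True  (Lunar Lake)
print(_matches_lnl("Intel(R) Core(TM) Ultra 7 155H"))  # False (Meteor Lake)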

nncf/openvino/graph/nncf_graph_builder.py (+3 -1)

@@ -44,11 +44,13 @@ def convert_to_nncf_dtype(ov_type: ov.Type) -> Dtype:
     """
     type_name = ov_type.get_type_name()
     conversion_map = {
+        "nf4": "float",
+        "f8e4m3": "float",
+        "f8e5m2": "float",
         "f16": "float",
         "bf16": "float",
         "f32": "float",
         "f64": "float",
-        "nf4": "float",
         "i4": "int",
         "i8": "int",
         "i16": "int",

nncf/openvino/optimized_functions/models.py (+1 -1)

@@ -25,8 +25,8 @@
 from nncf.common.utils.backend import is_openvino_at_least
 from nncf.common.utils.caching import ResultsCache
 from nncf.common.utils.caching import cache_results
-from nncf.common.utils.cpu_info import is_lnl_cpu
 from nncf.common.utils.helpers import set_env_variable
+from nncf.openvino.cpu_info import is_lnl_cpu
 from nncf.openvino.graph.node_utils import convert_op
 from nncf.openvino.graph.node_utils import non_convertable_divide_op
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig

nncf/quantization/algorithms/weight_compression/algorithm.py (+31 -5)

@@ -46,6 +46,7 @@
 from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig
 from nncf.scopes import IgnoredScope
 from nncf.scopes import get_ignored_node_names_from_ignored_scope
+from nncf.tensor.definitions import TensorDataType

 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
@@ -57,6 +58,12 @@
     CompressWeightsMode.NF4,
     CompressWeightsMode.E2M1,
 ]
+SUPPORTED_DATA_TYPES = [
+    TensorDataType.float16,
+    TensorDataType.bfloat16,
+    TensorDataType.float32,
+    TensorDataType.float64,
+]


 def get_weight_compression_configuration(
@@ -160,9 +167,27 @@ def check_user_compression_configuration(
         msg = f"The ratio should be between 0 and 1, but ratio={ratio} is specified."
         raise nncf.ValidationError(msg)

-    if subset_size <= 0:
-        msg = f"The subset_size value should be positive, but subset_size={subset_size} is given."
-        raise nncf.ValidationError(msg)
+    values_to_check = [subset_size]
+    ranks = []
+    if advanced_parameters:
+        values_to_check.extend(
+            [
+                advanced_parameters.awq_params.subset_size,
+                advanced_parameters.scale_estimation_params.subset_size,
+                advanced_parameters.gptq_params.subset_size,
+                advanced_parameters.lora_correction_params.subset_size,
+            ]
+        )
+        ranks = [advanced_parameters.lora_adapter_rank, advanced_parameters.lora_correction_params.adapter_rank]
+    for size in values_to_check:
+        if size <= 0:
+            msg = f"The subset_size value should be positive, but subset_size={size} is given."
+            raise nncf.ValidationError(msg)
+
+    for rank in ranks:
+        if rank <= 0:
+            msg = f"The lora adapter rank should be positive, but rank={rank} is given."
+            raise nncf.ValidationError(msg)

     if (
         ratio
@@ -498,7 +523,7 @@ def _get_ignored_scope_weight_statistics(self, model: TModel, graph: NNCFGraph)
                 continue
             for _, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph):
                 weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
-                if weight_dtype.is_float():
+                if weight_dtype in SUPPORTED_DATA_TYPES:
                     continue
                 weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
                 weight_size = reduce(operator.mul, weight_shape, 1)
@@ -544,7 +569,7 @@ def apply(
                 continue

             weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
-            if not weight_dtype.is_float():
+            if weight_dtype not in SUPPORTED_DATA_TYPES:
                 continue
             weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
             weight_size = reduce(operator.mul, weight_shape, 1)
@@ -656,6 +681,7 @@ def apply(
             zero_points,
             lora_correction_algo,
             self._compression_format,
+            self._advanced_parameters,
         )

         self._backend_entity.dump_parameters(
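Note: a condensed, self-contained sketch of the tightened validation. The exception class and helper below are stand-ins that mirror the diff, not the real NNCF API:

class ValidationError(Exception):
    """Stand-in for nncf.ValidationError."""

def check_positive(subset_sizes, ranks):
    # Mirrors the new loops: every subset_size (top-level plus the AWQ,
    # scale-estimation, GPTQ and LoRA-correction sub-configs) and every
    # LoRA adapter rank must be strictly positive.
    for size in subset_sizes:
        if size <= 0:
            raise ValidationError(f"The subset_size value should be positive, but subset_size={size} is given.")
    for rank in ranks:
        if rank <= 0:
            raise ValidationError(f"The lora adapter rank should be positive, but rank={rank} is given.")

check_positive(subset_sizes=[128, 64], ranks=[8, 16])  # passes silently
try:
    check_positive(subset_sizes=[128], ranks=[-8])
except ValidationError as e:
    print(e)  # The lora adapter rank should be positive, but rank=-8 is given.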

nncf/tensor/definitions.py (+12 -1)

@@ -39,6 +39,9 @@ class TensorDataType(Enum):
     bfloat16 = auto()
     float32 = auto()
     float64 = auto()
+    f8e4m3 = auto()
+    f8e5m2 = auto()
+    nf4 = auto()
     int8 = auto()
     int32 = auto()
     int64 = auto()
@@ -50,7 +53,15 @@ def is_float(self) -> bool:
         """
         :return: True if the tensor data type is a floating-point type, else False.
         """
-        return self in [TensorDataType.float16, TensorDataType.bfloat16, TensorDataType.float32, TensorDataType.float64]
+        return self in [
+            TensorDataType.float16,
+            TensorDataType.bfloat16,
+            TensorDataType.float32,
+            TensorDataType.float64,
+            TensorDataType.f8e4m3,
+            TensorDataType.f8e5m2,
+            TensorDataType.nf4,
+        ]


 class TensorDeviceType(Enum):
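Note: is_float() now also returns True for the nf4/f8 types, which is presumably why weight compression (algorithm.py above) switched to its own explicit SUPPORTED_DATA_TYPES list: already-low-precision weights should not be picked up as compression candidates. A quick check of the widened predicate, assuming an NNCF build containing this change:

from nncf.tensor.definitions import TensorDataType

assert TensorDataType.nf4.is_float()
assert TensorDataType.f8e4m3.is_float()
assert TensorDataType.f8e5m2.is_float()
assert not TensorDataType.int8.is_float()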

nncf/tensor/functions/openvino_numeric.py (+20 -5)

@@ -21,6 +21,9 @@
 from nncf.tensor.functions import numeric

 DTYPE_MAP: Dict[TensorDataType, ov.Type] = {
+    TensorDataType.nf4: ov.Type.nf4,
+    TensorDataType.f8e4m3: ov.Type.f8e4m3,
+    TensorDataType.f8e5m2: ov.Type.f8e5m2,
     TensorDataType.float16: ov.Type.f16,
     TensorDataType.bfloat16: ov.Type.bf16,
     TensorDataType.float32: ov.Type.f32,
@@ -48,12 +51,17 @@ def _(a: ov.Tensor) -> TensorBackend:

 @numeric.astype.register
 def _(a: ov.Tensor, dtype: TensorDataType) -> ov.Tensor:
-    if a.get_element_type() in [ov.Type.bf16, ov.Type.i4, ov.Type.u4] or dtype in [
+    ov_cast_types = [
         TensorDataType.bfloat16,
         TensorDataType.int4,
         TensorDataType.uint4,
-    ]:
-        # Cannot cast to/from bfloat16, uint4, int4 directly
+        TensorDataType.nf4,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+    ]
+    a_dtype = DTYPE_MAP_REV[a.get_element_type()]
+    if a_dtype in ov_cast_types or dtype in ov_cast_types:
+        # Cast using OpenVINO because the target or source dtype requires special handling
         return _astype_ov(a, dtype)
     return ov.Tensor(numeric.astype(a.data, dtype).data)

@@ -75,9 +83,16 @@ def _(a: ov.Tensor, shape: Union[int, Tuple[int, ...]]) -> ov.Tensor:

 @numeric.as_numpy_tensor.register
 def _(a: ov.Tensor) -> NDArray[Any]:
-    # Cannot convert bfloat16, uint4, int4 to numpy directly
+    # Cannot convert bfloat16, uint4, int4, nf4, f8e4m3, f8e5m2 to numpy directly
     a_dtype = DTYPE_MAP_REV[a.get_element_type()]
-    if a_dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
+    if a_dtype in [
+        TensorDataType.bfloat16,
+        TensorDataType.uint4,
+        TensorDataType.int4,
+        TensorDataType.nf4,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+    ]:
         dtype = TensorDataType.float32
         if a_dtype == TensorDataType.uint4:
             dtype = TensorDataType.uint8
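Note: a standalone sketch of the dispatch decision behind astype(). The helper name is hypothetical and the type set mirrors the diff; NumPy has no native representation for these element types, so any cast touching them must run through OpenVINO rather than a NumPy view. Assumes an OpenVINO build exposing nf4/f8 types:

import numpy as np
import openvino as ov

OV_ONLY_TYPES = {ov.Type.bf16, ov.Type.i4, ov.Type.u4,
                 ov.Type.nf4, ov.Type.f8e4m3, ov.Type.f8e5m2}

def needs_ov_cast(src: ov.Type, dst: ov.Type) -> bool:
    # Mirrors the new check: route through an OV model cast (_astype_ov)
    # whenever either endpoint has no NumPy equivalent.
    return src in OV_ONLY_TYPES or dst in OV_ONLY_TYPES

t = ov.Tensor(np.ones((2, 2), dtype=np.float32))
print(needs_ov_cast(t.get_element_type(), ov.Type.nf4))  # True: OV path
print(needs_ov_cast(t.get_element_type(), ov.Type.f16))  # False: NumPy path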

nncf/torch/quantization/layers.py (+23 -2)

@@ -768,6 +768,10 @@ def signed(self, signed: bool):
         self.set_levels()

     def quantize(self, x, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return symmetric_quantize(
             x, self.levels, self.level_low, self.level_high, self.scale, self.eps, skip=execute_traced_op_as_identity
         )
@@ -955,6 +959,10 @@ def set_levels(self):
         self.level_low, self.level_high = calculate_asymmetric_level_ranges(self.num_bits - scaled_num_bits)

     def quantize(self, x, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return asymmetric_quantize(
             x,
             self.levels,
@@ -1067,9 +1075,14 @@ class LoraMixin:

     def init_lora(self, lspec: PTLoraSpec):
         self._lspec = lspec
+        default_lora_dtype = torch.bfloat16
         out_features, in_features = lspec.orig_weight_shape
-        self.lora_A = torch.nn.Parameter(torch.ones((lspec.lora_rank, in_features), dtype=torch.bfloat16))
-        self.lora_B = torch.nn.Parameter(torch.zeros((out_features, lspec.lora_rank), dtype=torch.bfloat16))
+        rank = lspec.lora_rank
+        if rank > out_features or rank > in_features:
+            msg = f"Specified LoRA rank={rank} cannot exceed any dimension of the weight tensor"
+            raise nncf.ValidationError(msg)
+        self.lora_A = torch.nn.Parameter(torch.ones((rank, in_features), dtype=default_lora_dtype))
+        self.lora_B = torch.nn.Parameter(torch.zeros((out_features, rank), dtype=default_lora_dtype))

     def enable_gradients(self):
         self.lora_A.requires_grad = True
@@ -1097,6 +1110,10 @@ def __init__(self, qspec: PTQuantizerSpec, lspec: PTLoraSpec):
         self.init_lora(lspec)

     def quantize(self, x: torch.Tensor, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return asymmetric_quantize_lora(
             x,
             self._lspec.weight_shape,
@@ -1142,6 +1159,10 @@ def __init__(self, qspec: PTQuantizerSpec, lspec: PTLoraSpec):
         self.init_lora(lspec)

     def quantize(self, x, execute_traced_op_as_identity: bool = False):
+        # TODO: (dokuchaev) remove within new tracing (ticket-163869)
+        with DisableTorchFunction():
+            # in multi-device case after loading nncf checkpoint, quantizers have a different device.
+            self.to(x.device)
         return symmetric_quantize_lora(
             x,
             self._lspec.weight_shape,
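Note: a toy illustration of the new LoRA rank constraint, with arbitrary shapes. Since lora_A is (rank, in_features) and lora_B is (out_features, rank), the rank cannot exceed either weight dimension:

import torch

out_features, in_features, rank = 64, 128, 16
assert rank <= out_features and rank <= in_features  # the new init_lora() check

lora_A = torch.nn.Parameter(torch.ones((rank, in_features), dtype=torch.bfloat16))
lora_B = torch.nn.Parameter(torch.zeros((out_features, rank), dtype=torch.bfloat16))
# The low-rank update reconstructs the full weight's shape:
print((lora_B.float() @ lora_A.float()).shape)  # torch.Size([64, 128])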

nncf/torch/quantization/reference.py (+2 -2)

@@ -121,5 +121,5 @@ def tune_range(

 class ReferenceQuantizedFunctions:
     _executor = ReferenceQuantize(backend_type=ReferenceBackendType.TORCH)
-    Quantize_forward = torch.compile(_executor.forward)
-    Quantize_backward = torch.compile(_executor.backward)
+    Quantize_forward = _executor.forward
+    Quantize_backward = _executor.backward

pyproject.toml (-1)

@@ -43,7 +43,6 @@ dependencies = [
     "packaging>=20.0",
     "pandas>=1.1.5,<2.3",
     "psutil",
-    "py-cpuinfo>=9.0.0",
     "pydot>=1.4.1, <=3.0.4",
     "pymoo>=0.6.0.1",
     "rich>=13.5.2",
