Skip to content

Commit 553b21b

Browse files
authored
xfail for bug with f16 zp (#3220)
### Changes

Mark as xfail the PTWC torch tests that count the number of int4/int8 ops.

### Reason for changes

Starting from OV 2025, ZP is represented as f16 in some cases (bug: 160006).

### Related tickets

159993, 160006

### Tests

- [x] openvino-nightly/job/post_training_weight_compression/73 ![image](https://github.com/user-attachments/assets/bac6b5f0-0d8a-4890-9558-3ce6028ca56c)
- [x] manual/job/post_training_weight_compression/303 ![image](https://github.com/user-attachments/assets/0d76fa7a-baf6-4515-bdd5-7bdda11b046e)
1 parent 0333814 commit 553b21b

9 files changed

+81
-34
lines changed

tests/post_training/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,11 @@ To mark a test as expected to fail (xfail) when a validation metric does not mee
152152
...
153153
metrics_xfail_reason: "Issue-<jira ticket number>"
154154
```
155+
156+
To mark a test as expected to fail (xfail) when the number of compressed operations (e.g. `num_int4`, `num_int8`) does not match the reference values, add the following line to the reference data:
157+
158+
```yml
159+
<Name from model scopes>_backend_<BACKEND>:
160+
...
161+
num_compressed_xfail_reason: "Issue-<jira ticket number>"
162+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
tinyllama_int8_data_free_backend_TORCH:
2+
metric_value: 0.95624
3+
num_int4: 0
4+
num_int8: 312
5+
num_compressed_xfail_reason: "Issue-160006"
6+
tinyllama_int4_data_free_backend_TORCH:
7+
metric_value: 0.73873
8+
num_int4: 114
9+
num_int8: 84
10+
num_compressed_xfail_reason: "Issue-160006"

tests/post_training/experimental/sparsify_activations/pipelines.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS
3636
from tests.post_training.pipelines.base import PT_BACKENDS
3737
from tests.post_training.pipelines.base import BackendType
38+
from tests.post_training.pipelines.base import ErrorReason
39+
from tests.post_training.pipelines.base import ErrorReport
3840
from tests.post_training.pipelines.base import NumCompressNodes
3941
from tests.post_training.pipelines.base import RunInfo
4042
from tests.post_training.pipelines.image_classification_timm import ImageClassificationTimm
@@ -170,13 +172,14 @@ def _compress(self):
170172
)
171173

172174
def _validate(self):
173-
super()._validate()
175+
errors = super()._validate()
174176
ref_num_sparse_activations = self.reference_data.get("num_sparse_activations", 0)
175177
num_sparse_activations = self.run_info.num_compress_nodes.num_sparse_activations
176178
if num_sparse_activations != ref_num_sparse_activations:
177179
status_msg = f"Regression: The number of sparse activations is {num_sparse_activations}, \
178180
which differs from reference {ref_num_sparse_activations}."
179-
raise ValueError(status_msg)
181+
errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
182+
return errors
180183

181184

182185
class LMSparsifyActivations(SAPipelineMixin, LMWeightCompression):

tests/post_training/pipelines/base.py

+43-13
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from datetime import timedelta
2020
from enum import Enum
2121
from pathlib import Path
22-
from typing import Dict, Optional
22+
from typing import Dict, List, Optional
2323

2424
import numpy as np
2525
import onnx
@@ -36,7 +36,18 @@
3636
from tools.memory_monitor import memory_monitor_context
3737

3838
DEFAULT_VAL_THREADS = 4
39-
METRICS_XFAIL_REASON = "metrics_xfail_reason"
39+
XFAIL_SUFFIX = "_xfail_reason"
40+
41+
42+
class ErrorReason(Enum):
43+
METRICS = "metrics"
44+
NUM_COMPRESSED = "num_compressed"
45+
46+
47+
@dataclass
48+
class ErrorReport:
49+
reason: ErrorReason
50+
msg: str
4051

4152

4253
class BackendType(Enum):
@@ -278,9 +289,31 @@ def get_num_compressed(self) -> None:
278289
def run_bench(self) -> None:
279290
"""Run a benchmark to collect performance statistics."""
280291

281-
@abstractmethod
282-
def _validate(self) -> None:
283-
"""Validate IR."""
292+
def _validate(self) -> List[ErrorReport]:
293+
"""
294+
Validates some test criteria.
295+
returns:
296+
A list of error reports generated during validation.
297+
"""
298+
return []
299+
300+
def _process_errors(self, errors) -> str:
301+
"""
302+
Processes a list of error reports and updates the run status.
303+
304+
:param errors: A list of error reports.
305+
:return: A string representing the concatenated statuses of the processed errors.
306+
"""
307+
xfails, msg_list = [], []
308+
for report in errors:
309+
xfail_reason = report.reason.value + XFAIL_SUFFIX
310+
if xfail_reason in self.reference_data:
311+
xfails.append(f"XFAIL: {self.reference_data[xfail_reason]} - {report.msg}")
312+
else:
313+
msg_list.append(report.msg)
314+
if msg_list:
315+
raise ValueError("\n".join(msg_list))
316+
self.run_info.status = "\n".join(xfails)
284317

285318
def prepare(self):
286319
"""
@@ -302,7 +335,7 @@ def validate(self) -> None:
302335
return
303336
print("Validation...")
304337

305-
self._validate()
338+
errors = self._validate()
306339

307340
metric_value = self.run_info.metric_value
308341
metric_reference = self.reference_data.get("metric_value")
@@ -311,22 +344,19 @@ def validate(self) -> None:
311344
if metric_value is not None and metric_value_fp32 is not None:
312345
self.run_info.metric_diff = round(self.run_info.metric_value - self.reference_data["metric_value_fp32"], 5)
313346

314-
status_msg = None
315347
if (
316348
metric_value is not None
317349
and metric_reference is not None
318350
and not np.isclose(metric_value, metric_reference, atol=self.reference_data.get("atol", 0.001))
319351
):
352+
status_msg = None
320353
if metric_value < metric_reference:
321354
status_msg = f"Regression: Metric value is less than reference {metric_value} < {metric_reference}"
322355
if metric_value > metric_reference:
323356
status_msg = f"Improvement: Metric value is better than reference {metric_value} > {metric_reference}"
324-
325-
if status_msg is not None:
326-
if METRICS_XFAIL_REASON in self.reference_data:
327-
self.run_info.status = f"XFAIL: {self.reference_data[METRICS_XFAIL_REASON]} - {status_msg}"
328-
else:
329-
raise ValueError(status_msg)
357+
if status_msg:
358+
errors.append(ErrorReport(ErrorReason.METRICS, status_msg))
359+
self._process_errors(errors)
330360

331361
def run(self) -> None:
332362
"""

tests/post_training/pipelines/causal_language_model.py

-3
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,3 @@ def prepare_calibration_dataset(self):
5757

5858
if self.backend == BackendType.OPTIMUM:
5959
self.calibration_dataset = calibration_dataset
60-
61-
def _validate(self):
62-
pass

tests/post_training/pipelines/gpt.py

-3
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,3 @@ def preprocess_function(examples):
9595
self.calibration_dataset = calibration_dataset
9696
else:
9797
self.calibration_dataset = nncf.Dataset(calibration_dataset, self.get_transform_calibration_fn())
98-
99-
def _validate(self):
100-
pass

tests/post_training/pipelines/image_classification_base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import copy
1313
import os
14+
from typing import List
1415

1516
import numpy as np
1617
import openvino as ov
@@ -21,6 +22,7 @@
2122
import nncf
2223
from nncf.common.logging.track_progress import track
2324
from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
25+
from tests.post_training.pipelines.base import ErrorReport
2426
from tests.post_training.pipelines.base import PTQTestPipeline
2527

2628

@@ -33,7 +35,7 @@ def prepare_calibration_dataset(self):
3335

3436
self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())
3537

36-
def _validate(self):
38+
def _validate(self) -> List[ErrorReport]:
3739
val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
3840
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)
3941

@@ -78,3 +80,4 @@ def process_result(request, userdata):
7880

7981
self.run_info.metric_name = "Acc@1"
8082
self.run_info.metric_value = acc_top1
83+
return []

tests/post_training/pipelines/lm_weight_compression.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import shutil
1515
import time
1616
from dataclasses import dataclass
17-
from typing import Dict, Optional
17+
from typing import Dict, List, Optional
1818

1919
import numpy as np
2020
import openvino as ov
@@ -31,6 +31,8 @@
3131
from tests.cross_fw.shared.paths import TEST_ROOT
3232
from tests.post_training.pipelines.base import BackendType
3333
from tests.post_training.pipelines.base import BaseTestPipeline
34+
from tests.post_training.pipelines.base import ErrorReason
35+
from tests.post_training.pipelines.base import ErrorReport
3436
from tests.post_training.pipelines.base import StatsFromOutput
3537
from tools.memory_monitor import MemoryType
3638
from tools.memory_monitor import MemoryUnit
@@ -257,7 +259,8 @@ def _compress(self):
257259
**self.compression_params,
258260
)
259261

260-
def _validate(self):
262+
def _validate(self) -> List[ErrorReport]:
263+
errors = []
261264
is_stateful = self.params.get("is_stateful", False)
262265
core = ov.Core()
263266

@@ -309,12 +312,11 @@ def _validate(self):
309312
num_int4_value = self.run_info.num_compress_nodes.num_int4
310313
num_int8_value = self.run_info.num_compress_nodes.num_int8
311314

315+
template = "Regression: The number of int{} ops is different than reference {} != {}"
312316
if num_int4_reference != num_int4_value:
313-
status_msg = f"Regression: The number of int4 ops is different \
314-
than reference {num_int4_reference} != {num_int4_value}"
315-
raise ValueError(status_msg)
316-
317+
status_msg = template.format(4, num_int4_reference, num_int4_value)
318+
errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
317319
if num_int8_reference != num_int8_value:
318-
status_msg = f"Regression: The number of int8 ops is different \
319-
than reference {num_int8_reference} != {num_int8_value}"
320-
raise ValueError(status_msg)
320+
status_msg = template.format(8, num_int8_reference, num_int8_value)
321+
errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
322+
return errors

tests/post_training/pipelines/masked_language_modeling.py

-3
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,3 @@ def preprocess_function(examples):
106106
self.calibration_dataset = calibration_dataset
107107
else:
108108
self.calibration_dataset = nncf.Dataset(calibration_dataset, self.get_transform_calibration_fn())
109-
110-
def _validate(self):
111-
pass

0 commit comments

Comments
 (0)