Commit 704c9e4

make common run_pipeline()
1 parent 89aba29 · commit 704c9e4

7 files changed, +203 -236 lines changed


tests/post_training/data/ptq_reference_data.yaml

+1 -1

@@ -18,7 +18,7 @@ hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_statefull_backend_OPTIMUM:
   metric_value: null
 hf/hf-internal-testing/tiny-random-GPTNeoXForCausalLM_stateless_backend_OPTIMUM:
   metric_value: null
-  xfail_reason: "Issue-161969"
+  exception_xfail_reason: "Issue-161969"
 hf/hf-internal-testing/tiny-random-gpt2_backend_FP32:
   metric_value: null
 hf/hf-internal-testing/tiny-random-gpt2_backend_OPTIMUM:
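
The rename lines up with the error-reporting convention in `tests/post_training/pipelines/base.py`: xfail keys in the reference data are looked up as the `ErrorReason` value plus an xfail suffix (see the removed `_process_errors()` further down), and this commit adds `EXCEPTION = "exception"` to that enum. A minimal sketch of the assumed key derivation; the suffix value is inferred from the renamed key and is not visible in this diff:

```python
# Assumed constant from tests/post_training/pipelines/base.py; the exact value is
# inferred from the renamed YAML key, not shown in this diff.
XFAIL_SUFFIX = "_xfail_reason"


def xfail_key(reason_value: str) -> str:
    # The removed _process_errors() built keys as report.reason.value + XFAIL_SUFFIX,
    # so the new ErrorReason.EXCEPTION ("exception") maps to the key used above.
    return reason_value + XFAIL_SUFFIX


assert xfail_key("exception") == "exception_xfail_reason"
```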

tests/post_training/experimental/sparsify_activations/pipelines.py

-12

@@ -35,8 +35,6 @@
 from tests.post_training.pipelines.base import LIMIT_LENGTH_OF_STATUS
 from tests.post_training.pipelines.base import PT_BACKENDS
 from tests.post_training.pipelines.base import BackendType
-from tests.post_training.pipelines.base import ErrorReason
-from tests.post_training.pipelines.base import ErrorReport
 from tests.post_training.pipelines.base import NumCompressNodes
 from tests.post_training.pipelines.base import RunInfo
 from tests.post_training.pipelines.image_classification_timm import ImageClassificationTimm

@@ -171,16 +169,6 @@ def _compress(self):
             **self.compression_params["sparsify_activations"],
         )

-    def _validate(self):
-        errors = super()._validate()
-        ref_num_sparse_activations = self.reference_data.get("num_sparse_activations", 0)
-        num_sparse_activations = self.run_info.num_compress_nodes.num_sparse_activations
-        if num_sparse_activations != ref_num_sparse_activations:
-            status_msg = f"Regression: The number of sparse activations is {num_sparse_activations}, \
-                which differs from reference {ref_num_sparse_activations}."
-            errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
-        return errors
-

 class LMSparsifyActivations(SAPipelineMixin, LMWeightCompression):
     DEFAULT_SUBSET_SIZE = 32

tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py

+24 -92

@@ -9,9 +9,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-
-import time
-import traceback
 from collections import OrderedDict
 from pathlib import Path
 from typing import Dict, Optional

@@ -23,8 +20,6 @@
 from tests.post_training.experimental.sparsify_activations.model_scope import SPARSIFY_ACTIVATIONS_TEST_CASES
 from tests.post_training.experimental.sparsify_activations.pipelines import SARunInfo
 from tests.post_training.pipelines.base import BackendType
-from tests.post_training.pipelines.base import BaseTestPipeline
-from tests.post_training.test_quantize_conformance import create_short_run_info
 from tests.post_training.test_quantize_conformance import fixture_batch_size  # noqa: F401
 from tests.post_training.test_quantize_conformance import fixture_data  # noqa: F401
 from tests.post_training.test_quantize_conformance import fixture_extra_columns  # noqa: F401

@@ -34,8 +29,7 @@
 from tests.post_training.test_quantize_conformance import fixture_run_fp32_backend  # noqa: F401
 from tests.post_training.test_quantize_conformance import fixture_run_torch_cuda_backend  # noqa: F401
 from tests.post_training.test_quantize_conformance import fixture_subset_size  # noqa: F401
-from tests.post_training.test_quantize_conformance import maybe_skip_test_case
-from tests.post_training.test_quantize_conformance import write_logs
+from tests.post_training.test_quantize_conformance import run_pipeline


 @pytest.fixture(scope="session", name="sparsify_activations_reference_data")

@@ -59,39 +53,6 @@ def fixture_sparsify_activations_report_data(output_dir):
     df.to_csv(output_dir / "results.csv", index=False)


-def create_pipeline_kwargs(
-    test_model_param: Dict,
-    subset_size,
-    test_case_name: str,
-    reference_data: Dict[str, Dict],
-    fp32_model_params: Dict[str, Dict],
-):
-    if subset_size:
-        if "compression_params" not in test_model_param:
-            test_model_param["compression_params"] = {}
-        test_model_param["compression_params"]["subset_size"] = subset_size
-
-    print("\n")
-    print(f"Model: {test_model_param['reported_name']}")
-    print(f"Backend: {test_model_param['backend']}")
-    print(f"Comprssion params: {test_model_param['compression_params']}")
-
-    # Get target fp32 metric value
-    model_id = test_model_param["model_id"]
-    fp32_test_case_name = fp32_model_params[model_id]["reported_name"] + f"_backend_{BackendType.FP32.value}"
-    test_reference = reference_data[test_case_name]
-    test_reference["metric_value_fp32"] = reference_data[fp32_test_case_name]["metric_value"]
-
-    return {
-        "reported_name": test_model_param["reported_name"],
-        "model_id": test_model_param["model_id"],
-        "backend": test_model_param["backend"],
-        "compression_params": test_model_param["compression_params"],
-        "params": test_model_param.get("params"),
-        "reference_data": test_reference,
-    }
-
-
 @pytest.mark.parametrize("test_case_name", SPARSIFY_ACTIVATIONS_TEST_CASES.keys())
 def test_sparsify_activations(
     sparsify_activations_reference_data: dict,

@@ -108,55 +69,26 @@ def test_sparsify_activations(
     capsys: pytest.CaptureFixture,
     extra_columns: bool,
 ):
-    pipeline = None
-    err_msg = None
-    test_model_param = None
-    start_time = time.perf_counter()
-    try:
-        if test_case_name not in sparsify_activations_reference_data:
-            msg = f"{test_case_name} is not defined in `sparsify_activations_reference_data` fixture"
-            raise RuntimeError(msg)
-        test_model_param = SPARSIFY_ACTIVATIONS_TEST_CASES[test_case_name]
-        maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size)
-        fp32_model_params = {
-            tc["model_id"]: tc for tc in SPARSIFY_ACTIVATIONS_TEST_CASES.values() if tc["backend"] == BackendType.FP32
-        }
-        pipeline_cls = test_model_param["pipeline_cls"]
-        pipeline_kwargs = create_pipeline_kwargs(
-            test_model_param, subset_size, test_case_name, sparsify_activations_reference_data, fp32_model_params
-        )
-        calibration_batch_size = batch_size or test_model_param.get("batch_size", 1)
-        pipeline_kwargs.update(
-            {
-                "output_dir": output_dir,
-                "data_dir": data_dir,
-                "no_eval": no_eval,
-                "run_benchmark_app": run_benchmark_app,
-                "batch_size": calibration_batch_size,
-            }
-        )
-        pipeline: BaseTestPipeline = pipeline_cls(**pipeline_kwargs)
-        pipeline.run()
-    except Exception as e:
-        err_msg = str(e)
-        traceback.print_exc()
-
-    if pipeline is not None:
-        pipeline.cleanup_cache()
-        run_info = pipeline.run_info
-        if err_msg:
-            run_info.status = f"{run_info.status} | {err_msg}" if run_info.status else err_msg
-
-        captured = capsys.readouterr()
-        write_logs(captured, pipeline)
-
-        if extra_columns:
-            pipeline.collect_data_from_stdout(captured.out)
-    else:
-        run_info = create_short_run_info(test_model_param, err_msg, test_case_name)
-
-    run_info.time_total = time.perf_counter() - start_time
-    sparsify_activations_result_data[test_case_name] = run_info
-
-    if err_msg:
-        pytest.fail(err_msg)
+    fp32_model_params = {
+        tc["model_id"]: tc for tc in SPARSIFY_ACTIVATIONS_TEST_CASES.values() if tc["backend"] == BackendType.FP32
+    }
+    run_pipeline(
+        test_case_name,
+        sparsify_activations_reference_data,
+        SPARSIFY_ACTIVATIONS_TEST_CASES,
+        sparsify_activations_result_data,
+        output_dir,
+        data_dir,
+        no_eval,
+        batch_size,
+        run_fp32_backend,
+        run_torch_cuda_backend,
+        subset_size,
+        run_benchmark_app,
+        False,  # torch_compile_validation is not used in SA
+        capsys,
+        extra_columns,
+        False,  # memory_monitor is not used in SA
+        None,  # use_avx2 is not used in SA
+        fp32_model_params,
+    )
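
The test body now delegates to a shared `run_pipeline()` imported from `tests.post_training.test_quantize_conformance`. That module is the seventh changed file and is not shown in this view, so the sketch below only reconstructs a plausible signature from this call site: every parameter name is an assumption, with `torch_compile_validation`, `memory_monitor`, and `use_avx2` taken from the inline comments above.

```python
# Hedged reconstruction of the shared helper's signature from the call site only;
# the real definition lives in tests/post_training/test_quantize_conformance.py.
from pathlib import Path
from typing import Dict, Optional

import pytest


def run_pipeline(
    test_case_name: str,
    reference_data: dict,
    test_cases: dict,
    result_data: dict,
    output_dir: Path,
    data_dir: Optional[Path],
    no_eval: bool,
    batch_size: Optional[int],
    run_fp32_backend: bool,
    run_torch_cuda_backend: bool,
    subset_size: Optional[int],
    run_benchmark_app: bool,
    torch_compile_validation: bool,
    capsys: pytest.CaptureFixture,
    extra_columns: bool,
    memory_monitor: bool,
    use_avx2: Optional[bool],
    fp32_model_params: Optional[Dict[str, dict]] = None,
) -> None:
    """Build pipeline kwargs, run the pipeline, record RunInfo, and fail or xfail the test."""
```

With eighteen positional arguments the call is easy to misorder; if the helper accepts keyword arguments, calling it with explicit names would make the `False`/`None` placeholders self-documenting.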

tests/post_training/pipelines/base.py

+4 -45

@@ -19,9 +19,8 @@
 from datetime import timedelta
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, Optional

-import numpy as np
 import onnx
 import openvino as ov
 import torch

@@ -42,6 +41,7 @@
 class ErrorReason(Enum):
     METRICS = "metrics"
     NUM_COMPRESSED = "num_compressed"
+    EXCEPTION = "exception"


 @dataclass

@@ -293,31 +293,12 @@ def get_num_compressed(self) -> None:
     def run_bench(self) -> None:
         """Run a benchmark to collect performance statistics."""

-    def _validate(self) -> List[ErrorReport]:
+    def _validate(self) -> None:
         """
         Validates some test criteria.
         returns:
             A list of error reports generated during validation.
         """
-        return []
-
-    def _process_errors(self, errors) -> str:
-        """
-        Processes a list of error reports and updates the run status.
-
-        :param errors: A list of error reports.
-        :return: A string representing the concatenated statuses of the processed errors.
-        """
-        xfails, msg_list = [], []
-        for report in errors:
-            xfail_reason = report.reason.value + XFAIL_SUFFIX
-            if xfail_reason in self.reference_data:
-                xfails.append(f"XFAIL: {self.reference_data[xfail_reason]} - {report.msg}")
-            else:
-                msg_list.append(report.msg)
-        if msg_list:
-            raise ValueError("\n".join(msg_list))
-        self.run_info.status = "\n".join(xfails)

     def prepare(self):
         """

@@ -339,29 +320,7 @@ def validate(self) -> None:
             print("Validation skipped")
             return
         print("Validation...")
-
-        errors = self._validate()
-
-        metric_value = self.run_info.metric_value
-        metric_reference = self.reference_data.get("metric_value")
-        metric_value_fp32 = self.reference_data.get("metric_value_fp32")
-
-        if metric_value is not None and metric_value_fp32 is not None:
-            self.run_info.metric_diff = round(self.run_info.metric_value - self.reference_data["metric_value_fp32"], 5)
-
-        if (
-            metric_value is not None
-            and metric_reference is not None
-            and not np.isclose(metric_value, metric_reference, atol=self.reference_data.get("atol", 0.001))
-        ):
-            status_msg = None
-            if metric_value < metric_reference:
-                status_msg = f"Regression: Metric value is less than reference {metric_value} < {metric_reference}"
-            if metric_value > metric_reference:
-                status_msg = f"Improvement: Metric value is better than reference {metric_value} > {metric_reference}"
-            if status_msg:
-                errors.append(ErrorReport(ErrorReason.METRICS, status_msg))
-        self._process_errors(errors)
+        self._validate()

     def run(self) -> None:
         """

tests/post_training/pipelines/image_classification_base.py

-2

@@ -43,7 +43,6 @@ def _validate_ov(
         references: np.ndarray,
         dataset_size: int,
     ):
-
         core = ov.Core()
         if os.environ.get("INFERENCE_NUM_THREADS"):
             # Set CPU_THREADS_NUM for OpenVINO inference

@@ -106,4 +105,3 @@ def _validate(self) -> List[ErrorReport]:

         self.run_info.metric_name = "Acc@1"
         self.run_info.metric_value = acc_top1
-        return []

tests/post_training/pipelines/lm_weight_compression.py

+2 -20

@@ -14,7 +14,7 @@
 import shutil
 import time
 from dataclasses import dataclass
-from typing import Dict, List, Optional
+from typing import Dict, Optional

 import numpy as np
 import openvino as ov

@@ -31,8 +31,6 @@
 from tests.cross_fw.shared.paths import TEST_ROOT
 from tests.post_training.pipelines.base import BackendType
 from tests.post_training.pipelines.base import BaseTestPipeline
-from tests.post_training.pipelines.base import ErrorReason
-from tests.post_training.pipelines.base import ErrorReport
 from tests.post_training.pipelines.base import StatsFromOutput
 from tools.memory_monitor import MemoryType
 from tools.memory_monitor import MemoryUnit

@@ -269,8 +267,7 @@ def _compress(self):
             **self.compression_params,
         )

-    def _validate(self) -> List[ErrorReport]:
-        errors = []
+    def _validate(self) -> None:
         is_stateful = self.params.get("is_stateful", False)
         core = ov.Core()

@@ -315,18 +312,3 @@ def _validate(self) -> List[ErrorReport]:
         similarity = all_metrics["similarity"][0]
         self.run_info.metric_name = "Similarity"
         self.run_info.metric_value = round(similarity, 5)
-
-        num_int4_reference = self.reference_data.get("num_int4")
-        num_int8_reference = self.reference_data.get("num_int8")
-
-        num_int4_value = self.run_info.num_compress_nodes.num_int4
-        num_int8_value = self.run_info.num_compress_nodes.num_int8
-
-        template = "Regression: The number of int{} ops is different than reference {} != {}"
-        if num_int4_reference != num_int4_value:
-            status_msg = template.format(4, num_int4_reference, num_int4_value)
-            errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
-        if num_int8_reference != num_int8_value:
-            status_msg = template.format(8, num_int8_reference, num_int8_value)
-            errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, status_msg))
-        return errors
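
This deletion mirrors the `_validate()` override removed from the sparsify-activations pipelines: both compared measured op counts against reference values and emitted `ErrorReport(ErrorReason.NUM_COMPRESSED, ...)`. A hedged sketch of how such checks can be expressed once, assuming the shared driver now owns them; the helper name and dict-based interface are illustrative, not taken from the commit.

```python
# Illustrative generic version of the removed count checks (num_int4, num_int8,
# num_sparse_activations); ErrorReason/ErrorReport mirror base.py.
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional


class ErrorReason(Enum):
    METRICS = "metrics"
    NUM_COMPRESSED = "num_compressed"
    EXCEPTION = "exception"


@dataclass
class ErrorReport:
    reason: ErrorReason
    msg: str


def check_num_compressed(
    reference: Dict[str, Optional[int]], measured: Dict[str, Optional[int]]
) -> List[ErrorReport]:
    """Return one NUM_COMPRESSED report per counter that differs from its reference value."""
    errors = []
    for name, ref_value in reference.items():
        value = measured.get(name)
        if value != ref_value:
            msg = f"Regression: The number of {name} ops is different than reference {ref_value} != {value}"
            errors.append(ErrorReport(ErrorReason.NUM_COMPRESSED, msg))
    return errors


# Example: reports a mismatch for num_int8 only.
reports = check_num_compressed({"num_int4": 10, "num_int8": 4}, {"num_int4": 10, "num_int8": 6})
```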
