[GA] test examples (#3001)

AlexanderDokuchaev · web-flow · commit 4bbcefb75f27 · 2024-10-15T10:46:40.000+04:00
### Changes Add GA workflow to run tests examples. Set environment variables to get host-independent in the results: ``` env["ONEDNN_MAX_CPU_ISA"] = "AVX2" env["CUDA_VISIBLE_DEVICES"] = "" ``` ### Related tickets 153650 ### Tests https://github.com/openvinotoolkit/nncf/actions/runs/11332052082
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
@@ -0,0 +1,68 @@
+name: Test examples
+permissions: read-all
+
+on:
+  schedule:
+    - cron: '0 0 * * *'
+  workflow_dispatch:
+    inputs:
+      pull_request_number:
+        description: 'The pull request number'
+        required: true
+        type: number
+      pytest_args:
+        description: 'Pytest arguments'
+        default: ''
+        type: string
+
+jobs:
+  examples-cpu:
+    name: Test exmaples CPU [${{ matrix.group }}/4]
+    runs-on: ubuntu-22.04-16-cores
+    strategy:
+      fail-fast: false
+      matrix:
+        group: [1, 2, 3, 4]
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        with:
+            lfs: true
+            fetch-depth: 0  # Fetch full history to allow checking out any branch or PR
+      - name: Fetch and Checkout the Pull Request Branch
+        if: ${{ github.event_name == 'workflow_dispatch' }}
+        run: |
+          git fetch origin pull/${{ github.event.inputs.pull_request_number }}/head:pr-${{ github.event.inputs.pull_request_number }}
+          git checkout pr-${{ github.event.inputs.pull_request_number }}
+      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
+        with:
+          python-version: 3.10.14
+          cache: pip
+      - name: cpuinfo
+        run: cat /proc/cpuinfo
+      - name: Install NNCF and test requirements
+        run: |
+          pip install -e .
+          pip install -r tests/cross_fw/examples/requirements.txt
+      - name: Print installed modules
+        run: pip list
+      - name: Run examples test scope
+        continue-on-error: true
+        run: |
+          python -m pytest -ras tests/cross_fw/examples \
+            --junit-xml=pytest-results-${{ matrix.group }}.xml \
+            --durations-path=tests/cross_fw/examples/.test_durations \
+            --splitting-algorithm=least_duration \
+            --splits 4 \
+            --group ${{ matrix.group }} \
+            ${{ github.event.inputs.pytest_args || '' }}
+        env:
+          TQDM_DISABLE: 1
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: pytest-results-${{ matrix.group }}
+          path: pytest-results-${{ matrix.group }}.xml
+          overwrite: True
diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
@@ -10,10 +10,10 @@ on:
     paths-ignore:
       - '**/*.md'
       - 'docs/**/*'
-      - 'tests/post_training/*'  # post_training tests are only run in Jenkins
+      - 'tests/post_training/*'  # post_training tests run on Jenkins
       - 'tests/torch/sota_checkpoints_eval.json'  # reference for PT e2e
       - 'tests/tensorflow/sota_checkpoints_eval.json'  # reference for TF e2e
-
+      - 'tests/cross_fw/examples/*'  # examples tests run in a separate workflow
 jobs:
   common:
     runs-on: ubuntu-20.04
diff --git a/constraints.txt b/constraints.txt
@@ -21,3 +21,4 @@ pytest-dependency==0.6.0
 pytest-ordering==0.6
 pytest-xdist==3.5.0
 pytest-forked==1.6.0
+pytest-split==0.9.0
diff --git a/tests/cross_fw/examples/.test_durations b/tests/cross_fw/examples/.test_durations
@@ -0,0 +1,16 @@
+{
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression]": 222.974,
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_compression_synthetic]": 873.780,
+    "tests/cross_fw/examples/test_examples.py::test_examples[llm_tune_params]": 1018.932,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_onnx_mobilenet_v2]": 178.509,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_onnx_yolo8_quantize_with_accuracy_control]": 292.766,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_anomaly_stfpm_quantize_with_accuracy_control]": 443.025,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_mobilenet_v2_quantize]": 169.789,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_yolo8_quantize]": 170.593,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_openvino_yolo8_quantize_with_accuracy_control]": 205.533,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_tensorflow_mobilenet_v2]": 149.202,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_mobilenet_v2]": 192.227,
+    "tests/cross_fw/examples/test_examples.py::test_examples[post_training_quantization_torch_ssd300_vgg16]": 231.613,
+    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_torch_anomalib]": 478.797,
+    "tests/cross_fw/examples/test_examples.py::test_examples[quantization_aware_training_torch_resnet18]": 1251.144
+}
diff --git a/tests/cross_fw/examples/README.md b/tests/cross_fw/examples/README.md
@@ -0,0 +1,19 @@
+# Test examples
+
+## Manual trigger
+
+To run the job manually, use the GitHub workflow:
+
+https://github.com/openvinotoolkit/nncf/actions/workflows/examples.yml
+
+Parameters:
+
+- `pull_request_number`: The pull request number.
+- `pytest_args`: Additional pytest arguments (for example, `-k llm`).
+
+## Parallel test
+
+To set up parallel testing between jobs using `pytest-split` with the option `--splitting-algorithm=least_duration`,
+you need to ensure that each test's duration is tracked correctly and stored so that future test runs can use that data
+to split the tests efficiently. After running the workflow, get the test durations from the job artifact and
+add the new tests to [.test_durations](.test_durations).
diff --git a/tests/cross_fw/examples/example_scope.json b/tests/cross_fw/examples/example_scope.json
@@ -38,10 +38,11 @@
         "backend": "openvino",
         "requirements": "examples/post_training_quantization/openvino/yolov8/requirements.txt",
         "cpu": "Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz",
+        "accuracy_tolerance": 0.3,
         "accuracy_metrics": {
-            "fp32_mAP": 0.45252755065175254,
-            "int8_mAP": 0.4561537594798616,
-            "accuracy_drop": -0.00362620882
+            "fp32_mAP": 0.4521154302569843,
+            "int8_mAP": 0.4599022156047055,
+            "accuracy_drop": -0.0077867853477212035
         },
         "performance_metrics": {
             "fp32_fps": 170.69,
@@ -88,19 +89,20 @@
         "backend": "onnx",
         "requirements": "examples/post_training_quantization/onnx/yolov8_quantize_with_accuracy_control/requirements.txt",
         "cpu": "Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz",
+        "accuracy_tolerance": 0.3,
         "accuracy_metrics": {
-            "onnx_fp32_box_mAP": 0.44964819166629366,
-            "onnx_fp32_mask_mAP": 0.36509415038708726,
-            "onnx_int8_box_mAP": 0.45071569411799683,
-            "onnx_int8_mask_mAP": 0.36805097120147484,
-            "onnx_drop_box_mAP": -0.0010675024517031728,
-            "onnx_drop_mask_mAP": -0.002956820814387584,
-            "ov_fp32_box_mAP": 0.44964819083938,
+            "onnx_fp32_box_mAP": 0.4496481927421265,
+            "onnx_fp32_mask_mAP": 0.36509979887281135,
+            "onnx_int8_box_mAP": 0.4509292170587964,
+            "onnx_int8_mask_mAP": 0.36710935370534864,
+            "onnx_drop_box_mAP": -0.001281024316669932,
+            "onnx_drop_mask_mAP": -0.002009554832537286,
+            "ov_fp32_box_mAP": 0.4496481927421265,
             "ov_fp32_mask_mAP": 0.36509979887281135,
-            "ov_int8_box_mAP": 0.4462460126164857,
-            "ov_int8_mask_mAP": 0.3631361727753598,
-            "ov_drop_box_mAP": 0.003402178222894292,
-            "ov_drop_mask_mAP": 0.001963626097451543
+            "ov_int8_box_mAP": 0.4494792029705020,
+            "ov_int8_mask_mAP": 0.3666179206494213,
+            "ov_drop_box_mAP": 0.000168989771624439,
+            "ov_drop_mask_mAP": -0.0015181217766099264
         },
         "performance_metrics": {
             "ov_fp32_fps": 139.32,
@@ -114,8 +116,8 @@
         "cpu": "Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz",
         "accuracy_metrics": {
             "fp32_top1": 0.987770676612854,
-            "int8_top1": 0.9775795936584473,
-            "accuracy_drop": 0.010191082954406738
+            "int8_top1": 0.9691720008850098,
+            "accuracy_drop": 0.01859867572784424
         },
         "performance_metrics": {
             "fp32_fps": 1703.04,
@@ -226,11 +228,11 @@
         "python_version": [3,10,0],
         "accuracy_tolerance_after_training": 0.02,
         "accuracy_metrics": {
-            "fp32_f1score": 0.9919999837875366,
+            "fp32_f1score": 1.0,
             "int8_init_f1score": 0.9767441749572754
         },
         "accuracy_metrics_after_training": {
-            "int8_f1score": 0.9919999837875366,
+            "int8_f1score": 1.0,
             "accuracy_drop": 0.0
         },
         "performance_metrics": {
@@ -244,4 +246,4 @@
             "model_compression_rate": 3.7654144877995197
         }
     }
-}
+}
diff --git a/tests/cross_fw/examples/requirements.txt b/tests/cross_fw/examples/requirements.txt
@@ -1,3 +1,4 @@
 -c ../../../constraints.txt
 pytest
 pytest-cov
+pytest-split
diff --git a/tests/cross_fw/examples/test_examples.py b/tests/cross_fw/examples/test_examples.py
@@ -55,6 +55,8 @@ def test_examples(
     ov_version_override: str,
     data: str,
 ):
+    print("\n" + "-" * 64)
+    print(f"Example name: {example_name}")
     python_version = sys.version_info
     example_python_version = tuple(example_params.get("python_version", python_version))
     if python_version < example_python_version:
@@ -63,19 +65,22 @@ def test_examples(
     backend = example_params["backend"]
     skip_if_backend_not_selected(backend, backends_list)
     venv_path = create_venv_with_nncf(tmp_path, "pip_e_local", "venv", {backend})
+    pip_with_venv = get_pip_executable_with_venv(venv_path)
     if "requirements" in example_params:
-        pip_with_venv = get_pip_executable_with_venv(venv_path)
         requirements = PROJECT_ROOT / example_params["requirements"]
         run_cmd_line = f"{pip_with_venv} install -r {requirements}"
         subprocess.run(run_cmd_line, check=True, shell=True)
 
     if ov_version_override is not None:
-        pip_with_venv = get_pip_executable_with_venv(venv_path)
         ov_version_cmd_line = f"{pip_with_venv} install {ov_version_override}"
         subprocess.run(ov_version_cmd_line, check=True, shell=True)
 
+    subprocess.run(f"{pip_with_venv} list", check=True, shell=True)
+
     env = os.environ.copy()
     env["PYTHONPATH"] = str(PROJECT_ROOT)  # need this to be able to import from tests.* in run_example.py
+    env["ONEDNN_MAX_CPU_ISA"] = "AVX2"  # Set ISA to AVX2 to get CPU independent results
+    env["CUDA_VISIBLE_DEVICES"] = ""  # Disable GPU
 
     metrics_file_path = tmp_path / "metrics.json"
     python_executable_with_venv = get_python_executable_with_venv(venv_path)
@@ -87,17 +92,17 @@ def test_examples(
     cmd.run()
 
     measured_metrics = load_json(metrics_file_path)
-
+    print(measured_metrics)
     for name, value in example_params[ACCURACY_METRICS].items():
         assert measured_metrics[name] == pytest.approx(
             value, abs=example_params.get("accuracy_tolerance", ACCURACY_TOLERANCE)
-        )
+        ), f"metric {name}: {measured_metrics[name]} != {value}"
 
     if ACCURACY_METRICS_AFTER_TRAINING in example_params:
         for name, value in example_params[ACCURACY_METRICS_AFTER_TRAINING].items():
             assert measured_metrics[name] == pytest.approx(
                 value, abs=example_params.get("accuracy_tolerance_after_training", ACCURACY_TOLERANCE)
-            )
+            ), f"metric {name}: {measured_metrics[name]} != {value}"
 
     if MODEL_SIZE_METRICS in example_params:
         for name, value in example_params[MODEL_SIZE_METRICS].items():