
Commit b755036

Update ort CIs (slow, gpu, train) (#2024)
* update ort CIs
* fix train ci
* fix gpu ci
* gpus all
* devel
* enable trt
* fix
* fix
* fix
* test
* rename
* change instance
* test
* use available
* update
* shorter labels as well
* add onnxruntime-training
* fix onnxruntime package checking
* fix typo
* fix typo
* remove torch version
* fix trainer
* fixed trt ep by using trt docker image (the only way to make sure everything works)
* latest trt version
* remove pkv speedup timing since never used
* trust remote code for training datasets
* remove rocm from diffusers tests
* move ort training tests to onnxruntime-training
* fix ort training
* fix
* style
* always assert closeness and not equality
* fixed perceiver
* fixed missing position ids when attn mask is given
* remove num_labels from output shapes as it's not a dynamic axis
* raise error on missing mandatory inputs
* added atol and rtol as part of the ORTModelTestMixin class
* fix segformer image segmentation
* style
* fix vision encoder io binding
* hot fix io binding, remove its dependency on the order of inputs and make sure it's actually being tested
* fix
* typo
* unify io binding api with non io binding
* force evaluated shape to int
* mark pix2struct io binding tests
* force contiguity in forward pass
* fixed cryptic contiguity problems
* fix some
* fix vision2seq modeling and testing
* Update setup.py
* update import utils
* Update optimum/onnxruntime/modeling_ort.py
* fix vision encoder decoder io binding
* enable bigbird and bigbird-pegasus and separate timm slow tests to untangle them
* use bigger machine for slow tests
* lower atol and rtol for image classification logits
* fix
* large
* enable more Longformer and MCTCT
* enable commented models in export as well
* uncomment timm slow models, big bird optimization and marian pkv comparison
* fix whisper/speech_to_text test and make convolution deterministic
* pin torch for ort training
* ctc and speech also use convolution so have to be deterministic
* revert vision2seq atol
1 parent: d1bcdf7 · commit: b755036
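Two related items in the message, "always assert closeness and not equality" and "added atol and rtol as part of the ORTModelTestMixin class", boil down to comparing ONNX Runtime outputs against PyTorch references within tolerances rather than bit-for-bit. A minimal sketch of the idea (the attribute names and helper below are illustrative, not the actual test code):

    import torch

    class ORTModelTestMixin:
        # illustrative tolerance defaults; the real values live in the optimum test suite
        ATOL = 1e-4
        RTOL = 1e-4

        def assert_outputs_close(self, ort_output: torch.Tensor, pt_output: torch.Tensor) -> None:
            # closeness, not equality: small numerical drift between ORT and PyTorch is expected
            torch.testing.assert_close(ort_output, pt_output, atol=self.ATOL, rtol=self.RTOL)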


43 files changed: +1550 −1478 lines

.github/workflows/test_export_onnx_cli.yml (+14 −8)

@@ -2,9 +2,11 @@ name: Exporters ONNX CLI / Python - Test
 
 on:
   push:
-    branches: [main]
+    branches:
+      - main
   pull_request:
-    branches: [main]
+    branches:
+      - main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

@@ -19,16 +21,20 @@ jobs:
         os: [ubuntu-20.04]
 
     runs-on: ${{ matrix.os }}
+
     steps:
-      - uses: actions/checkout@v2
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
       - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install dependencies for pytorch export
+
+      - name: Install dependencies
         run: |
           pip install .[tests,exporters,diffusers]
-      - name: Test with unittest
-        working-directory: tests
+
+      - name: Test with pytest
         run: |
-          pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -m "not tensorflow_test and not timm_test" -s --durations=0
+          pytest tests/exporters/onnx/test_exporters_onnx_cli.py -n auto -m "not tensorflow_test and not timm_test" -s --durations=0

.github/workflows/test_onnxruntime.yml (+6 −6)

@@ -1,12 +1,12 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 name: ONNX Runtime / Python - Test
 
 on:
   push:
-    branches: [main]
+    branches:
+      - main
   pull_request:
-    branches: [main]
+    branches:
+      - main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

@@ -58,10 +58,10 @@ jobs:
       - name: Test with pytest (in series)
         run: |
-          pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv -s
+          pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv
 
       - name: Test with pytest (in parallel)
         run: |
-          pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto
+          pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -n auto
         env:
           HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+41 −17

@@ -1,30 +1,54 @@
-name: ONNX Runtime / Test GPU
+name: ONNX Runtime GPU / Python - Test
 
 on:
   workflow_dispatch:
   schedule:
-    - cron: 0 1 */3 * * # at 1am every 3 days
+    - cron: 0 7 * * * # every day at 7am UTC
   pull_request:
-    types: [opened, synchronize, reopened, labeled]
-    # uncomment to enable on PR merge on main branch:
-    #push:
-    #  branches:
-    #    - main
+    branches:
+      - main
+    types:
+      - opened
+      - labeled
+      - reopened
+      - unlabeled
+      - synchronize
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
 
 jobs:
-  do-the-job:
-    if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }}
-    name: Start self-hosted EC2 runner
+  build:
+    if: ${{
+      (github.event_name == 'push') ||
+      (github.event_name == 'workflow_dispatch') ||
+      contains(github.event.pull_request.labels.*.name, 'gpu') ||
+      contains(github.event.pull_request.labels.*.name, 'onnxruntime-gpu')
+      }}
+
     runs-on:
       group: aws-g6-4xlarge-plus
-    env:
-      AWS_REGION: us-east-1
+
+    container:
+      image: nvcr.io/nvidia/tensorrt:24.12-py3
+      options: --gpus all
+
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
-      - name: Build image
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+
+      - name: Install dependencies
         run: |
-          docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu .
-      - name: Test with unittest within docker container
+          pip install --upgrade pip
+          pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+          pip install .[tests,onnxruntime-gpu,diffusers]
+
+      - name: Test with pytest
         run: |
-          docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest
+          pytest tests/onnxruntime -m "cuda_ep_test or trt_ep_test" --durations=0 -vvvv -n auto
+37 −20

@@ -1,33 +1,50 @@
-name: ONNX Runtime slow / Python - Test
+name: ONNX Runtime Slow / Python - Test
 
 on:
   workflow_dispatch:
   schedule:
-    - cron: 0 7 * * * # every day at 7am
+    - cron: 0 7 * * * # every day at 7am UTC
+  pull_request:
+    branches:
+      - main
+    types:
+      - opened
+      - labeled
+      - reopened
+      - unlabeled
+      - synchronize
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 
 jobs:
   build:
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9"]
-        os: [ubuntu-20.04]
+    if: ${{
+      (github.event_name == 'push') ||
+      (github.event_name == 'workflow_dispatch') ||
+      contains(github.event.pull_request.labels.*.name, 'slow') ||
+      contains(github.event.pull_request.labels.*.name, 'onnxruntime-slow')
+      }}
+
+    runs-on:
+      group: aws-general-8-plus
 
-    runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies for export
-        run: |
-          pip install .[tests,onnxruntime,diffusers]
-      - name: Test with unittest
-        working-directory: tests
-        run: |
-          RUN_SLOW=1 pytest onnxruntime -s -m "run_slow" --durations=0
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install .[tests,onnxruntime,diffusers]
+
+      - name: Test with pytest
+        run: |
+          RUN_SLOW=1 pytest tests/onnxruntime -m "run_slow" --durations=0 -vvvv
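The job only selects tests marked "run_slow" and exports RUN_SLOW=1 before invoking pytest. A sketch of how such a gate can be wired on the test side (this helper is an assumption for illustration, not optimum's actual implementation):

    import os

    import pytest

    # hypothetical gate: tag a test as slow and skip it unless RUN_SLOW=1 is set
    run_slow = pytest.mark.run_slow
    needs_run_slow = pytest.mark.skipif(
        os.environ.get("RUN_SLOW", "0") != "1", reason="set RUN_SLOW=1 to run slow tests"
    )

    @run_slow
    @needs_run_slow
    def test_large_model_inference():
        ...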

.github/workflows/test_onnxruntime_train.yml (−26)

This file was deleted. A new training workflow (+66 lines) takes its place:

@@ -0,0 +1,66 @@
+name: ONNX Runtime Training / Python - Test
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: 0 7 * * * # every day at 7am UTC
+  pull_request:
+    branches:
+      - main
+    types:
+      - opened
+      - labeled
+      - reopened
+      - unlabeled
+      - synchronize
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    if: ${{
+      (github.event_name == 'push') ||
+      (github.event_name == 'workflow_dispatch') ||
+      contains( github.event.pull_request.labels.*.name, 'training') ||
+      contains( github.event.pull_request.labels.*.name, 'onnxruntime-training')
+      }}
+
+    runs-on:
+      group: aws-g6-4xlarge-plus
+
+    container:
+      image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+      options: --gpus all
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+
+      - name: Install dependencies
+        env:
+          TORCH_CUDA_ARCH_LIST: "5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
+        run: |
+          pip install --upgrade pip
+          pip install --no-cache-dir "torch<2.6" torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+          pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure
+          pip install --no-cache-dir evaluate absl-py rouge_score seqeval sacrebleu nltk scikit-learn
+          pip install .[tests,onnxruntime-training]
+
+      - name: Test with pytest (trainer)
+        run: |
+          RUN_SLOW=1 pytest tests/onnxruntime-training/test_trainer.py --durations=0 -vvvv
+        env:
+          HF_DATASETS_TRUST_REMOTE_CODE: 1
+
+      - name: Test with pytest (examples)
+        run: |
+          RUN_SLOW=1 pytest tests/onnxruntime-training/test_examples.py --durations=0 -vvvv
+        env:
+          HF_DATASETS_TRUST_REMOTE_CODE: 1
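The trainer tests target optimum's ORTTrainer, which the torch-ort / onnxruntime-training install above makes usable. A minimal sketch of the API those tests drive (model, datasets, and argument values are placeholders assumed to exist):

    from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments

    # Drop-in replacements for transformers' Trainer/TrainingArguments that run
    # the training step through ONNX Runtime's ORTModule backend.
    training_args = ORTTrainingArguments(output_dir="ort_out", per_device_train_batch_size=8)
    trainer = ORTTrainer(
        model=model,                  # a transformers PreTrainedModel, assumed defined
        args=training_args,
        train_dataset=train_dataset,  # assumed defined
        eval_dataset=eval_dataset,    # assumed defined
    )
    trainer.train()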

examples/onnxruntime/training/image-classification/run_image_classification.py (+1)

@@ -333,6 +333,7 @@ def compute_metrics(p):
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
         ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
+        attn_implementation="eager",
     )
     image_processor = AutoImageProcessor.from_pretrained(
         model_args.image_processor_name or model_args.model_name_or_path,
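The same one-line change recurs in every training example below: the model is loaded with attn_implementation="eager", which forces the plain attention implementation instead of fused SDPA/flash-attention kernels (likely for compatibility with ONNX Runtime training wrapping; the commit does not spell out the reason). In isolation the flag looks like this (checkpoint name is illustrative):

    from transformers import AutoModelForImageClassification

    # Select the eager attention path at load time.
    model = AutoModelForImageClassification.from_pretrained(
        "google/vit-base-patch16-224",  # illustrative checkpoint
        attn_implementation="eager",
    )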

examples/onnxruntime/training/language-modeling/run_clm.py (+4 −1)

@@ -442,9 +442,12 @@ def main():
             trust_remote_code=model_args.trust_remote_code,
             torch_dtype=torch_dtype,
             low_cpu_mem_usage=model_args.low_cpu_mem_usage,
+            attn_implementation="eager",
         )
     else:
-        model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
+        model = AutoModelForCausalLM.from_config(
+            config, trust_remote_code=model_args.trust_remote_code, attn_implementation="eager"
+        )
     n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
     logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")

examples/onnxruntime/training/language-modeling/run_mlm.py (+4 −1)

@@ -430,10 +430,13 @@ def main():
             token=model_args.token,
             trust_remote_code=model_args.trust_remote_code,
             low_cpu_mem_usage=model_args.low_cpu_mem_usage,
+            attn_implementation="eager",
         )
     else:
         logger.info("Training new model from scratch")
-        model = AutoModelForMaskedLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
+        model = AutoModelForMaskedLM.from_config(
+            config, trust_remote_code=model_args.trust_remote_code, attn_implementation="eager"
+        )
 
     # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
     # on a small vocab and want a smaller embedding size, remove this test.

examples/onnxruntime/training/question-answering/run_qa.py (+1)

@@ -364,6 +364,7 @@ def main():
         revision=model_args.model_revision,
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
+        attn_implementation="eager",
     )
 
     # Tokenizer check: this script requires a fast tokenizer.

examples/onnxruntime/training/summarization/run_summarization.py (+1)

@@ -458,6 +458,7 @@ def main():
         revision=model_args.model_revision,
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
+        attn_implementation="eager",
     )
 
     if model.config.decoder_start_token_id is None and isinstance(tokenizer, (MBartTokenizer, MBartTokenizerFast)):

examples/onnxruntime/training/text-classification/run_classification.py (+1)

@@ -527,6 +527,7 @@ def main():
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
         ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
+        attn_implementation="eager",
     )
     model.config.pad_token_id = model.config.eos_token_id

examples/onnxruntime/training/text-classification/run_glue.py (+1)

@@ -404,6 +404,7 @@ def main():
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
         ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
+        attn_implementation="eager",
     )
 
     # Preprocessing the raw_datasets

examples/onnxruntime/training/token-classification/run_ner.py (+1)

@@ -405,6 +405,7 @@ def get_label_list(labels):
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
         ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
+        attn_implementation="eager",
     )
 
     if tokenizer.pad_token is None:

examples/onnxruntime/training/translation/run_translation.py (+1)

@@ -408,6 +408,7 @@ def main():
         revision=model_args.model_revision,
         token=model_args.token,
         trust_remote_code=model_args.trust_remote_code,
+        attn_implementation="eager",
     )
 
     # Set decoder_start_token_id
