Skip to content

Commit d603401

Browse files
huydhn and pytorchmergebot
authored and committed
Run slow jobs in trunk commits (pytorch#139842)
Per our discussion in https://fburl.com/gdoc/voce5o06, we will run slow jobs more frequently, on all trunk commits. Note that the slowgradcheck jobs are moved to periodic because they are not about running slow tests. There are currently 3 GPU + 2 ROCm + some CPU `linux.4xlarge` runners running slow jobs, so I don't expect to see a big increase in CI cost after this change. Also, these slow jobs will run only on trunk commits, not on PRs, so their duration won't affect PR TTS.
Pull Request resolved: pytorch#139842
Approved by: https://github.com/clee2000
1 parent 8d983aa commit d603401

File tree

2 files changed

+38
-38
lines changed

2 files changed

+38
-38
lines changed

.github/workflows/periodic.yml

+33
Original file line number | Diff line number | Diff line change
@@ -290,3 +290,36 @@ jobs:
290290
build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
291291
docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
292292
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
293+
294+
linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build:
295+
name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
296+
uses: ./.github/workflows/_linux-build.yml
297+
needs: get-label-type
298+
with:
299+
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
300+
build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
301+
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
302+
cuda-arch-list: 8.6
303+
test-matrix: |
304+
{ include: [
305+
{ config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
306+
{ config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
307+
{ config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
308+
{ config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
309+
{ config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
310+
{ config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
311+
{ config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
312+
{ config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
313+
]}
314+
315+
linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test:
316+
name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
317+
uses: ./.github/workflows/_linux-test.yml
318+
needs:
319+
- linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build
320+
- target-determination
321+
with:
322+
build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
323+
docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
324+
test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
325+
timeout-minutes: 300

.github/workflows/slow.yml

+5-38
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,14 @@
44
name: slow
55

66
on:
7-
schedule:
8-
- cron: 45 0,4,8,12,16,20 * * *
9-
- cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests
107
push:
11-
tags:
12-
- ciflow/slow/*
138
branches:
9+
- main
1410
- release/*
11+
tags:
12+
- ciflow/slow/*
13+
schedule:
14+
- cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests
1515
workflow_dispatch:
1616

1717
concurrency:
@@ -47,39 +47,6 @@ jobs:
4747
curr_branch: ${{ github.head_ref || github.ref_name }}
4848
curr_ref_type: ${{ github.ref_type }}
4949

50-
linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build:
51-
name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
52-
uses: ./.github/workflows/_linux-build.yml
53-
needs: get-label-type
54-
with:
55-
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
56-
build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
57-
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
58-
cuda-arch-list: 8.6
59-
test-matrix: |
60-
{ include: [
61-
{ config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
62-
{ config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
63-
{ config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
64-
{ config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
65-
{ config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
66-
{ config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
67-
{ config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
68-
{ config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] },
69-
]}
70-
71-
linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test:
72-
name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
73-
uses: ./.github/workflows/_linux-test.yml
74-
needs:
75-
- linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build
76-
- target-determination
77-
with:
78-
build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck
79-
docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }}
80-
test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }}
81-
timeout-minutes: 300
82-
8350
linux-focal-cuda12_1-py3_10-gcc9-sm86-build:
8451
name: linux-focal-cuda12.1-py3.10-gcc9-sm86
8552
uses: ./.github/workflows/_linux-build.yml

0 commit comments

Comments
 (0)