# gpu-ci.yml
# GPU CI workflow: runs the Python interface check and the inference/PEFT
# test jobs defined under `jobs` on a GPU runner.
name: "GPU-based Tests"

# Triggers: every push to the "inference" branch, plus manual dispatch.
on:
  push:
    branches:
      - "inference"
  workflow_dispatch:

# One concurrency group per head ref (falling back to the run id).
# NOTE(review): github.head_ref is only populated for pull_request events,
# which this workflow does not subscribe to, so the group presumably always
# resolves to the unique run id and cancel-in-progress never cancels
# anything. Confirm whether `github.ref` was intended.
concurrency:
  group: gpu-ci-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Two independent jobs; each provisions its own runner, builds FlexFlow from
# scratch inside the `flexflow` conda environment and then runs its checks.
jobs:
  # Builds FlexFlow and runs the Python interface test script twice:
  # once against the build tree and once after `make install`.
  python-interface-check:
    name: Check Python Interface
    runs-on: "runs-on=${{ github.run_id }}/runner=gpu-nvidia"
    defaults:
      run:
        # -l: login shell, required to use an activated conda environment.
        # -e: a custom shell template drops GitHub's default fail-fast flag,
        #     so it is restored here; otherwise only the last command of a
        #     multi-line `run` decides whether the step fails.
        shell: bash -el {0}
    env:
      CUDA_PATH: /usr/local/cuda
      CONDA: "3"
      HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
    steps:
      # Prepend the MPI, EFA, conda and CUDA directories to PATH for all
      # subsequent steps. Kept as a single colon-joined entry so the
      # relative order of the directories is preserved.
      - name: Set Path
        run: echo "/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/opt/conda/bin:/usr/local/cuda/bin:/usr/local/cuda/include" >> "$GITHUB_PATH"

      # Debugging aids, disabled by default.
      # - name: Check GPU/CUDA availability
      #   run: |
      #     printenv CUDA_PATH
      #     printenv PATH
      #     printenv LD_LIBRARY_PATH
      #     nvidia-smi
      #     nvcc --version
      # - name: Display disk space
      #   run: df -h

      - name: Checkout Git Repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Rust
        # actions-rs/toolchain is archived and unmaintained;
        # dtolnay/rust-toolchain@stable installs the stable toolchain and
        # makes it the rustup default (the former `toolchain: stable` +
        # `override: true`).
        uses: dtolnay/rust-toolchain@stable

      - name: Install conda and FlexFlow dependencies
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniconda-version: "latest"
          activate-environment: flexflow
          environment-file: conda/flexflow.yml
          auto-activate-base: false
          auto-update-conda: false

      - name: Build FlexFlow
        run: |
          # -p: do not fail if the directory is already present.
          mkdir -p build
          cd build
          ../config/config.linux
          make -j

      - name: Check FlexFlow Python interface (before installation)
        run: ./tests/python_interface_test.sh before-installation

      - name: Install FlexFlow
        run: |
          cd build
          ../config/config.linux
          make install

      - name: Check FlexFlow Python interface (after installation)
        run: ./tests/python_interface_test.sh after-installation

  # Builds FlexFlow, runs the alignment, inference and PEFT test scripts,
  # then publishes the inference output directory as a build artifact.
  inference-tests:
    name: Inference Tests
    runs-on: "runs-on=${{ github.run_id }}/runner=gpu-nvidia"
    defaults:
      run:
        # -l: login shell, required to use an activated conda environment.
        # -e: a custom shell template drops GitHub's default fail-fast flag,
        #     so it is restored here; otherwise a failing test script that is
        #     not the last command in its step would be silently ignored.
        shell: bash -el {0}
    env:
      CUDA_PATH: /usr/local/cuda
      CONDA: "3"
      HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
    steps:
      # Prepend the MPI, EFA, conda and CUDA directories to PATH for all
      # subsequent steps. Kept as a single colon-joined entry so the
      # relative order of the directories is preserved.
      - name: Set Path
        run: echo "/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/opt/conda/bin:/usr/local/cuda/bin:/usr/local/cuda/include" >> "$GITHUB_PATH"

      # Debugging aids, disabled by default.
      # - name: Check GPU/CUDA availability
      #   run: |
      #     printenv CUDA_PATH
      #     printenv PATH
      #     printenv LD_LIBRARY_PATH
      #     nvidia-smi
      #     nvcc --version
      # - name: Display disk space
      #   run: df -h

      - name: Checkout Git Repository
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Rust
        # actions-rs/toolchain is archived and unmaintained;
        # dtolnay/rust-toolchain@stable installs the stable toolchain and
        # makes it the rustup default (the former `toolchain: stable` +
        # `override: true`).
        uses: dtolnay/rust-toolchain@stable

      - name: Install conda and FlexFlow dependencies
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniconda-version: "latest"
          activate-environment: flexflow
          environment-file: conda/flexflow.yml
          auto-activate-base: false
          auto-update-conda: false

      - name: Build FlexFlow
        run: |
          # -p: do not fail if the directory is already present.
          mkdir -p build
          cd build
          ../config/config.linux
          make -j

      - name: Run inference tests
        env:
          # Repository-level variable forwarded to the test scripts.
          CPP_INFERENCE_TESTS: ${{ vars.CPP_INFERENCE_TESTS }}
        run: |
          source ./build/set_python_envs.sh
          ./tests/fine_grained_alignment_test.sh
          ./tests/inference_tests.sh

      - name: Run PEFT tests
        run: |
          source ./build/set_python_envs.sh
          ./tests/peft_test.sh

      # The remaining steps run even when an earlier step failed, so that
      # whatever output exists is still collected and the cache is cleaned.
      - name: Save inference output as an artifact
        if: always()
        run: |
          # If the job failed before any output was written there is nothing
          # to archive; skip instead of adding a second, misleading failure.
          if [ -d inference/output ]; then
            cd inference
            tar -zcvf output.tar.gz ./output
          else
            echo "inference/output not found; nothing to archive"
          fi

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: output
          path: inference/output.tar.gz

      # Github persists the .cache folder across different runs/containers
      - name: Clear cache
        if: always()
        run: sudo rm -rf ~/.cache