Skip to content

Commit 7a6c368

Browse files
committed
[CI]split xpu distributed ut into another job
1 parent 2807035 commit 7a6c368

File tree

1 file changed

+129
-2
lines changed

1 file changed

+129
-2
lines changed

.github/workflows/_linux_ut.yml

+129-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ permissions: read-all
4848

4949
jobs:
5050
ut_test:
51-
runs-on: ${{ inputs.runner }}
51+
runs-on: ${{ inputs.runner }}
52+
if: ${{ !contains(inputs.ut, 'xpu_distributed') }}
5253
timeout-minutes: 900
5354
env:
5455
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
@@ -234,8 +235,134 @@ jobs:
234235
test_cmd="${test_cmd} test_xpu.py"
235236
fi
236237
eval $test_cmd 2>${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test_error.log | tee ${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test.log
238+
- name: UT Test Results Check
239+
shell: bash
240+
run: |
241+
function contains() {
242+
contains_status="echo 'Start $2 ...'"
243+
{
244+
[[ $1 =~ (^|,)$2($|,) ]]
245+
} || {
246+
echo "[Warning] $2 is not a supported type! Skipped!"
247+
contains_status="continue"
248+
}
249+
}
250+
set -xe
251+
echo "UT_NAME=$(echo ${{ inputs.ut }} |sed 's/,/-/g')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
252+
for ut_suite in $(echo ${{ inputs.ut }} |sed 's/,/ /g')
253+
do
254+
contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu,xpu_distributed" $ut_suite
255+
$contains_status
256+
cd ${{ github.workspace }}/ut_log/${ut_suite}
257+
cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
258+
bash ut_result_check.sh ${ut_suite}
259+
done
260+
- name: Upload Inductor XPU UT Log
261+
if: always()
262+
uses: actions/upload-artifact@v4
263+
with:
264+
name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}-${{ env.UT_NAME }}
265+
path: ${{ github.workspace }}/ut_log
266+
267+
distributed_ut_test:
268+
runs-on: ${{ inputs.runner }}
269+
if: contains(inputs.ut, 'xpu_distributed')
270+
timeout-minutes: 900
271+
env:
272+
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
273+
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
274+
steps:
275+
- name: Checkout torch-xpu-ops
276+
uses: actions/checkout@v4
277+
- name: Prepare Stock Pytorch
278+
run: |
279+
pwd
280+
which conda && conda clean -ay
281+
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
282+
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
283+
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y
284+
source activate xpu_op_${ZE_AFFINITY_MASK}
285+
cd ../ && rm -rf pytorch
286+
pip install requests
287+
git clone https://github.com/pytorch/pytorch pytorch
288+
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
289+
cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
290+
# apply PRs for stock pytorch
291+
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
292+
git status && git show -s
293+
git submodule sync && git submodule update --init --recursive
294+
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
295+
echo "Don't replace torch-xpu-ops!"
296+
else
297+
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
298+
# Workaround for torch-xpu-ops ci test
299+
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
300+
fi
301+
fi
302+
- name: Triton Installation
303+
run: |
304+
source activate xpu_op_${ZE_AFFINITY_MASK}
305+
cd ../pytorch
306+
TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
307+
if [ -z ${{ inputs.triton }} ]; then
308+
TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)"
309+
else
310+
TRITON_COMMIT_ID="${{ inputs.triton }}"
311+
fi
312+
echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
313+
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
314+
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
315+
fi
316+
- name: Download Pytorch wheel
317+
if: ${{ inputs.pytorch != 'nightly_wheel' }}
318+
uses: actions/download-artifact@v4
319+
with:
320+
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
321+
path: ${{ github.workspace }}
322+
- name: Install Pytorch XPU
323+
run: |
324+
source activate xpu_op_${ZE_AFFINITY_MASK}
325+
source .github/scripts/env.sh ${{ inputs.pytorch }}
326+
pip install mkl-static==2025.0.1 mkl-include==2025.0.1
327+
if [[ ${{ inputs.abi }} == '0' ]]; then
328+
export _GLIBCXX_USE_CXX11_ABI=0
329+
else
330+
export _GLIBCXX_USE_CXX11_ABI=1
331+
fi
332+
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
333+
cd ../pytorch
334+
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
335+
pip install -r requirements.txt
336+
pip install --force-reinstall ${{ github.workspace }}/torch*.whl
337+
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
338+
else
339+
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
340+
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
341+
cd ../pytorch
342+
git reset --hard && git checkout ${TORCH_COMMIT_ID}
343+
TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt)
344+
rm -rf third_party/torch-xpu-ops
345+
git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops
346+
cd third_party/torch-xpu-ops
347+
git checkout ${TORCH_XPU_OPS_COMMIT}
348+
cd ../..
349+
python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py
350+
fi
351+
pip install -r .ci/docker/requirements-ci.txt
352+
- name: Torch Config
353+
run: |
354+
source activate xpu_op_${ZE_AFFINITY_MASK}
355+
source .github/scripts/env.sh ${{ inputs.pytorch }}
356+
python -c "import torch; print(torch.__config__.show())"
357+
python -c "import torch; print(torch.__config__.parallel_info())"
358+
python -c "import torch; print(torch.xpu.device_count())"
359+
python -c "import triton; print(triton.__version__)"
360+
361+
cd ..
362+
python pytorch/torch/utils/collect_env.py
363+
rm -rf /tmp/torchinductor_*
364+
rm -rf ~/.triton/cache
237365
- name: Run Torch XPU Distributed UT
238-
if: contains(inputs.ut, 'xpu_distributed')
239366
run: |
240367
source .github/scripts/env.sh ${{ inputs.pytorch }}
241368
source activate xpu_op_${ZE_AFFINITY_MASK}

0 commit comments

Comments
 (0)