Commit f40e1b1

[CI]split xpu distributed ut into another job
1 parent 2807035 commit f40e1b1
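
For context, _linux_ut.yml is a reusable workflow, so a caller chooses which UT suites run (and, after this change, whether the separate distributed job fires) through the ut input. Below is a minimal caller sketch, assuming only the ut, runner and python inputs that appear in the diff; the caller file name, job name, runner label and Python version are hypothetical.

# .github/workflows/example-caller.yml (hypothetical caller, not part of this commit)
name: example-xpu-ut
on: pull_request
jobs:
  linux-ut:
    uses: ./.github/workflows/_linux_ut.yml
    with:
      ut: op_ut,xpu_distributed   # xpu_distributed is the suite the new distributed job keys on
      runner: linux.idc.xpu       # assumed runner label
      python: '3.10'              # assumed Python version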

File tree

1 file changed: +127 -0 lines changed

.github/workflows/_linux_ut.yml

@@ -234,6 +234,133 @@ jobs:
             test_cmd="${test_cmd} test_xpu.py"
           fi
           eval $test_cmd 2>${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test_error.log | tee ${{ github.workspace }}/ut_log/torch_xpu/torch_xpu_test.log
+      - name: UT Test Results Check
+        shell: bash
+        run: |
+          function contains() {
+            contains_status="echo 'Start $2 ...'"
+            {
+              [[ $1 =~ (^|,)$2($|,) ]]
+            } || {
+              echo "[Warning] $2 is not a supported type! Skipped!"
+              contains_status="continue"
+            }
+          }
+          set -xe
+          echo "UT_NAME=$(echo ${{ inputs.ut }} |sed 's/,/-/g')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
+          for ut_suite in $(echo ${{ inputs.ut }} |sed 's/,/ /g')
+          do
+            contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu,xpu_distributed" $ut_suite
+            $contains_status
+            cd ${{ github.workspace }}/ut_log/${ut_suite}
+            cp ${{ github.workspace }}/.github/scripts/ut_result_check.sh ./
+            bash ut_result_check.sh ${ut_suite}
+          done
+      - name: Upload Inductor XPU UT Log
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}-${{ env.UT_NAME }}
+          path: ${{ github.workspace }}/ut_log
+
+jobs:
+  distributed_ut_test:
+    runs-on: ${{ inputs.runner }}
+    timeout-minutes: 900
+    env:
+      NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+      DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
+    steps:
+      - name: Checkout torch-xpu-ops
+        uses: actions/checkout@v4
+      - name: Prepare Stock Pytorch
+        run: |
+          pwd
+          which conda && conda clean -ay
+          conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
+            rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
+          conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          cd ../ && rm -rf pytorch
+          pip install requests
+          git clone https://github.com/pytorch/pytorch pytorch
+          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
+            cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
+            # apply PRs for stock pytorch
+            python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
+            git status && git show -s
+            git submodule sync && git submodule update --init --recursive
+            if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
+              echo "Don't replace torch-xpu-ops!"
+            else
+              rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
+              # Workaround for torch-xpu-ops ci test
+              sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
+            fi
+          fi
+      - name: Triton Installation
+        run: |
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          cd ../pytorch
+          TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
+          if [ -z ${{ inputs.triton }} ]; then
+            TRITON_COMMIT_ID="$(<.ci/docker/ci_commit_pins/triton-xpu.txt)"
+          else
+            TRITON_COMMIT_ID="${{ inputs.triton }}"
+          fi
+          echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
+          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
+            pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
+          fi
+      - name: Download Pytorch wheel
+        if: ${{ inputs.pytorch != 'nightly_wheel' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-${{ inputs.abi }}
+          path: ${{ github.workspace }}
+      - name: Install Pytorch XPU
+        run: |
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          source .github/scripts/env.sh ${{ inputs.pytorch }}
+          pip install mkl-static==2025.0.1 mkl-include==2025.0.1
+          if [[ ${{ inputs.abi }} == '0' ]]; then
+            export _GLIBCXX_USE_CXX11_ABI=0
+          else
+            export _GLIBCXX_USE_CXX11_ABI=1
+          fi
+          if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
+            cd ../pytorch
+            export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
+            pip install -r requirements.txt
+            pip install --force-reinstall ${{ github.workspace }}/torch*.whl
+            git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
+          else
+            pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
+            TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
+            cd ../pytorch
+            git reset --hard && git checkout ${TORCH_COMMIT_ID}
+            TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt)
+            rm -rf third_party/torch-xpu-ops
+            git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops
+            cd third_party/torch-xpu-ops
+            git checkout ${TORCH_XPU_OPS_COMMIT}
+            cd ../..
+            python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py
+          fi
+          pip install -r .ci/docker/requirements-ci.txt
+      - name: Torch Config
+        run: |
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          source .github/scripts/env.sh ${{ inputs.pytorch }}
+          python -c "import torch; print(torch.__config__.show())"
+          python -c "import torch; print(torch.__config__.parallel_info())"
+          python -c "import torch; print(torch.xpu.device_count())"
+          python -c "import triton; print(triton.__version__)"
+
+          cd ..
+          python pytorch/torch/utils/collect_env.py
+          rm -rf /tmp/torchinductor_*
+          rm -rf ~/.triton/cache
       - name: Run Torch XPU Distributed UT
         if: contains(inputs.ut, 'xpu_distributed')
         run: |

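Aside: the contains() helper added in the UT Test Results Check step drives control flow by storing a command in contains_status and expanding it on the next line, so an unsupported suite name turns into a continue for the surrounding loop. A standalone sketch of that pattern, runnable outside CI; the suite list mirrors the diff, while fake_suite is a made-up name used only to exercise the skip path.

#!/usr/bin/env bash
# Sketch of the suite filter from the "UT Test Results Check" step (not part of the commit).
set -xe

function contains() {
    contains_status="echo 'Start $2 ...'"
    {
      [[ $1 =~ (^|,)$2($|,) ]]
    } || {
      echo "[Warning] $2 is not a supported type! Skipped!"
      contains_status="continue"
    }
}

for ut_suite in op_ut xpu_distributed fake_suite
do
  contains "op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu,xpu_distributed" $ut_suite
  $contains_status   # runs the stored command: the start message, or continue to skip this suite
  echo "would check results for ${ut_suite} here"
done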