Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance CI/Nightly workflow #1419

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ runs:
if: ${{ inputs.env_prepare }}
shell: bash
run: |
pwd && printenv
source activate e2e_ci
source .github/scripts/env.sh ${{ inputs.pytorch }}
if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/rpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Post-process the Linux wheel: change the .so rpath so that the wheel works with the XPU runtime PyPI packages
# Usage: rpath.sh /path/to/torch-xxxx.whl

pkg=$1
pkg=$(realpath $1)
PATCHELF_BIN=patchelf

make_wheel_record() {
Expand Down
132 changes: 53 additions & 79 deletions .github/workflows/_linux_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,6 @@ on:
type: string
default: 'lts'
description: Driver lts/rolling
outputs:
whl_name:
description: The name of the wheel file
value: ${{ jobs.Torch-XPU-Build.outputs.whl_name }}
torch_commit_id:
description: The commit id of the torch build
value: ${{ jobs.Torch-XPU-Build.outputs.TORCH_COMMIT_ID }}

permissions:
issues: write
Expand All @@ -48,8 +41,6 @@ jobs:
build:
if: ${{ inputs.pytorch }} != 'nightly_wheel'
runs-on: ${{ inputs.runner }}
outputs:
TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }}
timeout-minutes: 900
env:
commit_issue: 1280
Expand All @@ -61,7 +52,7 @@ jobs:
uses: actions/checkout@v4
- name: Prepare Stock Pytorch
run: |
pwd
pwd && printenv
which conda && conda clean -ay
conda remove --all -y -n xpu_build || \
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_build
Expand All @@ -70,19 +61,17 @@ jobs:
cd ../ && rm -rf pytorch
pip install requests
git clone https://github.com/pytorch/pytorch pytorch
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
# apply PRs for stock pytorch
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
fi
cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
# apply PRs for stock pytorch
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
fi
- name: Build Pytorch XPU
run: |
Expand All @@ -99,64 +88,51 @@ jobs:
else
export _GLIBCXX_USE_CXX11_ABI=1
fi
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
repo="${{ github.repository }}"
last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1)
cd ../pytorch
current_commit=$(git rev-parse HEAD)
is_fork_pr=false
if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then
is_fork_pr=true
fi
echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}"
build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
repo="${{ github.repository }}"
last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1)
cd ../pytorch
current_commit=$(git rev-parse HEAD)
is_fork_pr=false
if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then
is_fork_pr=true
fi
echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}"

export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
pip install -r requirements.txt
WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
pip install -r requirements.txt
WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log

if [[ ${is_fork_pr} == "false" ]]; then
if [ -f dist/torch*.whl && "${last_commit}" != "${current_commit}"] && [[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then
echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};g" | sed '/^$/d' > new_body.txt
gh --repo $repo issue edit $commit_issue --body-file new_body.txt
fi
if [ ! -f dist/torch*.whl ]; then
echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25"
git clean -df .
git checkout $last_commit
# apply PRs for stock pytorch
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
fi
WERROR=1 python setup.py bdist_wheel
if [[ ${is_fork_pr} == "false" ]]; then
if [ -f dist/torch*.whl && "${last_commit}" != "${current_commit}"] && [[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then
echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};g" | sed '/^$/d' > new_body.txt
gh --repo $repo issue edit $commit_issue --body-file new_body.txt
fi
if [ ! -f dist/torch*.whl ]; then
echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25"
git clean -df .
git checkout $last_commit
# apply PRs for stock pytorch
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
fi
else
echo "Forked PR, don't update the issue"
WERROR=1 python setup.py bdist_wheel
fi
pip install --force-reinstall dist/*.whl
cp dist/*.whl ${{ github.workspace }}/
cp pytorch_${current_commit}_build.log ${{ github.workspace }}/
else
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
cd ../pytorch
git reset --hard && git checkout ${TORCH_COMMIT_ID}
TORCH_XPU_OPS_COMMIT=$(<third_party/xpu.txt)
rm -rf third_party/torch-xpu-ops
git clone https://github.com/intel/torch-xpu-ops.git third_party/torch-xpu-ops
cd third_party/torch-xpu-ops
git checkout ${TORCH_XPU_OPS_COMMIT}
cd ../..
echo "Forked PR, don't update the issue"
fi
pip install --force-reinstall dist/*.whl
cp dist/*.whl ${{ github.workspace }}/
cp pytorch_${current_commit}_build.log ${{ github.workspace }}/
- name: Torch Config
run: |
source activate xpu_build
Expand All @@ -168,12 +144,10 @@ jobs:
cd ..
python pytorch/torch/utils/collect_env.py
- name: Identify Build version
id: build_version
run: |
source activate xpu_build
source .github/scripts/env.sh
cd ../pytorch
echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_linux_ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
uses: actions/checkout@v4
- name: Prepare Stock Pytorch
run: |
pwd
pwd && printenv
which conda && conda clean -ay
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
Expand Down
46 changes: 27 additions & 19 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,20 @@ jobs:
secrets: inherit
if: ${{ ! cancelled() }}
name: linux-nightly-ondemand
outputs:
TORCH_COMMIT_ID: ${{ steps.build_1.outputs.TORCH_COMMIT_ID }}
permissions:
issues: write
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
abi: 1
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
runner: pvc_e2e
steps:
- name: Build PyTorch ABI=1
id: build_1
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
abi: 1
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
runner: pvc_e2e

Linux-Nightly-Ondemand-UT-Tests:
if: ${{ github.event_name == 'schedule' || inputs.ut != '' }}
Expand All @@ -88,7 +93,7 @@ jobs:
with:
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
runner: linux.idc.xpu
Expand All @@ -101,7 +106,7 @@ jobs:
needs: Linux-Nightly-Ondemand-Build
timeout-minutes: 3600
env:
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
Expand Down Expand Up @@ -142,7 +147,6 @@ jobs:
pip install requests
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
Expand Down Expand Up @@ -183,7 +187,6 @@ jobs:
echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
- name: Download Pytorch wheel
if: ${{ inputs.pytorch != 'nightly_wheel' }}
uses: actions/download-artifact@v4
with:
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1
Expand Down Expand Up @@ -308,13 +311,18 @@ jobs:
name: linux-nightly-ondemand-abi0
permissions:
issues: write
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
abi: 0
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
runner: pvc_e2e
outputs:
TORCH_COMMIT_ID: ${{ steps.build_0.outputs.TORCH_COMMIT_ID }}
steps:
- name: Build PyTorch ABI=0
id: build_0
uses: ./.github/workflows/_linux_build.yml
with:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
abi: 0
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
runner: pvc_e2e

Linux-Weekly-UT-Tests-ABI-0:
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
Expand All @@ -325,7 +333,7 @@ jobs:
abi: 0
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: op_regression,op_regression_dev1,op_extended,op_ut
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.torch_commit_id }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.TORCH_COMMIT_ID }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
runner: linux.idc.xpu
Expand Down
8 changes: 3 additions & 5 deletions .github/workflows/nightly_ondemand_rolling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
with:
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.build.outputs.TORCH_COMMIT_ID }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
driver: rolling
Expand All @@ -104,7 +104,7 @@ jobs:
needs: Linux-Nightly-Ondemand-Build-Rolling
timeout-minutes: 3600
env:
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.build.outputs.TORCH_COMMIT_ID }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
Expand Down Expand Up @@ -147,7 +147,6 @@ jobs:
pip install requests
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
git status && git show -s
git submodule sync && git submodule update --init --recursive
if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then
echo "Don't replace torch-xpu-ops!"
else
Expand Down Expand Up @@ -188,7 +187,6 @@ jobs:
echo ${TRITON_REPO}@${TRITON_COMMIT_ID}
pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python"
- name: Download Pytorch wheel
if: ${{ inputs.pytorch != 'nightly_wheel' }}
uses: actions/download-artifact@v4
with:
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1
Expand Down Expand Up @@ -338,7 +336,7 @@ jobs:
abi: 0
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.outputs.torch_commit_id }}
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.build.outputs.TORCH_COMMIT_ID }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
driver: rolling
Expand Down
Loading