From 1bfc37c6c263639c8f10b10763fbd226450b6071 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Tue, 25 Feb 2025 15:34:07 +0800 Subject: [PATCH 1/8] deps --- .github/actions/inductor-xpu-e2e-test/action.yml | 5 ++--- .github/workflows/nightly_ondemand.yml | 3 +-- .github/workflows/nightly_ondemand_rolling.yml | 3 +-- .github/workflows/nightly_ondemand_whl.yml | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 52ec6c3b0..2fb6b9061 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -67,9 +67,8 @@ runs: python install.py --continue_on_fail # deps for torchrec_dlrm pip install pyre_extensions - pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu - pip install torchmetrics==1.0.3 - pip install torchrec --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu + pip install fbgemm-gpu + pip install --no-deps torchmetrics==1.0.3 torchrec tensordict fi if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION} diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index cf0d02331..6da84223c 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -130,7 +130,7 @@ jobs: conda create -n e2e_ci python=${{ env.python }} cmake ninja -y source activate e2e_ci pip install mkl-static==2025.0.1 mkl-include==2025.0.1 - pip install pandas scipy tqdm + pip install pandas scipy tqdm pyyaml botocore - name: Prepare Stock Pytorch run: | pwd @@ -142,7 +142,6 @@ jobs: pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py git status && git show -s - git submodule sync && git submodule update --init --recursive if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then echo "Don't replace torch-xpu-ops!" else diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index d21b91361..e01ad0341 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -135,7 +135,7 @@ jobs: conda create -n e2e_ci python=${{ env.python }} cmake ninja -y source activate e2e_ci pip install mkl-static==2025.0.1 mkl-include==2025.0.1 - pip install pandas scipy tqdm + pip install pandas scipy tqdm pyyaml botocore - name: Prepare Stock Pytorch run: | pwd @@ -147,7 +147,6 @@ jobs: pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py git status && git show -s - git submodule sync && git submodule update --init --recursive if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then echo "Don't replace torch-xpu-ops!" else diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index 1fdee7409..11a400c43 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -100,7 +100,7 @@ jobs: conda create -n e2e_ci python=${{ env.python }} cmake ninja -y source activate e2e_ci pip install mkl-static==2025.0.1 mkl-include==2025.0.1 - pip install pandas scipy tqdm + pip install pandas scipy tqdm pyyaml botocore - name: Prepare Stock Pytorch id: installed run: | From 5426bc46e379622089de51b727255601591ddd04 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Wed, 26 Feb 2025 11:13:54 +0800 Subject: [PATCH 2/8] modify rpath pkg path --- .github/scripts/rpath.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/rpath.sh b/.github/scripts/rpath.sh index 14c23856e..250cffb0a 100644 --- a/.github/scripts/rpath.sh +++ b/.github/scripts/rpath.sh @@ -3,7 +3,7 @@ # Post-op the linux wheel to change the .so rpath to make the wheel work with XPU runtime pypi packages # Usage: rpath.sh /path/to/torch-xxxx.whl -pkg=$1 +pkg=$(realpath $1) PATCHELF_BIN=patchelf make_wheel_record() { From 9a21073985fb89bb5535a619bd7df097152c81e5 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Fri, 28 Feb 2025 15:31:46 +0800 Subject: [PATCH 3/8] modify build and show env --- .../actions/inductor-xpu-e2e-test/action.yml | 1 + .github/workflows/_linux_build.yml | 118 ++++++++---------- .github/workflows/_linux_ut.yml | 2 +- 3 files changed, 53 insertions(+), 68 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 2fb6b9061..36375a8b1 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -50,6 +50,7 @@ runs: if: ${{ inputs.env_prepare }} shell: bash run: | + pwd && printenv source activate e2e_ci source .github/scripts/env.sh ${{ inputs.pytorch }} if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index fbaf5e5c5..0fc94c580 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -61,7 +61,7 @@ jobs: uses: actions/checkout@v4 - name: Prepare Stock Pytorch run: | - pwd + pwd && printenv which conda && conda clean -ay conda remove --all -y -n xpu_build || \ rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_build @@ -70,19 +70,17 @@ jobs: cd ../ && rm -rf pytorch pip install requests git clone https://github.com/pytorch/pytorch pytorch - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cd pytorch && git checkout $(echo ${{ inputs.pytorch }}) - # apply PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git show -s - git submodule sync && git submodule update --init --recursive - if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then - echo "Don't replace torch-xpu-ops!" - else - rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ - # Workaround for torch-xpu-ops ci test - sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt - fi + cd pytorch && git checkout $(echo ${{ inputs.pytorch }}) + # apply PRs for stock pytorch + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + git status && git show -s + git submodule sync && git submodule update --init --recursive + if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then + echo "Don't replace torch-xpu-ops!" + else + rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ + # Workaround for torch-xpu-ops ci test + sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt fi - name: Build Pytorch XPU run: | @@ -99,63 +97,50 @@ jobs: else export _GLIBCXX_USE_CXX11_ABI=1 fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - repo="${{ github.repository }}" - last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1) - cd ../pytorch - current_commit=$(git rev-parse HEAD) - is_fork_pr=false - if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then - is_fork_pr=true - fi - echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}" + build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + repo="${{ github.repository }}" + last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1) + cd ../pytorch + current_commit=$(git rev-parse HEAD) + is_fork_pr=false + if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then + is_fork_pr=true + fi + echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}" - export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} - pip install -r requirements.txt - WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log + export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} + pip install -r requirements.txt + WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log - if [[ ${is_fork_pr} == "false" ]]; then - if [ -f dist/torch*.whl && "${last_commit}" != "${current_commit}"] && [[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then - echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" - gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};g" | sed '/^$/d' > new_body.txt - gh --repo $repo issue edit $commit_issue --body-file new_body.txt + if [[ ${is_fork_pr} == "false" ]]; then + if [ -f dist/torch*.whl && "${last_commit}" != "${current_commit}"] && [[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then + echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" + gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};g" | sed '/^$/d' > new_body.txt + gh --repo $repo issue edit $commit_issue --body-file new_body.txt + else + echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" + gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25" + git clean -df . + git checkout $last_commit + # apply PRs for stock pytorch + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + git status && git show -s + git submodule sync && git submodule update --init --recursive + if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then + echo "Don't replace torch-xpu-ops!" else - echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" - gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25" - git clean -df . - git checkout $last_commit - # apply PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git show -s - git submodule sync && git submodule update --init --recursive - if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then - echo "Don't replace torch-xpu-ops!" - else - rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ - # Workaround for torch-xpu-ops ci test - sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt - fi - WERROR=1 python setup.py bdist_wheel + rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ + # Workaround for torch-xpu-ops ci test + sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt fi - else - echo "Forked PR, don't update the issue" + WERROR=1 python setup.py bdist_wheel fi - pip install --force-reinstall dist/*.whl - cp dist/*.whl ${{ github.workspace }}/ - cp pytorch_${current_commit}_build.log ${{ github.workspace }}/ else - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ../pytorch - git reset --hard && git checkout ${TORCH_COMMIT_ID} - TORCH_XPU_OPS_COMMIT=$(> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 155bf54b2..a76d6d75d 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -59,7 +59,7 @@ jobs: uses: actions/checkout@v4 - name: Prepare Stock Pytorch run: | - pwd + pwd && printenv which conda && conda clean -ay conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \ rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK} From 80ad21b243e65ec7c60ca7532105f31ae789cbb9 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Tue, 4 Mar 2025 16:19:42 +0800 Subject: [PATCH 4/8] Update --- .github/actions/inductor-xpu-e2e-test/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 36375a8b1..c59595924 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -68,8 +68,9 @@ runs: python install.py --continue_on_fail # deps for torchrec_dlrm pip install pyre_extensions - pip install fbgemm-gpu - pip install --no-deps torchmetrics==1.0.3 torchrec tensordict + pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu + pip install torchmetrics==1.0.3 + pip install torchrec --no-deps --index-url https://download.pytorch.org/whl/nightly/cpu fi if [[ ${{ inputs.suite }} == *"huggingface"* ]]; then pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_VERSION} From 37471caa41cde1beba95557173e3e730f0b540fa Mon Sep 17 00:00:00 2001 From: mengfeil Date: Tue, 4 Mar 2025 16:20:53 +0800 Subject: [PATCH 5/8] Update --- .github/workflows/nightly_ondemand.yml | 2 +- .github/workflows/nightly_ondemand_rolling.yml | 2 +- .github/workflows/nightly_ondemand_whl.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 319d82bbd..88a4c3c32 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -130,7 +130,7 @@ jobs: conda create -n e2e_ci python=${{ env.python }} cmake ninja -y source activate e2e_ci pip install mkl-static==2025.0.1 mkl-include==2025.0.1 - pip install pandas scipy tqdm pyyaml botocore + pip install pandas scipy tqdm - name: Prepare Stock Pytorch run: | pwd diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index cabe056cc..6ca2bdd25 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -135,7 +135,7 @@ jobs: conda create -n e2e_ci python=${{ env.python }} cmake ninja -y source activate e2e_ci pip install mkl-static==2025.0.1 mkl-include==2025.0.1 - pip install pandas scipy tqdm pyyaml botocore + pip install pandas scipy tqdm - name: Prepare Stock Pytorch run: | pwd diff --git a/.github/workflows/nightly_ondemand_whl.yml b/.github/workflows/nightly_ondemand_whl.yml index f99131b24..86cc3e764 100644 --- a/.github/workflows/nightly_ondemand_whl.yml +++ b/.github/workflows/nightly_ondemand_whl.yml @@ -100,7 +100,7 @@ jobs: conda create -n e2e_ci python=${{ env.python }} cmake ninja -y source activate e2e_ci pip install mkl-static==2025.0.1 mkl-include==2025.0.1 - pip install pandas scipy tqdm pyyaml botocore + pip install pandas scipy tqdm - name: Prepare Stock Pytorch id: installed run: | From 2662b13f88de9c5695b453e49524d659c229e666 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Wed, 5 Mar 2025 16:22:30 +0800 Subject: [PATCH 6/8] Modify build --- .github/workflows/_linux_build.yml | 7 ------- .github/workflows/nightly_ondemand.yml | 7 +++---- .github/workflows/nightly_ondemand_rolling.yml | 7 +++---- .github/workflows/pull.yml | 7 +++---- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index ebeaf66cd..21a889f08 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -33,13 +33,6 @@ on: type: string default: 'lts' description: Driver lts/rolling - outputs: - whl_name: - description: The name of the wheel file - value: ${{ jobs.Torch-XPU-Build.outputs.whl_name }} - torch_commit_id: - description: The commit id of the torch build - value: ${{ jobs.Torch-XPU-Build.outputs.TORCH_COMMIT_ID }} permissions: issues: write diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 88a4c3c32..9e891ded7 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -88,7 +88,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu @@ -101,7 +101,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -182,7 +182,6 @@ jobs: echo ${TRITON_REPO}@${TRITON_COMMIT_ID} pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python" - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} uses: actions/download-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1 @@ -324,7 +323,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: op_regression,op_regression_dev1,op_extended,op_ut - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 6ca2bdd25..f6515e340 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -90,7 +90,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} driver: rolling @@ -104,7 +104,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build-Rolling timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -187,7 +187,6 @@ jobs: echo ${TRITON_REPO}@${TRITON_COMMIT_ID} pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python" - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} uses: actions/download-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1 @@ -337,7 +336,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} driver: rolling diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index a3ff597d4..ded75277b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -63,7 +63,7 @@ jobs: needs: preci-linux-build uses: ./.github/workflows/_linux_ut.yml with: - pytorch: ${{ needs.preci-linux-build.outputs.torch_commit_id }} + pytorch: ${{ needs.preci-linux-build.outputs.TORCH_COMMIT_ID }} ut: op_regression,op_regression_dev1,op_extended,op_ut,xpu_distributed runner: linux.idc.xpu @@ -91,7 +91,7 @@ jobs: cd ../ && rm -rf pytorch source activate e2e_ci git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${{ needs.preci-linux-build.outputs.torch_commit_id }} + cd pytorch && git checkout ${{ needs.preci-linux-build.outputs.TORCH_COMMIT_ID }} # apply PRs for stock pytorch pip install requests # https://github.com/mengfei25/pytorch/pull/18 internal use only for subset model list @@ -110,7 +110,6 @@ jobs: echo ${TRITON_REPO}@${TRITON_PINNED_COMMIT} pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python" - name: Download Pytorch wheel - if: ${{ inputs.pytorch }} != 'nightly_wheel' uses: actions/download-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1 @@ -220,7 +219,7 @@ jobs: uses: ./.github/workflows/_linux_ut.yml with: abi: 0 - pytorch: ${{ needs.preci-linux-build-abi-0.outputs.torch_commit_id }} + pytorch: ${{ needs.preci-linux-build-abi-0.outputs.TORCH_COMMIT_ID }} ut: op_extended runner: linux.idc.xpu From 0044b2b186f1e4491b4800f14eb5fcb3e598c912 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Thu, 6 Mar 2025 13:39:16 +0800 Subject: [PATCH 7/8] modify build torch commit --- .github/workflows/nightly_ondemand.yml | 6 +++--- .github/workflows/nightly_ondemand_rolling.yml | 6 +++--- .github/workflows/pull.yml | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 9e891ded7..fb04b94d7 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -88,7 +88,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu @@ -101,7 +101,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.build.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -323,7 +323,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: op_regression,op_regression_dev1,op_extended,op_ut - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index f6515e340..5edd1a871 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -90,7 +90,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} driver: rolling @@ -104,7 +104,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build-Rolling timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.build.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -336,7 +336,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} driver: rolling diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index ded75277b..8864fefc0 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -63,7 +63,7 @@ jobs: needs: preci-linux-build uses: ./.github/workflows/_linux_ut.yml with: - pytorch: ${{ needs.preci-linux-build.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.preci-linux-build.build.outputs.TORCH_COMMIT_ID }} ut: op_regression,op_regression_dev1,op_extended,op_ut,xpu_distributed runner: linux.idc.xpu @@ -91,7 +91,7 @@ jobs: cd ../ && rm -rf pytorch source activate e2e_ci git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${{ needs.preci-linux-build.outputs.TORCH_COMMIT_ID }} + cd pytorch && git checkout ${{ needs.preci-linux-build.build.outputs.TORCH_COMMIT_ID }} # apply PRs for stock pytorch pip install requests # https://github.com/mengfei25/pytorch/pull/18 internal use only for subset model list @@ -219,7 +219,7 @@ jobs: uses: ./.github/workflows/_linux_ut.yml with: abi: 0 - pytorch: ${{ needs.preci-linux-build-abi-0.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.preci-linux-build-abi-0.build.outputs.TORCH_COMMIT_ID }} ut: op_extended runner: linux.idc.xpu From 7885d5078a2dfd960b50637b7615aded24bc94d2 Mon Sep 17 00:00:00 2001 From: mengfeil Date: Thu, 6 Mar 2025 15:24:59 +0800 Subject: [PATCH 8/8] torch commit --- .github/workflows/_linux_build.yml | 3 -- .github/workflows/nightly_ondemand.yml | 44 ++++++++++++++++---------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 21a889f08..bf04b979c 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -41,8 +41,6 @@ jobs: build: if: ${{ inputs.pytorch }} != 'nightly_wheel' runs-on: ${{ inputs.runner }} - outputs: - TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }} timeout-minutes: 900 env: commit_issue: 1280 @@ -146,7 +144,6 @@ jobs: cd .. python pytorch/torch/utils/collect_env.py - name: Identify Build version - id: build_version run: | source activate xpu_build source .github/scripts/env.sh diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index fb04b94d7..73d7d9650 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -70,15 +70,20 @@ jobs: secrets: inherit if: ${{ ! cancelled() }} name: linux-nightly-ondemand + outputs: + TORCH_COMMIT_ID: ${{ steps.build_1.outputs.TORCH_COMMIT_ID }} permissions: issues: write - uses: ./.github/workflows/_linux_build.yml - with: - pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - abi: 1 - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - runner: pvc_e2e + steps: + - name: Build PyTorch ABI=1 + id: build_1 + uses: ./.github/workflows/_linux_build.yml + with: + pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} + keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + abi: 1 + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + runner: pvc_e2e Linux-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' || inputs.ut != '' }} @@ -88,7 +93,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.build.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu @@ -101,7 +106,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.build.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -306,13 +311,18 @@ jobs: name: linux-nightly-ondemand-abi0 permissions: issues: write - uses: ./.github/workflows/_linux_build.yml - with: - pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - abi: 0 - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - runner: pvc_e2e + outputs: + TORCH_COMMIT_ID: ${{ steps.build_0.outputs.TORCH_COMMIT_ID }} + steps: + - name: Build PyTorch ABI=0 + id: build_0 + uses: ./.github/workflows/_linux_build.yml + with: + pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} + keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + abi: 0 + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + runner: pvc_e2e Linux-Weekly-UT-Tests-ABI-0: if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5' @@ -323,7 +333,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: op_regression,op_regression_dev1,op_extended,op_ut - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.build.outputs.TORCH_COMMIT_ID }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu