diff --git a/.github/actions/inductor-xpu-e2e-test/action.yml b/.github/actions/inductor-xpu-e2e-test/action.yml index 52ec6c3b0..c59595924 100644 --- a/.github/actions/inductor-xpu-e2e-test/action.yml +++ b/.github/actions/inductor-xpu-e2e-test/action.yml @@ -50,6 +50,7 @@ runs: if: ${{ inputs.env_prepare }} shell: bash run: | + pwd && printenv source activate e2e_ci source .github/scripts/env.sh ${{ inputs.pytorch }} if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then diff --git a/.github/scripts/rpath.sh b/.github/scripts/rpath.sh index 14c23856e..250cffb0a 100644 --- a/.github/scripts/rpath.sh +++ b/.github/scripts/rpath.sh @@ -3,7 +3,7 @@ # Post-op the linux wheel to change the .so rpath to make the wheel work with XPU runtime pypi packages # Usage: rpath.sh /path/to/torch-xxxx.whl -pkg=$1 +pkg=$(realpath $1) PATCHELF_BIN=patchelf make_wheel_record() { diff --git a/.github/workflows/_linux_build.yml b/.github/workflows/_linux_build.yml index 6abcab211..bf04b979c 100644 --- a/.github/workflows/_linux_build.yml +++ b/.github/workflows/_linux_build.yml @@ -33,13 +33,6 @@ on: type: string default: 'lts' description: Driver lts/rolling - outputs: - whl_name: - description: The name of the wheel file - value: ${{ jobs.Torch-XPU-Build.outputs.whl_name }} - torch_commit_id: - description: The commit id of the torch build - value: ${{ jobs.Torch-XPU-Build.outputs.TORCH_COMMIT_ID }} permissions: issues: write @@ -48,8 +41,6 @@ jobs: build: if: ${{ inputs.pytorch }} != 'nightly_wheel' runs-on: ${{ inputs.runner }} - outputs: - TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }} timeout-minutes: 900 env: commit_issue: 1280 @@ -61,7 +52,7 @@ jobs: uses: actions/checkout@v4 - name: Prepare Stock Pytorch run: | - pwd + pwd && printenv which conda && conda clean -ay conda remove --all -y -n xpu_build || \ rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_build @@ -70,19 +61,17 @@ jobs: cd ../ && rm -rf pytorch pip install requests git clone https://github.com/pytorch/pytorch pytorch - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - cd pytorch && git checkout $(echo ${{ inputs.pytorch }}) - # apply PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git show -s - git submodule sync && git submodule update --init --recursive - if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then - echo "Don't replace torch-xpu-ops!" - else - rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ - # Workaround for torch-xpu-ops ci test - sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt - fi + cd pytorch && git checkout $(echo ${{ inputs.pytorch }}) + # apply PRs for stock pytorch + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + git status && git show -s + git submodule sync && git submodule update --init --recursive + if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then + echo "Don't replace torch-xpu-ops!" + else + rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ + # Workaround for torch-xpu-ops ci test + sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt fi - name: Build Pytorch XPU run: | @@ -99,64 +88,51 @@ jobs: else export _GLIBCXX_USE_CXX11_ABI=1 fi - if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then - build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - repo="${{ github.repository }}" - last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1) - cd ../pytorch - current_commit=$(git rev-parse HEAD) - is_fork_pr=false - if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then - is_fork_pr=true - fi - echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}" + build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + repo="${{ github.repository }}" + last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1) + cd ../pytorch + current_commit=$(git rev-parse HEAD) + is_fork_pr=false + if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then + is_fork_pr=true + fi + echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}" - export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} - pip install -r requirements.txt - WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log + export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"} + pip install -r requirements.txt + WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log - if [[ ${is_fork_pr} == "false" ]]; then - if [ -f dist/torch*.whl && "${last_commit}" != "${current_commit}"] && [[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then - echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" - gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};g" | sed '/^$/d' > new_body.txt - gh --repo $repo issue edit $commit_issue --body-file new_body.txt - fi - if [ ! -f dist/torch*.whl ]; then - echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" - gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25" - git clean -df . - git checkout $last_commit - # apply PRs for stock pytorch - python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py - git status && git show -s - git submodule sync && git submodule update --init --recursive - if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then - echo "Don't replace torch-xpu-ops!" - else - rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ - # Workaround for torch-xpu-ops ci test - sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt - fi - WERROR=1 python setup.py bdist_wheel + if [[ ${is_fork_pr} == "false" ]]; then + if [ -f dist/torch*.whl && "${last_commit}" != "${current_commit}"] && [[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then + echo "Wheel build successful, update last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" + gh --repo $repo issue view $commit_issue --json body -q .body | sed "s;${last_commit};${current_commit};g" | sed '/^$/d' > new_body.txt + gh --repo $repo issue edit $commit_issue --body-file new_body.txt + fi + if [ ! -f dist/torch*.whl ]; then + echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280" + gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25" + git clean -df . + git checkout $last_commit + # apply PRs for stock pytorch + python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py + git status && git show -s + git submodule sync && git submodule update --init --recursive + if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then + echo "Don't replace torch-xpu-ops!" + else + rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/ + # Workaround for torch-xpu-ops ci test + sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt fi - else - echo "Forked PR, don't update the issue" + WERROR=1 python setup.py bdist_wheel fi - pip install --force-reinstall dist/*.whl - cp dist/*.whl ${{ github.workspace }}/ - cp pytorch_${current_commit}_build.log ${{ github.workspace }}/ else - pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu - TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)') - cd ../pytorch - git reset --hard && git checkout ${TORCH_COMMIT_ID} - TORCH_XPU_OPS_COMMIT=$(> "${GITHUB_ENV}" - echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" + echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}" diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml index 22d2834ad..3ab58de22 100644 --- a/.github/workflows/_linux_ut.yml +++ b/.github/workflows/_linux_ut.yml @@ -59,7 +59,7 @@ jobs: uses: actions/checkout@v4 - name: Prepare Stock Pytorch run: | - pwd + pwd && printenv which conda && conda clean -ay conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \ rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK} diff --git a/.github/workflows/nightly_ondemand.yml b/.github/workflows/nightly_ondemand.yml index 26da75ff3..73d7d9650 100644 --- a/.github/workflows/nightly_ondemand.yml +++ b/.github/workflows/nightly_ondemand.yml @@ -70,15 +70,20 @@ jobs: secrets: inherit if: ${{ ! cancelled() }} name: linux-nightly-ondemand + outputs: + TORCH_COMMIT_ID: ${{ steps.build_1.outputs.TORCH_COMMIT_ID }} permissions: issues: write - uses: ./.github/workflows/_linux_build.yml - with: - pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - abi: 1 - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - runner: pvc_e2e + steps: + - name: Build PyTorch ABI=1 + id: build_1 + uses: ./.github/workflows/_linux_build.yml + with: + pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} + keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + abi: 1 + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + runner: pvc_e2e Linux-Nightly-Ondemand-UT-Tests: if: ${{ github.event_name == 'schedule' || inputs.ut != '' }} @@ -88,7 +93,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu @@ -101,7 +106,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -142,7 +147,6 @@ jobs: pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py git status && git show -s - git submodule sync && git submodule update --init --recursive if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then echo "Don't replace torch-xpu-ops!" else @@ -183,7 +187,6 @@ jobs: echo ${TRITON_REPO}@${TRITON_COMMIT_ID} pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python" - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} uses: actions/download-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1 @@ -308,13 +311,18 @@ jobs: name: linux-nightly-ondemand-abi0 permissions: issues: write - uses: ./.github/workflows/_linux_build.yml - with: - pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} - keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} - abi: 0 - python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} - runner: pvc_e2e + outputs: + TORCH_COMMIT_ID: ${{ steps.build_0.outputs.TORCH_COMMIT_ID }} + steps: + - name: Build PyTorch ABI=0 + id: build_0 + uses: ./.github/workflows/_linux_build.yml + with: + pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }} + keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} + abi: 0 + python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} + runner: pvc_e2e Linux-Weekly-UT-Tests-ABI-0: if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5' @@ -325,7 +333,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: op_regression,op_regression_dev1,op_extended,op_ut - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-ABI-0.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} runner: linux.idc.xpu diff --git a/.github/workflows/nightly_ondemand_rolling.yml b/.github/workflows/nightly_ondemand_rolling.yml index 5cf66b3c8..5edd1a871 100644 --- a/.github/workflows/nightly_ondemand_rolling.yml +++ b/.github/workflows/nightly_ondemand_rolling.yml @@ -90,7 +90,7 @@ jobs: with: keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} driver: rolling @@ -104,7 +104,7 @@ jobs: needs: Linux-Nightly-Ondemand-Build-Rolling timeout-minutes: 3600 env: - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.build.outputs.TORCH_COMMIT_ID }} keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} @@ -147,7 +147,6 @@ jobs: pip install requests python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py git status && git show -s - git submodule sync && git submodule update --init --recursive if [[ ${{ env.keep_torch_xpu_ops }} == 'true' ]]; then echo "Don't replace torch-xpu-ops!" else @@ -188,7 +187,6 @@ jobs: echo ${TRITON_REPO}@${TRITON_COMMIT_ID} pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_COMMIT_ID}#subdirectory=python" - name: Download Pytorch wheel - if: ${{ inputs.pytorch != 'nightly_wheel' }} uses: actions/download-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1 @@ -338,7 +336,7 @@ jobs: abi: 0 keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }} ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut' || inputs.ut }} - pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.outputs.torch_commit_id }} + pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling-ABI-0.build.outputs.TORCH_COMMIT_ID }} python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }} triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }} driver: rolling diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index a3ff597d4..8864fefc0 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -63,7 +63,7 @@ jobs: needs: preci-linux-build uses: ./.github/workflows/_linux_ut.yml with: - pytorch: ${{ needs.preci-linux-build.outputs.torch_commit_id }} + pytorch: ${{ needs.preci-linux-build.build.outputs.TORCH_COMMIT_ID }} ut: op_regression,op_regression_dev1,op_extended,op_ut,xpu_distributed runner: linux.idc.xpu @@ -91,7 +91,7 @@ jobs: cd ../ && rm -rf pytorch source activate e2e_ci git clone https://github.com/pytorch/pytorch pytorch - cd pytorch && git checkout ${{ needs.preci-linux-build.outputs.torch_commit_id }} + cd pytorch && git checkout ${{ needs.preci-linux-build.build.outputs.TORCH_COMMIT_ID }} # apply PRs for stock pytorch pip install requests # https://github.com/mengfei25/pytorch/pull/18 internal use only for subset model list @@ -110,7 +110,6 @@ jobs: echo ${TRITON_REPO}@${TRITON_PINNED_COMMIT} pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python" - name: Download Pytorch wheel - if: ${{ inputs.pytorch }} != 'nightly_wheel' uses: actions/download-artifact@v4 with: name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}-1 @@ -220,7 +219,7 @@ jobs: uses: ./.github/workflows/_linux_ut.yml with: abi: 0 - pytorch: ${{ needs.preci-linux-build-abi-0.outputs.torch_commit_id }} + pytorch: ${{ needs.preci-linux-build-abi-0.build.outputs.TORCH_COMMIT_ID }} ut: op_extended runner: linux.idc.xpu