From 81c2646f7f90810be0284c435ed54c7527698c6b Mon Sep 17 00:00:00 2001
From: Michael Mi
Date: Mon, 3 Jun 2024 14:59:05 -0700
Subject: [PATCH] fix: use a consistent version for whl (#214)

fix install error mentioned in https://github.com/vectorch-ai/ScaleLLM/issues/212
```
 has inconsistent version: expected '0.1.1+cu121torch2.3', but metadata has '0.1.1'
ERROR: Could not find a version that satisfies the requirement scalellm (from versions: 0.1.1+cu121torch2.3)
```
---
Review notes (this section is ignored when the patch is applied):

* The wheel version suffix is now injected at build time through the
  SCALELLM_VERSION_SUFFIX environment variable read by python/setup.py,
  replacing the post-build file rename (python/rename_whl.py), so the wheel
  file name and its metadata carry the same version string.
* Shell-style ${VAR} references are only expanded inside `run:` scripts.
  Inputs under `with:` are not processed by a shell, so the upload-artifact
  `name` input uses the ${{ env.VAR }} expression form; otherwise every matrix
  job would upload under the same literal artifact name.
* Only build_wheel.yml passes SCALELLM_VERSION_SUFFIX; publish_wheel.yml
  builds the wheel without a version suffix.

 .github/workflows/build_wheel.yml   | 24 +++++++++-------
 .github/workflows/publish_wheel.yml | 16 +++++++----
 python/rename_whl.py                | 43 -----------------------------
 python/setup.py                     |  6 +++-
 scripts/build_wheel.sh              | 16 +----------
 5 files changed, 30 insertions(+), 75 deletions(-)
 delete mode 100644 python/rename_whl.py

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 66f5e9d2..150ae347 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -19,6 +19,10 @@ jobs:
         cuda: ["11.8", "12.1"]
         torch: ["2.1", "2.2", "2.3"]
     runs-on: [self-hosted, linux, release]
+    env:
+      PYTHON_VERSION: ${{ matrix.python }}
+      CUDA_VERSION: ${{ matrix.cuda }}
+      TORCH_VERSION: ${{ matrix.torch }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -32,18 +36,18 @@ jobs:
 
       - name: Build wheel
         run: |
-          docker pull vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }}
+          docker pull vectorchai/scalellm_manylinux:cuda${CUDA_VERSION}
           docker run --rm -t \
             -v "$CI_CACHE_DIR":/ci_cache \
             -v "$GITHUB_WORKSPACE":/ScaleLLM \
-            -e PYTHON_VERSION=${{ matrix.python }} \
-            -e CUDA_VERSION=${{ matrix.cuda }} \
-            -e TORCH_VERSION=${{ matrix.torch }} \
-            -e RENAME_WHL=true \
+            -e PYTHON_VERSION=${PYTHON_VERSION} \
+            -e CUDA_VERSION=${CUDA_VERSION} \
+            -e TORCH_VERSION=${TORCH_VERSION} \
+            -e SCALELLM_VERSION_SUFFIX="+cu${CUDA_VERSION//./}torch${TORCH_VERSION}" \
             -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
             -e CCACHE_DIR=/ci_cache/.ccache \
             -u $(id -u):$(id -g) \
-            vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }} \
+            vectorchai/scalellm_manylinux:cuda${CUDA_VERSION} \
             bash /ScaleLLM/scripts/build_wheel.sh
         timeout-minutes: 60
 
@@ -52,7 +56,7 @@ jobs:
 
       - uses: actions/upload-artifact@v4
         with:
-          name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}-python${{ matrix.python }}
+          name: wheel-cuda${{ env.CUDA_VERSION }}-torch${{ env.TORCH_VERSION }}-python${{ env.PYTHON_VERSION }}
           path: python/dist/*
 
   create_release:
@@ -79,7 +83,7 @@ jobs:
           draft: true
           prerelease: true
 
-  publish_whl_index:
+  commit_whl_index:
     needs: build_wheel
     runs-on: ubuntu-latest
     steps:
@@ -99,12 +103,12 @@ jobs:
         env:
           WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
 
-      - name: Update whl index
+      - name: Append new whls into whl index
         run: |
           cd whl
           python ./publish_whl.py --whl_path=../dist
 
-      - name: Publish whl index
+      - name: Commit whl index change
         run: |
           cd whl
           git config --global user.name "github-actions[bot]"
diff --git a/.github/workflows/publish_wheel.yml b/.github/workflows/publish_wheel.yml
index 78a3432a..13cd105a 100644
--- a/.github/workflows/publish_wheel.yml
+++ b/.github/workflows/publish_wheel.yml
@@ -20,6 +20,10 @@ jobs:
         cuda: ["12.1"]
         torch: ["2.3"]
     runs-on: [self-hosted, linux, release]
+    env:
+      PYTHON_VERSION: ${{ matrix.python }}
+      CUDA_VERSION: ${{ matrix.cuda }}
+      TORCH_VERSION: ${{ matrix.torch }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -33,17 +37,17 @@ jobs:
 
       - name: Build wheel
         run: |
-          docker pull vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }}
+          docker pull vectorchai/scalellm_manylinux:cuda${CUDA_VERSION}
           docker run --rm -t \
             -v "$CI_CACHE_DIR":/ci_cache \
             -v "$GITHUB_WORKSPACE":/ScaleLLM \
-            -e PYTHON_VERSION=${{ matrix.python }} \
-            -e CUDA_VERSION=${{ matrix.cuda }} \
-            -e TORCH_VERSION=${{ matrix.torch }} \
+            -e PYTHON_VERSION=${PYTHON_VERSION} \
+            -e CUDA_VERSION=${CUDA_VERSION} \
+            -e TORCH_VERSION=${TORCH_VERSION} \
             -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
             -e CCACHE_DIR=/ci_cache/.ccache \
             -u $(id -u):$(id -g) \
-            vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }} \
+            vectorchai/scalellm_manylinux:cuda${CUDA_VERSION} \
             bash /ScaleLLM/scripts/build_wheel.sh
         timeout-minutes: 60
 
@@ -64,7 +68,7 @@ jobs:
 
       - uses: actions/upload-artifact@v4
         with:
-          name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}-python${{ matrix.python }}
+          name: wheel-cuda${{ env.CUDA_VERSION }}-torch${{ env.TORCH_VERSION }}-python${{ env.PYTHON_VERSION }}
           path: python/dist/*
 
   publish_wheel:
diff --git a/python/rename_whl.py b/python/rename_whl.py
deleted file mode 100644
index f8a64fe5..00000000
--- a/python/rename_whl.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import os
-
-
-def get_version_suffix():
-    try:
-        import torch
-
-        version, other = torch.__version__.split("+")
-        major, minor, _ = version.split(".")
-        return f"{other}torch{major}.{minor}"
-    except ImportError:
-        return None
-
-
-def rename_whl(whl_path: str):
-    version_suffix = get_version_suffix()
-    if version_suffix is None:
-        return
-
-    parts = whl_path.split("-")
-    if len(parts) < 2:
-        return
-    version = parts[1]
-    # check if already added version suffix
-    if version.endswith(version_suffix):
-        return
-
-    parts[1] = f"{version}+{version_suffix}"
-    new_whl_path = "-".join(parts)
-    os.rename(whl_path, new_whl_path)
-
-
-if __name__ == "__main__":
-    import sys
-
-    if len(sys.argv) < 2:
-        print("Usage: python rename_whl.py ")
-        sys.exit(1)
-    whl_path = sys.argv[1]
-    if not os.path.exists(whl_path):
-        print(f"File not found: {whl_path}")
-        sys.exit(1)
-    rename_whl(whl_path)
diff --git a/python/setup.py b/python/setup.py
index f1c91fe4..74204b49 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -60,7 +60,11 @@ def extract_version(file_path):
 
 def get_scalellm_version():
     init_file = join_path("python", "scalellm", "__init__.py")
-    return extract_version(init_file)
+    version = extract_version(init_file)
+    version_suffix = os.getenv("SCALELLM_VERSION_SUFFIX")
+    if version_suffix:
+        version += version_suffix
+    return version
 
 
 def read_readme() -> str:
diff --git a/scripts/build_wheel.sh b/scripts/build_wheel.sh
index 79638c6c..46676f53 100755
--- a/scripts/build_wheel.sh
+++ b/scripts/build_wheel.sh
@@ -13,17 +13,11 @@ ensure_env PYTHON_VERSION
 ensure_env TORCH_VERSION
 ensure_env CUDA_VERSION
 
-RENAME_WHL=${RENAME_WHL:-false}
-
 PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
 
 export HOME=/tmp/home
 mkdir -p $HOME
 export PATH="$HOME/.local/bin:$PATH"
-CUDA_MAJOR="${CUDA_VERSION%.*}"
-CUDA_MINOR="${CUDA_VERSION#*.}"
-TORCH_MAJOR="${TORCH_VERSION%.*}"
-TORCH_MINOR="${TORCH_VERSION#*.}"
 
 # choose the right python version
 PYVER="${PYTHON_VERSION//./}"
@@ -31,7 +25,7 @@ export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"
 
 
 # install PyTorch
-pip install torch==$TORCH_VERSION --index-url "https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}"
+pip install torch==$TORCH_VERSION -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"
 
 # install other dependencies
 pip install numpy
@@ -46,14 +40,6 @@ python setup.py bdist_wheel
 # show ccache statistics
 command -v ccache >/dev/null && ccache -vs
 
-# rename wheel to include torch and cuda versions
-if [ "$RENAME_WHL" = "true" ]; then
-  cd "$PROJECT_ROOT/python"
-  for whl in dist/*.whl; do
-    python rename_whl.py "$whl"
-  done
-fi
-
 # bundle external shared libraries into wheel
 # pip install auditwheel
 # cd "$PROJECT_ROOT/python"