Skip to content

Commit

Permalink
fix: use a consistent version for whl (#214)
Browse files Browse the repository at this point in the history
Fix the install error reported in
#212:
```
 has inconsistent version: expected '0.1.1+cu121torch2.3', but metadata has '0.1.1'
ERROR: Could not find a version that satisfies the requirement scalellm (from versions: 0.1.1+cu121torch2.3)
```
  • Loading branch information
guocuimi authored Jun 3, 2024
1 parent 5c8b287 commit 81c2646
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 75 deletions.
24 changes: 14 additions & 10 deletions .github/workflows/build_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ jobs:
cuda: ["11.8", "12.1"]
torch: ["2.1", "2.2", "2.3"]
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
CUDA_VERSION: ${{ matrix.cuda }}
TORCH_VERSION: ${{ matrix.torch }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand All @@ -32,18 +36,18 @@ jobs:
- name: Build wheel
run: |
docker pull vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }}
docker pull vectorchai/scalellm_manylinux:cuda${CUDA_VERSION}
docker run --rm -t \
-v "$CI_CACHE_DIR":/ci_cache \
-v "$GITHUB_WORKSPACE":/ScaleLLM \
-e PYTHON_VERSION=${{ matrix.python }} \
-e CUDA_VERSION=${{ matrix.cuda }} \
-e TORCH_VERSION=${{ matrix.torch }} \
-e RENAME_WHL=true \
-e PYTHON_VERSION=${PYTHON_VERSION} \
-e CUDA_VERSION=${CUDA_VERSION} \
-e TORCH_VERSION=${TORCH_VERSION} \
-e SCALELLM_VERSION_SUFFIX="+cu${CUDA_VERSION//./}torch${TORCH_VERSION}" \
-e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
-e CCACHE_DIR=/ci_cache/.ccache \
-u $(id -u):$(id -g) \
vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }} \
vectorchai/scalellm_manylinux:cuda${CUDA_VERSION} \
bash /ScaleLLM/scripts/build_wheel.sh
timeout-minutes: 60

Expand All @@ -52,7 +56,7 @@ jobs:

- uses: actions/upload-artifact@v4
with:
name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}-python${{ matrix.python }}
# `with:` inputs are not run through a shell, so ${VAR} would stay literal and
# every matrix job would use the same artifact name; read the job-level env
# values through the expression syntax instead.
name: wheel-cuda${{ env.CUDA_VERSION }}-torch${{ env.TORCH_VERSION }}-python${{ env.PYTHON_VERSION }}
path: python/dist/*

create_release:
Expand All @@ -79,7 +83,7 @@ jobs:
draft: true
prerelease: true

publish_whl_index:
commit_whl_index:
needs: build_wheel
runs-on: ubuntu-latest
steps:
Expand All @@ -99,12 +103,12 @@ jobs:
env:
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

- name: Update whl index
- name: Append new whls into whl index
run: |
cd whl
python ./publish_whl.py --whl_path=../dist
- name: Publish whl index
- name: Commit whl index change
run: |
cd whl
git config --global user.name "github-actions[bot]"
Expand Down
16 changes: 10 additions & 6 deletions .github/workflows/publish_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ jobs:
cuda: ["12.1"]
torch: ["2.3"]
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
CUDA_VERSION: ${{ matrix.cuda }}
TORCH_VERSION: ${{ matrix.torch }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand All @@ -33,17 +37,17 @@ jobs:
- name: Build wheel
run: |
docker pull vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }}
docker pull vectorchai/scalellm_manylinux:cuda${CUDA_VERSION}
docker run --rm -t \
-v "$CI_CACHE_DIR":/ci_cache \
-v "$GITHUB_WORKSPACE":/ScaleLLM \
-e PYTHON_VERSION=${{ matrix.python }} \
-e CUDA_VERSION=${{ matrix.cuda }} \
-e TORCH_VERSION=${{ matrix.torch }} \
-e PYTHON_VERSION=${PYTHON_VERSION} \
-e CUDA_VERSION=${CUDA_VERSION} \
-e TORCH_VERSION=${TORCH_VERSION} \
-e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
-e CCACHE_DIR=/ci_cache/.ccache \
-u $(id -u):$(id -g) \
vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }} \
vectorchai/scalellm_manylinux:cuda${CUDA_VERSION} \
bash /ScaleLLM/scripts/build_wheel.sh
timeout-minutes: 60

Expand All @@ -64,7 +68,7 @@ jobs:

- uses: actions/upload-artifact@v4
with:
name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }}-python${{ matrix.python }}
# `with:` inputs are not run through a shell, so ${VAR} would stay literal
# rather than expanding to the matrix values; read the job-level env values
# through the expression syntax instead.
name: wheel-cuda${{ env.CUDA_VERSION }}-torch${{ env.TORCH_VERSION }}-python${{ env.PYTHON_VERSION }}
path: python/dist/*

publish_wheel:
Expand Down
43 changes: 0 additions & 43 deletions python/rename_whl.py

This file was deleted.

6 changes: 5 additions & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ def extract_version(file_path):

def get_scalellm_version():
init_file = join_path("python", "scalellm", "__init__.py")
return extract_version(init_file)
version = extract_version(init_file)
version_suffix = os.getenv("SCALELLM_VERSION_SUFFIX")
if version_suffix:
version += version_suffix
return version


def read_readme() -> str:
Expand Down
16 changes: 1 addition & 15 deletions scripts/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,19 @@ ensure_env PYTHON_VERSION
ensure_env TORCH_VERSION
ensure_env CUDA_VERSION

RENAME_WHL=${RENAME_WHL:-false}

PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

export HOME=/tmp/home
mkdir -p $HOME
export PATH="$HOME/.local/bin:$PATH"
CUDA_MAJOR="${CUDA_VERSION%.*}"
CUDA_MINOR="${CUDA_VERSION#*.}"
TORCH_MAJOR="${TORCH_VERSION%.*}"
TORCH_MINOR="${TORCH_VERSION#*.}"

# choose the right python version
PYVER="${PYTHON_VERSION//./}"
export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"


# install PyTorch
pip install torch==$TORCH_VERSION --index-url "https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}"
pip install torch==$TORCH_VERSION -i "https://download.pytorch.org/whl/cu${CUDA_VERSION//./}"

# install other dependencies
pip install numpy
Expand All @@ -46,14 +40,6 @@ python setup.py bdist_wheel
# show ccache statistics
command -v ccache >/dev/null && ccache -vs

# rename wheel to include torch and cuda versions
if [ "$RENAME_WHL" = "true" ]; then
cd "$PROJECT_ROOT/python"
for whl in dist/*.whl; do
python rename_whl.py "$whl"
done
fi

# bundle external shared libraries into wheel
# pip install auditwheel
# cd "$PROJECT_ROOT/python"
Expand Down

0 comments on commit 81c2646

Please sign in to comment.