Skip to content

Commit

Permalink
upgrade pytorch to 2.4.1 (#341)
Browse files Browse the repository at this point in the history
  • Loading branch information
guocuimi authored Oct 13, 2024
1 parent 3a2b822 commit 785b3b7
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
matrix:
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["11.8", "12.1", "12.4"]
torch: ["2.2.2", "2.3.1", "2.4.0"]
torch: ["2.2.2", "2.3.1", "2.4.1"]
exclude:
- cuda: "12.4"
torch: "2.3.1"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/package_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
matrix:
python: ["3.10"]
cuda: ["12.4"]
torch: ["2.4.0"]
torch: ["2.4.1"]
runs-on: [self-hosted, linux, build]
env:
PYTHON_VERSION: ${{ matrix.python }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
matrix:
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["12.1"]
torch: ["2.4.0"]
torch: ["2.4.1"]
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
matrix:
python: ["3.10"]
cuda: ["12.4"]
torch: ["2.4.0"]
torch: ["2.4.1"]
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
Expand Down
20 changes: 10 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -194,25 +194,25 @@ if (DEFINED ENV{LIBTORCH_ROOT})
else()
include(FetchContent)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
# download libtorch 2.4.0 with cuda 12.4 from pytorch.org
# download libtorch 2.4.1 with cuda 12.4 from pytorch.org
if (USE_CXX11_ABI)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu124.zip")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.1%2Bcu124.zip")
else()
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.4.0%2Bcu124.zip")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.4.1%2Bcu124.zip")
endif()
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
# download libtorch 2.4.0 with cuda 12.1 from pytorch.org
# download libtorch 2.4.1 with cuda 12.1 from pytorch.org
if (USE_CXX11_ABI)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu121.zip")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.4.1%2Bcu121.zip")
else()
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.4.0%2Bcu121.zip")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.4.1%2Bcu121.zip")
endif()
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
# download libtorch 2.4.0 with cuda 11.8 from pytorch.org
# download libtorch 2.4.1 with cuda 11.8 from pytorch.org
if (USE_CXX11_ABI)
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu118.zip")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.4.1%2Bcu118.zip")
else()
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.4.0%2Bcu118.zip")
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.4.1%2Bcu118.zip")
endif()
else()
# error out if cuda version is not supported
Expand All @@ -232,7 +232,7 @@ else()
FetchContent_MakeAvailable(libtorch)

find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
message(STATUS "Downloading and using libtorch 2.4.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
message(STATUS "Downloading and using libtorch 2.4.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
endif()

# check if USE_CXX11_ABI is set correctly
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ ScaleLLM is currently undergoing active development. We are fully committed to c

ScaleLLM is available as a Python Wheel package on PyPI. You can install it using pip:
```bash
# Install scalellm with CUDA 12.1 and Pytorch 2.4.0
# Install scalellm with CUDA 12.1 and PyTorch 2.4.1
pip install -U scalellm
```

Expand Down
27 changes: 19 additions & 8 deletions docker/common/install_cuda.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# adapted from https://github.com/pytorch/builder/blob/main/common/install_cuda.sh
# adapted from https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_cuda.sh

set -ex

Expand Down Expand Up @@ -29,6 +29,17 @@ function install_cusparselt_052 {
rm -rf tmp_cusparselt
}

function install_cusparselt_062 {
    # Download the cuSparseLt 0.6.2.3 linux-x86_64 redistributable archive and copy
    # the contents of its include/ and lib/ directories into /usr/local/cuda.
    # Work happens in a scratch directory (relative to the current working
    # directory) that is removed afterwards.
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    local scratch_dir="tmp_cusparselt"
    local archive_name="libcusparse_lt-linux-x86_64-0.6.2.3-archive"

    mkdir "${scratch_dir}" && pushd "${scratch_dir}"
    wget -q "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${archive_name}.tar.xz"
    tar xf "${archive_name}.tar.xz"
    # Globs stay outside the quotes so the shell still expands them.
    cp -a "${archive_name}"/include/* /usr/local/cuda/include/
    cp -a "${archive_name}"/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf "${scratch_dir}"
}

function install_118 {
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
Expand Down Expand Up @@ -96,13 +107,13 @@ function install_121 {
}

function install_124 {
echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
chmod +x cuda_12.4.0_550.54.14_linux.run
./cuda_12.4.0_550.54.14_linux.run --toolkit --silent
rm -f cuda_12.4.0_550.54.14_linux.run
# install CUDA 12.4.1 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
chmod +x cuda_12.4.1_550.54.15_linux.run
./cuda_12.4.1_550.54.15_linux.run --toolkit --silent
rm -f cuda_12.4.1_550.54.15_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
Expand All @@ -123,7 +134,7 @@ function install_124 {
cd ..
rm -rf nccl

install_cusparselt_052
install_cusparselt_062

ldconfig
}
Expand Down

0 comments on commit 785b3b7

Please sign in to comment.