From 156971f5ad7a6d39ce831f9c664847ad735f0b26 Mon Sep 17 00:00:00 2001 From: Michael Mi Date: Thu, 6 Feb 2025 17:52:28 -0800 Subject: [PATCH 1/4] upgrade libtorch and cutlass --- .github/workflows/build_wheel.yml | 8 ++++---- .github/workflows/package_test.yml | 2 +- .github/workflows/publish_wheel.yml | 2 +- .github/workflows/release_test.yml | 2 +- CMakeLists.txt | 20 ++++++++++---------- README.md | 2 +- docs/source/index.rst | 2 +- docs/source/quick_start.rst | 18 ++++++++++++++++++ 8 files changed, 37 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index db0b5eef..05317b36 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -22,12 +22,12 @@ jobs: build_wheel: strategy: matrix: - python: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python: ["3.9", "3.10", "3.11", "3.12"] cuda: ["11.8", "12.1", "12.4"] - torch: ["2.4.1", "2.5.1"] - exclude: # torch 2.5.1 dropped support for python 3.8 + torch: ["2.4.1", "2.5.1", "2.6.0"] + include: - python: "3.8" - torch: "2.5.1" + torch: "2.4.1" runs-on: [self-hosted, linux, release] env: PYTHON_VERSION: ${{ matrix.python }} diff --git a/.github/workflows/package_test.yml b/.github/workflows/package_test.yml index 3df38919..2c6a6419 100644 --- a/.github/workflows/package_test.yml +++ b/.github/workflows/package_test.yml @@ -40,7 +40,7 @@ jobs: matrix: python: ["3.12"] cuda: ["12.4"] - torch: ["2.5.1"] + torch: ["2.6.0"] runs-on: [self-hosted, linux, build] env: PYTHON_VERSION: ${{ matrix.python }} diff --git a/.github/workflows/publish_wheel.yml b/.github/workflows/publish_wheel.yml index 20b2c4c6..4b76116b 100644 --- a/.github/workflows/publish_wheel.yml +++ b/.github/workflows/publish_wheel.yml @@ -23,7 +23,7 @@ jobs: matrix: python: ["3.9", "3.10", "3.11", "3.12"] cuda: ["12.4"] - torch: ["2.5.1"] + torch: ["2.6.0"] runs-on: [self-hosted, linux, release] env: PYTHON_VERSION: ${{ matrix.python }} diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml index aa3bd8cf..2ec32aad 100644 --- a/.github/workflows/release_test.yml +++ b/.github/workflows/release_test.yml @@ -21,7 +21,7 @@ jobs: matrix: python: ["3.9", "3.10", "3.11", "3.12"] cuda: ["12.4"] - torch: ["2.5.1"] + torch: ["2.6.0"] runs-on: [self-hosted, linux, release] env: PYTHON_VERSION: ${{ matrix.python }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 3adabde1..aa28bf84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,25 +196,25 @@ if (DEFINED ENV{LIBTORCH_ROOT}) else() include(FetchContent) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4) - # download libtorch 2.5.1 with cuda 12.4 from pytorch.org + # download libtorch 2.6.0 with cuda 12.4 from pytorch.org if (USE_CXX11_ABI) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.5.1%2Bcu124.zip") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu124.zip") else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.5.1%2Bcu124.zip") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.6.0%2Bcu124.zip") endif() elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1) - # download libtorch 2.5.1 with cuda 12.1 from pytorch.org + # download libtorch 2.6.0 with cuda 12.1 from pytorch.org if (USE_CXX11_ABI) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.5.1%2Bcu121.zip") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu121.zip") else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.5.1%2Bcu121.zip") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.6.0%2Bcu121.zip") endif() elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8) - # download libtorch 2.5.1 with cuda 11.8 from pytorch.org + # download libtorch 2.6.0 with cuda 11.8 from pytorch.org if (USE_CXX11_ABI) - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.5.1%2Bcu118.zip") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu118.zip") else() - set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.5.1%2Bcu118.zip") + set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.6.0%2Bcu118.zip") endif() else() # error out if cuda version is not supported @@ -234,7 +234,7 @@ else() FetchContent_MakeAvailable(libtorch) find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH) - message(STATUS "Downloading and using libtorch 2.5.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}") + message(STATUS "Downloading and using libtorch 2.6.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}") endif() # check if USE_CXX11_ABI is set correctly diff --git a/README.md b/README.md index bc1488ca..78decea1 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ ScaleLLM is currently undergoing active development. We are fully committed to c ScaleLLM is available as a Python Wheel package on PyPI. You can install it using pip: ```bash -# Install scalellm with CUDA 12.4 and Pytorch 2.5.1 +# Install scalellm with CUDA 12.4 and Pytorch 2.6.0 pip install -U scalellm ``` diff --git a/docs/source/index.rst b/docs/source/index.rst index 612bfec1..3c5e30a7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI Date: Thu, 6 Feb 2025 17:57:50 -0800 Subject: [PATCH 2/4] cutlass 3.8 --- third_party/cutlass | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/cutlass b/third_party/cutlass index b78588d1..affd1b69 160000 --- a/third_party/cutlass +++ b/third_party/cutlass @@ -1 +1 @@ -Subproject commit b78588d1630aa6643bf021613717bafb705df4ef +Subproject commit affd1b693dfc121c51118cbc8583dfd308227ca6 From b8ab2825994faf0fe242b0173f21f9407d15d1ce Mon Sep 17 00:00:00 2001 From: Michael Mi Date: Thu, 6 Feb 2025 18:18:32 -0800 Subject: [PATCH 3/4] drop python 3.8 support --- .github/workflows/build_wheel.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 05317b36..1957cc89 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -25,9 +25,6 @@ jobs: python: ["3.9", "3.10", "3.11", "3.12"] cuda: ["11.8", "12.1", "12.4"] torch: ["2.4.1", "2.5.1", "2.6.0"] - include: - - python: "3.8" - torch: "2.4.1" runs-on: [self-hosted, linux, release] env: PYTHON_VERSION: ${{ matrix.python }} From eea05b8c28a275b173386d700829abf8159962f7 Mon Sep 17 00:00:00 2001 From: Michael Mi Date: Thu, 6 Feb 2025 19:13:01 -0800 Subject: [PATCH 4/4] fix model convert --- scalellm/downloader.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scalellm/downloader.py b/scalellm/downloader.py index 38c497eb..744f06e4 100755 --- a/scalellm/downloader.py +++ b/scalellm/downloader.py @@ -16,7 +16,12 @@ def convert_pickle_to_safetensors(path): continue # load the model - model = torch.load(file_path, map_location="cpu") + try: + model = torch.load(file_path, map_location="cpu") + except Exception as e: + print(f"Error loading {filename}: {e}") + continue + if hasattr(model, "state_dict"): state_dict = model.state_dict() else: