diff --git a/.github/workflows/publish_base_image.yml b/.github/workflows/publish_base_image.yml deleted file mode 100644 index 26c82726..00000000 --- a/.github/workflows/publish_base_image.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: Publish base docker image -on: - workflow_dispatch: - -jobs: - publish_base: - runs-on: [self-hosted, linux, release] - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_HUB_USER }} - password: ${{ secrets.DOCKER_HUB_TOKEN }} - - - name: Build base for cuda 12.1 - uses: docker/build-push-action@v5 - with: - context: ./docker - file: ./docker/Dockerfile.base - push: true - build-args: | - UBUNTU_VERSION=22.04 - CUDA_VERSION=12.1 - tags: | - vectorchai/scalellm_builder:cuda12.1-ubuntu22.04 - vectorchai/scalellm_builder:cuda12.1 - - - name: Build base for cuda 11.8 - uses: docker/build-push-action@v5 - with: - context: ./docker - file: ./docker/Dockerfile.base - push: true - build-args: | - UBUNTU_VERSION=22.04 - CUDA_VERSION=11.8 - tags: | - vectorchai/scalellm_builder:cuda11.8-ubuntu22.04 - vectorchai/scalellm_builder:cuda11.8 - diff --git a/.github/workflows/publish_devel_image.yml b/.github/workflows/publish_devel_image.yml index 2dcb688d..ab16d0f5 100644 --- a/.github/workflows/publish_devel_image.yml +++ b/.github/workflows/publish_devel_image.yml @@ -1,6 +1,9 @@ name: Publish devel docker image on: workflow_dispatch: +env: + # Tells where to store caches. 
+ CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache jobs: publish_base: @@ -27,6 +30,8 @@ jobs: context: ./docker file: ./docker/Dockerfile.devel push: true + cache-from: type=local,src=${{ env.CI_CACHE_DIR }}/.buildx-cache + cache-to: type=local,dest=${{ env.CI_CACHE_DIR }}/.buildx-cache build-args: | UBUNTU_VERSION=22.04 CUDA_VERSION=12.1 diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml new file mode 100644 index 00000000..fd3a49a8 --- /dev/null +++ b/.github/workflows/publish_manylinux_image.yml @@ -0,0 +1,52 @@ +name: Publish manylinux docker image +on: + workflow_dispatch: +env: + # Tells where to store caches. + CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache + +jobs: + publish_base: + runs-on: [self-hosted, linux, release] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_HUB_USER }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Build base for cuda 12.1 + uses: docker/build-push-action@v5 + with: + context: ./docker + file: ./docker/Dockerfile.manylinux + push: true + cache-from: type=local,src=${{ env.CI_CACHE_DIR }}/.buildx-cache + cache-to: type=local,dest=${{ env.CI_CACHE_DIR }}/.buildx-cache + build-args: | + CUDA_VERSION=12.1 + tags: | + vectorchai/scalellm_manylinux:cuda12.1 + + # - name: Build base for cuda 11.8 + # uses: docker/build-push-action@v5 + # with: + # context: ./docker + # file: ./docker/Dockerfile.manylinux + # push: true + # cache-from: type=local,src=${{ env.CI_CACHE_DIR }}/.buildx-cache + # cache-to: type=local,dest=${{ env.CI_CACHE_DIR }}/.buildx-cache + # build-args: | + # CUDA_VERSION=11.8 + # tags: | + # vectorchai/scalellm_manylinux:cuda11.8 + diff --git a/.github/workflows/release_wheel.yml b/.github/workflows/release_wheel.yml index 4aefb1c6..32ac47a7 100644 ---
a/.github/workflows/release_wheel.yml +++ b/.github/workflows/release_wheel.yml @@ -13,10 +13,11 @@ env: jobs: build_wheel: strategy: + fail-fast: false matrix: - python: ["3.9", "3.10", "3.11", "3.12"] - cuda: ["11.8", "12.1"] - torch: ["2.1", "2.2", "2.3"] + python: ["3.9", "3.10", "3.11"] + cuda: ["12.1"] + torch: ["2.2", "2.3"] runs-on: [self-hosted, linux, release] steps: - name: Checkout repository @@ -36,7 +37,7 @@ jobs: -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \ -e CCACHE_DIR=/ci_cache/.ccache \ --user $(id -u):$(id -g) \ - vectorchai/scalellm_builder:cuda${{ matrix.cuda }}-ubuntu22.04 \ + vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }} \ bash /ScaleLLM/scripts/build_wheel.sh timeout-minutes: 60 diff --git a/CMakeLists.txt b/CMakeLists.txt index 384009cf..21187549 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,7 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON) option(USE_CXX11_ABI "Use the new C++-11 ABI, which is not backwards compatible." 
ON) +option(USE_MANYLINUX "Build for manylinux" OFF) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -144,7 +145,6 @@ if(UNIX) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og") endif() -find_package(Boost CONFIG REQUIRED) find_package(Threads REQUIRED) # find all dependencies from vcpkg find_package(fmt CONFIG REQUIRED) @@ -162,7 +162,13 @@ find_package(prometheus-cpp CONFIG REQUIRED) find_package(stduuid CONFIG REQUIRED) find_package(RapidJSON CONFIG REQUIRED) -find_package(Python REQUIRED COMPONENTS Interpreter Development) +if (USE_MANYLINUX) + # manylinux doesn't ship Development.Embed + find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +else() + find_package(Python REQUIRED COMPONENTS Interpreter Development) +endif() + find_package(NCCL REQUIRED) if (USE_CXX11_ABI) diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index fde5c1d4..d3bc5df7 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -29,7 +29,7 @@ ENV DESIRED_CUDA ${CUDA_VERSION} ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH # Install gcc -ARG GCC_VERSION=12 +ARG GCC_VERSION=11 RUN apt-get update \ && apt-get install -y --no-install-recommends \ software-properties-common gpg-agent diff --git a/docker/Dockerfile.manylinux b/docker/Dockerfile.manylinux new file mode 100644 index 00000000..8f6c52e4 --- /dev/null +++ b/docker/Dockerfile.manylinux @@ -0,0 +1,48 @@ +FROM quay.io/pypa/manylinux_2_28_x86_64 as base + +LABEL maintainer="mi@vectorch.com" +ENV DEBIAN_FRONTEND noninteractive + +ENV LC_ALL en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US.UTF-8 + +# Install common dependencies +COPY ./common/install_base.sh install_base.sh +RUN bash ./install_base.sh && rm install_base.sh + +# Install user +COPY ./common/install_user.sh install_user.sh +RUN bash ./install_user.sh && rm install_user.sh + +# Install cuda, cudnn and nccl +ARG CUDA_VERSION=12.1 +COPY ./common/install_cuda.sh install_cuda.sh +RUN bash ./install_cuda.sh ${CUDA_VERSION} 
&& rm install_cuda.sh +ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH + +# ARG CMAKE_VERSION=3.18.5 +# COPY ./common/install_cmake.sh install_cmake.sh +# RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi +# RUN rm install_cmake.sh + +ARG NINJA_VERSION=1.11.1 +COPY ./common/install_ninja.sh install_ninja.sh +RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi +RUN rm install_ninja.sh + +ARG CCACHE_VERSION=4.8.3 +COPY ./common/install_ccache.sh install_ccache.sh +RUN if [ -n "${CCACHE_VERSION}" ]; then bash ./install_ccache.sh; fi +RUN rm install_ccache.sh + +# install rust +ENV RUSTUP_HOME=/usr/local/rustup +ENV CARGO_HOME=/usr/local/cargo +ENV PATH=/usr/local/cargo/bin:$PATH +RUN curl https://sh.rustup.rs -sSf | sh -s -- -y +# give everyone permission to use rust +RUN chmod -R a+w ${RUSTUP_HOME} ${CARGO_HOME} +RUN rustup --version; cargo --version; rustc --version + +CMD ["bash"] \ No newline at end of file diff --git a/docker/common/install_base.sh b/docker/common/install_base.sh index cbc6990f..deb23825 100644 --- a/docker/common/install_base.sh +++ b/docker/common/install_base.sh @@ -31,11 +31,31 @@ install_ubuntu() { rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* } +install_almalinux() { + yum -y update + yum -y install \ + zip \ + wget \ + curl \ + perl \ + sudo \ + vim \ + jq \ + libtool \ + unzip + + # Cleanup + yum clean all +} + ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') case "$ID" in ubuntu) install_ubuntu ;; + almalinux) + install_almalinux + ;; *) echo "Unable to determine OS..." 
exit 1 diff --git a/docker/common/install_ccache.sh b/docker/common/install_ccache.sh new file mode 100644 index 00000000..4ccf25f0 --- /dev/null +++ b/docker/common/install_ccache.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -ex + +[ -n "$CCACHE_VERSION" ] + +ARCH=$(uname -m) +url=https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}-linux-${ARCH}.tar.xz + +pushd /tmp +curl -L "$url" | xz -d | tar -x +cp ./ccache-${CCACHE_VERSION}-linux-${ARCH}/ccache /usr/bin/ccache +popd + +# set max cache size to 5GiB +/usr/bin/ccache -M 5Gi \ No newline at end of file diff --git a/docker/common/install_cmake.sh b/docker/common/install_cmake.sh index 3332ac09..26257bf1 100644 --- a/docker/common/install_cmake.sh +++ b/docker/common/install_cmake.sh @@ -10,6 +10,9 @@ case "$ID" in ubuntu) apt-get remove cmake -y ;; + almalinux) + rm -f /usr/local/bin/cmake + ;; *) echo "Unable to determine OS..." exit 1 diff --git a/docker/common/install_cuda.sh b/docker/common/install_cuda.sh new file mode 100644 index 00000000..fd32031b --- /dev/null +++ b/docker/common/install_cuda.sh @@ -0,0 +1,235 @@ +#!/bin/bash + +# adapted from https://github.com/pytorch/builder/blob/main/common/install_cuda.sh + +set -ex + +function install_cusparselt_040 { + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz + tar xf libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz + cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_cusparselt +} + +function install_cusparselt_052 { + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q
https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz + tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz + cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_cusparselt +} + +function install_118 { + echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0" + rm -rf /usr/local/cuda-11.8 /usr/local/cuda + # install CUDA 11.8.0 in the same container + wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run + chmod +x cuda_11.8.0_520.61.05_linux.run + ./cuda_11.8.0_520.61.05_linux.run --toolkit --silent + rm -f cuda_11.8.0_520.61.05_linux.run + rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda + + # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement + mkdir tmp_cudnn && cd tmp_cudnn + wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz + tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz + cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_cudnn + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build + git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ + cd .. 
+ rm -rf nccl + + install_cusparselt_040 + + ldconfig +} + +function install_121 { + echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" + rm -rf /usr/local/cuda-12.1 /usr/local/cuda + # install CUDA 12.1.0 in the same container + wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run + chmod +x cuda_12.1.1_530.30.02_linux.run + ./cuda_12.1.1_530.30.02_linux.run --toolkit --silent + rm -f cuda_12.1.1_530.30.02_linux.run + rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda + + # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement + mkdir tmp_cudnn && cd tmp_cudnn + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz + tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz + cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_cudnn + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build + git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ + cd .. 
+ rm -rf nccl + + install_cusparselt_052 + + ldconfig +} + +function install_124 { + echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" + rm -rf /usr/local/cuda-12.4 /usr/local/cuda + # install CUDA 12.4.0 in the same container + wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run + chmod +x cuda_12.4.0_550.54.14_linux.run + ./cuda_12.4.0_550.54.14_linux.run --toolkit --silent + rm -f cuda_12.4.0_550.54.14_linux.run + rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda + + # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement + mkdir tmp_cudnn && cd tmp_cudnn + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz + tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz + cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_cudnn + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build + git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ + cd .. 
+ rm -rf nccl + + install_cusparselt_052 + + ldconfig +} + +function prune_118 { + echo "Pruning CUDA 11.8 and cuDNN" + ##################################################################################### + # CUDA 11.8 prune static libs + ##################################################################################### + export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64" + + export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + + if [[ -n "$OVERRIDE_GENCODE" ]]; then + export GENCODE=$OVERRIDE_GENCODE + fi + + # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included) + ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ + | xargs -I {} bash -c \ + "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" + + # prune CuDNN and CuBLAS + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a + + ##################################################################################### + # CUDA 11.8 prune visual tools + ##################################################################################### + export CUDA_BASE="/usr/local/cuda-11.8/" + rm 
-rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/ +} + +function prune_121 { + echo "Pruning CUDA 12.1" + ##################################################################################### + # CUDA 12.1 prune static libs + ##################################################################################### + export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64" + + export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + + if [[ -n "$OVERRIDE_GENCODE" ]]; then + export GENCODE=$OVERRIDE_GENCODE + fi + + # all CUDA libs except CuDNN and CuBLAS + ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ + | xargs -I {} bash -c \ + "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" + + # prune CuDNN and CuBLAS + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a + + ##################################################################################### + # CUDA 12.1 prune visual tools + ##################################################################################### + export CUDA_BASE="/usr/local/cuda-12.1/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 
$CUDA_BASE/nsight-systems-2023.1.2/ +} + +function prune_124 { + echo "Pruning CUDA 12.4" + ##################################################################################### + # CUDA 12.4 prune static libs + ##################################################################################### + export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64" + + export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + + if [[ -n "$OVERRIDE_GENCODE" ]]; then + export GENCODE=$OVERRIDE_GENCODE + fi + + # all CUDA libs except CuDNN and CuBLAS + ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ + | xargs -I {} bash -c \ + "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" + + # prune CuDNN and CuBLAS + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a + + ##################################################################################### + # CUDA 12.4 prune visual tools + ##################################################################################### + export CUDA_BASE="/usr/local/cuda-12.4/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ +} + +# idiomatic parameter and option handling in sh
+while test $# -gt 0 +do + case "$1" in + 11.8) install_118; prune_118 + ;; + 12.1) install_121; prune_121 + ;; + 12.4) install_124; prune_124 + ;; + *) echo "bad argument $1"; exit 1 + ;; + esac + shift +done diff --git a/docker/common/install_user.sh b/docker/common/install_user.sh index 9d416049..8aa49587 100644 --- a/docker/common/install_user.sh +++ b/docker/common/install_user.sh @@ -9,8 +9,4 @@ echo "jenkins:x:1000:" >> /etc/group echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow # allow sudo -echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins - - -# test that sudo works -sudo -u jenkins sudo -v \ No newline at end of file +echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins \ No newline at end of file diff --git a/python/scalellm/csrc/CMakeLists.txt b/python/scalellm/csrc/CMakeLists.txt index ec9f1bbd..899f0e2b 100644 --- a/python/scalellm/csrc/CMakeLists.txt +++ b/python/scalellm/csrc/CMakeLists.txt @@ -16,5 +16,5 @@ pybind_extension( absl::strings gflags::gflags glog::glog - Python::Python + Python::Module ) diff --git a/python/setup.py b/python/setup.py index 8a7ea7e9..77e6a11a 100644 --- a/python/setup.py +++ b/python/setup.py @@ -114,6 +114,7 @@ def build_extension(self, ext: CMakeExtension): "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}", "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON", + "-DUSE_MANYLINUX:BOOL=ON", f"-DPython_EXECUTABLE:FILEPATH={sys.executable}", f"-DPYTHON_INCLUDE_DIRS={python_include_dir}", f"-DCMAKE_CUDA_ARCHITECTURES={cuda_architectures}", diff --git a/src/server/CMakeLists.txt b/src/server/CMakeLists.txt index 6a601ac0..382a54b2 100644 --- a/src/server/CMakeLists.txt +++ b/src/server/CMakeLists.txt @@ -27,20 +27,23 @@ cc_library( glog::glog ) -cc_binary( - NAME - simple - SRCS - simple.cpp - DEPS - :engine - :speculative - torch - absl::strings - gflags::gflags - glog::glog - Python::Python -) +if (NOT USE_MANYLINUX) + # manylinux doesn't ship with Development.Embed + cc_binary( + 
NAME + simple + SRCS + simple.cpp + DEPS + :engine + :speculative + torch + absl::strings + gflags::gflags + glog::glog + Python::Python + ) +endif() cc_binary( NAME