Try not limiting nvcc threads to 2
tridao committed Nov 27, 2023
1 parent 742be5e commit 2d33fd8
Showing 2 changed files with 1 addition and 5 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/publish.yml
@@ -144,10 +144,6 @@ jobs:
 pip install ninja packaging wheel
 export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
 export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-# Currently for this setting the runner goes OOM if we pass --threads 4 to nvcc
-if [[ ( ${MATRIX_CUDA_VERSION} == "121" || ${MATRIX_CUDA_VERSION} == "122" ) && ${MATRIX_TORCH_VERSION} == "2.1" ]]; then
-  export FLASH_ATTENTION_FORCE_SINGLE_THREAD="TRUE"
-fi
 # Limit MAX_JOBS otherwise the github runner goes OOM
 MAX_JOBS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
 tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
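The four deleted lines exported FLASH_ATTENTION_FORCE_SINGLE_THREAD for CUDA 12.1/12.2 wheels built against torch 2.1, because passing --threads 4 to nvcc made the GitHub runner run out of memory; this commit drops that workaround so those builds use multithreaded nvcc again. Below is a minimal sketch of how such an environment variable is typically consumed when assembling nvcc arguments in a setup.py; the helper name append_nvcc_threads and the thread count are illustrative assumptions, not taken from this diff.

import os

def append_nvcc_threads(nvcc_extra_args):
    # Assumed convention: when FLASH_ATTENTION_FORCE_SINGLE_THREAD is "TRUE",
    # leave nvcc single-threaded to keep peak memory low on small CI runners;
    # otherwise compile with several nvcc threads for faster builds.
    if os.getenv("FLASH_ATTENTION_FORCE_SINGLE_THREAD", "FALSE") == "TRUE":
        return nvcc_extra_args
    return nvcc_extra_args + ["--threads", "4"]

# Example: flags that would be handed to the CUDA extension build.
nvcc_flags = append_nvcc_threads(["-O3", "--use_fast_math"])

MAX_JOBS=2, by contrast, caps the number of parallel compilation jobs launched by PyTorch's extension builder, which is why that limit is kept even after this change.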
2 changes: 1 addition & 1 deletion flash_attn/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "2.3.5.post4"
+__version__ = "2.3.5.post5"
 
 from flash_attn.flash_attn_interface import (
     flash_attn_func,
