Merge pull request ROCm#26 from ROCmSoftwarePlatform/IFU-main-2022-05-02
liligwu authored May 4, 2022
2 parents 9a5a33b + 18b48e9 commit c6f77ae
Showing 40 changed files with 1,235 additions and 284 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/fbgemmci.yml
@@ -206,7 +206,10 @@ jobs:
run: |
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub
# https://forums.developer.nvidia.com/t/notice-cuda-linux-repository-key-rotation/212772
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
# sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
sudo apt-get update
sudo apt-get -y install cuda-minimal-build-11-3 cuda-nvrtc-dev-11-3 cuda-nvtx-11-3 cuda-libraries-dev-11-3
39 changes: 38 additions & 1 deletion fbgemm_gpu/CMakeLists.txt
@@ -54,6 +54,14 @@ if(USE_CUDA)
message("Building for cuda_architectures = \"${cuda_architectures}\"")
message("${message_line}")

if(DEFINED GLIBCXX_USE_CXX11_ABI)
if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1)
set(CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
message("${CMAKE_CXX_FLAGS}")
endif()
endif()
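
As an illustration of how this new option might be driven (the `torch` query and the direct CMake configure below are assumptions, not part of this diff), one could match the ABI reported by the installed PyTorch:
```
# Hedged sketch: query the C++ ABI that the installed PyTorch was built with
# and forward it, so the block above adds -D_GLIBCXX_USE_CXX11_ABI=1 when needed.
ABI=$(python -c "import torch; print(int(torch.compiled_with_cxx11_abi()))")
cmake -B build -S fbgemm_gpu -DUSE_CUDA=ON -DGLIBCXX_USE_CXX11_ABI="${ABI}"
```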

#
# Torch Cuda Extensions are normally compiled with the flags below. However we
# disabled -D__CUDA_NO_HALF_CONVERSIONS__ here as it caused "error: no suitable
@@ -112,13 +120,15 @@ set(OPTIMIZERS
adam
approx_rowwise_adagrad
approx_rowwise_adagrad_with_weight_decay
approx_rowwise_adagrad_with_counter
approx_sgd
lamb
lars_sgd
partial_rowwise_adam
partial_rowwise_lamb
rowwise_adagrad
rowwise_adagrad_with_weight_decay
rowwise_adagrad_with_counter
rowwise_weighted_adagrad
sgd)

@@ -297,6 +307,22 @@ set_source_files_properties(
# Actual static SOURCES
#

# Ensure NVML_LIB_PATH is empty if it wasn't set and if the
# default lib path doesn't exist.
if(NOT NVML_LIB_PATH)
set(DEFAULT_NVML_LIB_PATH
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")

if(EXISTS ${DEFAULT_NVML_LIB_PATH})
message(
STATUS
"Setting NVML_LIB_PATH: \
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so"
)
set(NVML_LIB_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libnvidia-ml.so")
endif()
endif()
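
To skip the stub fallback above, the library path can also be given explicitly. A minimal sketch, assuming a direct CMake configure rather than the usual `setup.py` driven build, with an example path that varies by system:
```
# Assumed invocation: an explicit NVML_LIB_PATH bypasses the stub detection above.
cmake -B build -S fbgemm_gpu -DUSE_CUDA=ON \
      -DNVML_LIB_PATH=/usr/lib/x86_64-linux-gnu/libnvidia-ml.so
```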

set(fbgemm_gpu_sources_cpu
codegen/embedding_forward_split_cpu.cpp
codegen/embedding_forward_quantized_host_cpu.cpp
@@ -316,11 +342,18 @@ if(NOT FBGEMM_CPU_ONLY)
codegen/embedding_bounds_check_host.cpp
src/cumem_utils_host.cpp
src/layout_transform_ops_gpu.cpp
# src/merge_pooled_embeddings_cpu.cpp src/merge_pooled_embeddings_gpu.cpp
src/permute_pooled_embedding_ops_gpu.cpp
src/permute_pooled_embedding_ops_split_gpu.cpp
src/permute_pooled_embedding_ops_split_cpu.cpp
src/quantize_ops_gpu.cpp
src/sparse_ops_gpu.cpp
src/split_table_batched_embeddings.cpp)

if(NVML_LIB_PATH)
list(APPEND fbgemm_gpu_sources_cpu
src/merge_pooled_embeddings_cpu.cpp
src/merge_pooled_embeddings_gpu.cpp)
endif()
endif()

set(fbgemm_gpu_sources_cpu_option "-mavx;-mf16c;-mfma;-mavx2")
@@ -335,6 +368,7 @@ if(NOT FBGEMM_CPU_ONLY)
codegen/embedding_bounds_check.cu src/cumem_utils.cu
src/histogram_binning_calibration_ops.cu src/jagged_tensor_ops.cu
src/layout_transform_ops.cu src/permute_pooled_embedding_ops.cu
src/permute_pooled_embedding_ops_split.cu
src/quantize_ops.cu src/sparse_ops.cu src/split_embeddings_cache_cuda.cu
src/split_embeddings_utils.cu)

@@ -397,6 +431,9 @@ endif()
set_target_properties(fbgemm_gpu_py PROPERTIES PREFIX "")

target_link_libraries(fbgemm_gpu_py ${TORCH_LIBRARIES})
if(NVML_LIB_PATH)
target_link_libraries(fbgemm_gpu_py ${NVML_LIB_PATH})
endif()
target_include_directories(fbgemm_gpu_py PRIVATE ${TORCH_INCLUDE_DIRS})
if(USE_CUDA)
set_property(TARGET fbgemm_gpu_py PROPERTY CXX_STANDARD 17)
9 changes: 9 additions & 0 deletions fbgemm_gpu/README.md
@@ -27,6 +27,15 @@ conda install pytorch cudatoolkit=11.3 -c pytorch-nightly
conda install scikit-build jinja2 ninja cmake hypothesis
```

**If you're planning to build from source** and **don't** have `nvml.h` on your system, you can install it via the command below.
```
conda install -c conda-forge cudatoolkit-dev
```
Certain operations require this library to be present. Be sure to provide the path to `libnvidia-ml.so` to
`--nvml_lib_path` if installing from source (e.g. `python setup.py install --nvml_lib_path path_to_libnvidia-ml.so`).
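One possible workflow, assuming `cudatoolkit-dev` was installed into the active conda environment (the `find` lookup and the resulting path are only illustrative):
```
# Locate the NVML stub inside the conda environment and pass it to setup.py.
NVML_PATH=$(find "${CONDA_PREFIX}" -name 'libnvidia-ml.so*' -print -quit)
python setup.py install --nvml_lib_path "${NVML_PATH}"
```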


## PIP install

Currently the wheel is only built with sm70/80 (V100/A100 GPU) support: