diff --git a/CHANGELOG.md b/CHANGELOG.md index c3409a90586..a381f16129f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,37 @@ # Change Log +## [4.0.01](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-04-14) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.0.01) + +### Backend and Architecture Enhancements: + +#### CUDA: + +- Allow NVCC 12 to compile using C++20 flag [\#6020](https://github.com/kokkos/kokkos/pull/6020) +- Add CUDA Ada architecture support [\#6022](https://github.com/kokkos/kokkos/pull/6022) + +#### HIP: + +- Add support for AMDGPU target NAVI31 / RX 7900 XT(X): gfx1100 [\#6021](https://github.com/kokkos/kokkos/pull/6021) +- HIP: Fix warning from `std::memcpy` [\#6019](https://github.com/kokkos/kokkos/pull/6019) + +#### SYCL: +- Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5986](https://github.com/kokkos/kokkos/pull/5986) + +### General Enhancements +- Fixup 4.0 change log [\#6023](https://github.com/kokkos/kokkos/pull/6023) + +### Build System Changes +- Cherry-pick TriBITS update from Trilinos [\#6037](https://github.com/kokkos/kokkos/pull/6037) +- CMake: update package compatibility mode when building within Trilinos [\#6013](https://github.com/kokkos/kokkos/pull/6013) + +### Bug Fixes +- Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6011](https://github.com/kokkos/kokkos/pull/6011) +- Desul atomics: wrong value for `desul::Impl::numeric_limits_max` [\#6018](https://github.com/kokkos/kokkos/pull/6018) +- Fix warning in some user code when using std::memcpy [\#6000](https://github.com/kokkos/kokkos/pull/6000) + ## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.0) (2023-02-21) -[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.0) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.00) ### Features: - Allow value types without default constructor in `Kokkos::View` with `Kokkos::WithoutInitializing` [\#5307](https://github.com/kokkos/kokkos/pull/5307) @@ -72,23 +102,12 @@ - Remove Kokkos_ENABLE_CUDA_LDG_INTRINSIC option [\#5623](https://github.com/kokkos/kokkos/pull/5623) - Don't rely on synchronization behavior of default stream in CUDA and HIP - this potentially will break unintended implicit synchronization with other libraries such as MPI [\#5391](https://github.com/kokkos/kokkos/pull/5391) - Make ExecutionSpace::concurrency() a non-static member function [\#5655](https://github.com/kokkos/kokkos/pull/5655) and related PRs +- Remove code guarded by `KOKKOS_ENABLE_DEPRECATED_CODE_3` ### Deprecations -- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178) -- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230) -- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141) -- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185) -- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135) -- Deprecate `finalize_all()` [\#5134](https://github.com/kokkos/kokkos/pull/5134) -- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120) -- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117) -- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111) -- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957) -- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810) -- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382) - Deprecate `CudaUVMSpace::available()` which always returned `true` [\#5614](https://github.com/kokkos/kokkos/pull/5614) - Deprecate `volatile`-qualified members from `Kokkos::pair` and `Kokkos::complex` [\#5412](https://github.com/kokkos/kokkos/pull/5412) -- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.2) +- Deprecate `KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_*` macros [\#5824](https://github.com/kokkos/kokkos/pull/5824) (oversight in 3.6) ### Bug Fixes - Avoid allocating memory for `UniqueToken` [\#5300](https://github.com/kokkos/kokkos/pull/5300) @@ -102,6 +121,7 @@ - Add missing `ReductionIdentity` specialization [\#5798](https://github.com/kokkos/kokkos/pull/5798) - Don't install standard algorithms headers multiple times [\#5670](https://github.com/kokkos/kokkos/pull/5670) - Fix max scratch size calculation for level 0 scratch in CUDA and HIP [\#5718](https://github.com/kokkos/kokkos/pull/5718) +- Fix excessive build times using Makefile.kokkos [\#6068](https://github.com/kokkos/kokkos/pull/6068) ## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01) diff --git a/CMakeLists.txt b/CMakeLists.txt index 02ebcf9e241..aa712f56127 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,13 +5,16 @@ if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) endif() +if (COMMAND TRIBITS_PACKAGE_DECL) + TRIBITS_PACKAGE_DECL(Kokkos) +endif() + # We want to determine if options are given with the wrong case # In order to detect which arguments are given to compare against # the list of valid arguments, at the beginning here we need to # form a list of all the given variables. If it begins with any # case of KoKkOS, we add it to the list. - GET_CMAKE_PROPERTY(_variableNames VARIABLES) SET(KOKKOS_GIVEN_VARIABLES) FOREACH (var ${_variableNames}) @@ -123,6 +126,8 @@ IF(NOT KOKKOS_HAS_TRILINOS) FORCE) ENDIF() ENDIF() +ELSE() + SET(KOKKOS_COMPILE_LANGUAGE CXX) ENDIF() IF (NOT CMAKE_SIZEOF_VOID_P) @@ -139,7 +144,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MINOR 0) -set(Kokkos_VERSION_PATCH 0) +set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") # mathematical expressions below are not stricly necessary but they eliminate @@ -288,7 +293,9 @@ IF (KOKKOS_HAS_TRILINOS) $<$:${KOKKOS_ALL_COMPILE_OPTIONS}>) ENDIF() -KOKKOS_PACKAGE_DECL() +if (NOT COMMAND TRIBITS_PACKAGE_DECL) + KOKKOS_PACKAGE_DECL() +endif() #------------------------------------------------------------------------------ diff --git a/Makefile.kokkos b/Makefile.kokkos index a55e3428cfc..60cef6c7f30 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -2,7 +2,7 @@ KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MINOR = 0 -KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -10,7 +10,7 @@ KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MIN KOKKOS_DEVICES ?= "Threads" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR -# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Hopper90 +# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030 @@ -341,6 +341,7 @@ KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volt KOKKOS_INTERNAL_USE_ARCH_TURING75 := $(call kokkos_has_string,$(KOKKOS_ARCH),Turing75) KOKKOS_INTERNAL_USE_ARCH_AMPERE80 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere80) KOKKOS_INTERNAL_USE_ARCH_AMPERE86 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ampere86) +KOKKOS_INTERNAL_USE_ARCH_ADA89 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ada89) KOKKOS_INTERNAL_USE_ARCH_HOPPER90 := $(call kokkos_has_string,$(KOKKOS_ARCH),Hopper90) KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ @@ -356,6 +357,7 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE80) \ + $(KOKKOS_INTERNAL_USE_ARCH_AMPERE86) \ + + $(KOKKOS_INTERNAL_USE_ARCH_ADA89) \ + $(KOKKOS_INTERNAL_USE_ARCH_HOPPER90)) #SEK: This seems like a bug to me @@ -1048,6 +1050,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 + endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") @@ -1092,6 +1098,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 + endif KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) @@ -1418,7 +1429,7 @@ tmp := $(call desul_append_header, "$H""endif") DESUL_INTERNAL_LS_CONFIG := $(shell ls $(DESUL_CONFIG_HEADER) 2>&1) ifeq ($(DESUL_INTERNAL_LS_CONFIG), $(DESUL_CONFIG_HEADER)) - KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) | grep -c define)) + DESUL_INTERNAL_NEW_CONFIG := $(strip $(shell diff $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) | grep -c define)) else DESUL_INTERNAL_NEW_CONFIG := 1 endif diff --git a/bin/nvcc_wrapper b/bin/nvcc_wrapper index 2204514d1b3..0c55651460a 100755 --- a/bin/nvcc_wrapper +++ b/bin/nvcc_wrapper @@ -338,6 +338,24 @@ do std_flag=$corrected_std_flag shared_args="$shared_args $std_flag" ;; + --std=c++20|-std=c++20) + if [ -n "$std_flag" ]; then + warn_std_flag + shared_args=${shared_args/ $std_flag/} + fi + # NVCC only has C++20 from version 12 on + cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) + if [ ${cuda_main_version} -lt 12 ]; then + fallback_std_flag="-std=c++14" + # this is hopefully just occurring in a downstream project during CMake feature tests + # we really have no choice here but to accept the flag and change to an accepted C++ standard + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + std_flag=$fallback_std_flag + else + std_flag=$1 + fi + shared_args="$shared_args $std_flag" + ;; --std=c++17|-std=c++17) if [ -n "$std_flag" ]; then warn_std_flag diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 5b97e461b4a..cb1affa24c3 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -105,6 +105,7 @@ #cmakedefine KOKKOS_ARCH_AMPERE #cmakedefine KOKKOS_ARCH_AMPERE80 #cmakedefine KOKKOS_ARCH_AMPERE86 +#cmakedefine KOKKOS_ARCH_ADA89 #cmakedefine KOKKOS_ARCH_HOPPER #cmakedefine KOKKOS_ARCH_HOPPER90 #cmakedefine KOKKOS_ARCH_AMD_ZEN @@ -116,3 +117,4 @@ #cmakedefine KOKKOS_ARCH_VEGA90A #cmakedefine KOKKOS_ARCH_NAVI #cmakedefine KOKKOS_ARCH_NAVI1030 +#cmakedefine KOKKOS_ARCH_NAVI1100 diff --git a/cmake/compile_tests/cuda_compute_capability.cc b/cmake/compile_tests/cuda_compute_capability.cc index b0bc5d15208..b81c4218a91 100644 --- a/cmake/compile_tests/cuda_compute_capability.cc +++ b/cmake/compile_tests/cuda_compute_capability.cc @@ -46,6 +46,7 @@ int main() { case 75: std::cout << "Set -DKokkos_ARCH_TURING75=ON ." << std::endl; break; case 80: std::cout << "Set -DKokkos_ARCH_AMPERE80=ON ." << std::endl; break; case 86: std::cout << "Set -DKokkos_ARCH_AMPERE86=ON ." << std::endl; break; + case 89: std::cout << "Set -DKokkos_ARCH_ADA89=ON ." << std::endl; break; case 90: std::cout << "Set -DKokkos_ARCH_HOPPER90=ON ." << std::endl; break; default: std::cout << "Compute capability " << compute_capability diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index b051f8e3bd2..eb7c271b156 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -86,6 +86,7 @@ KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2" "KOKK KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5" "KOKKOS_SHOW_CUDA_ARCHS") KOKKOS_ARCH_OPTION(AMPERE80 GPU "NVIDIA Ampere generation CC 8.0" "KOKKOS_SHOW_CUDA_ARCHS") KOKKOS_ARCH_OPTION(AMPERE86 GPU "NVIDIA Ampere generation CC 8.6" "KOKKOS_SHOW_CUDA_ARCHS") +KOKKOS_ARCH_OPTION(ADA89 GPU "NVIDIA Ada generation CC 8.9" "KOKKOS_SHOW_CUDA_ARCHS") KOKKOS_ARCH_OPTION(HOPPER90 GPU "NVIDIA Hopper generation CC 9.0" "KOKKOS_SHOW_CUDA_ARCHS") IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET) @@ -93,9 +94,9 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET) ENDIF() # AMD archs ordered in decreasing priority of autodetection -LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI100 MI50/60 V620/W6800) -LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A VEGA908 VEGA906 NAVI1030) -LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx908 gfx906 gfx1030) +LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI100 MI50/60 RX7900XTX V620/W6800) +LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A VEGA908 VEGA906 NAVI1100 NAVI1030) +LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx908 gfx906 gfx1100 gfx1030) #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17 FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) @@ -576,6 +577,7 @@ CHECK_CUDA_ARCH(VOLTA72 sm_72) CHECK_CUDA_ARCH(TURING75 sm_75) CHECK_CUDA_ARCH(AMPERE80 sm_80) CHECK_CUDA_ARCH(AMPERE86 sm_86) +CHECK_CUDA_ARCH(ADA89 sm_89) CHECK_CUDA_ARCH(HOPPER90 sm_90) SET(AMDGPU_ARCH_ALREADY_SPECIFIED "") diff --git a/cmake/kokkos_functions.cmake b/cmake/kokkos_functions.cmake index 4c51bdeabf6..55b1ebbf818 100644 --- a/cmake/kokkos_functions.cmake +++ b/cmake/kokkos_functions.cmake @@ -5,6 +5,9 @@ # Validate options are given with correct case and define an internal # upper-case version for use within +set(Kokkos_OPTIONS_NOT_TO_EXPORT + Kokkos_ENABLE_TESTS Kokkos_ENABLE_EXAMPLES) + # # # @FUNCTION: kokkos_deprecated_list @@ -57,6 +60,12 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) # Make sure this appears in the cache with the appropriate DOCSTRING SET(${CAMEL_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING}) + IF (KOKKOS_HAS_TRILINOS) + IF (NOT CAMEL_NAME IN_LIST Kokkos_OPTIONS_NOT_TO_EXPORT) + TRIBITS_PKG_EXPORT_CACHE_VAR(${CAMEL_NAME}) + ENDIF() + ENDIF() + #I don't love doing it this way because it's N^2 in number options, but c'est la vie FOREACH(opt ${KOKKOS_GIVEN_VARIABLES}) STRING(TOUPPER ${opt} OPT_UC) diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index c65c2af52b9..fb658239d8d 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -38,7 +38,7 @@ ELSE() WRITE_BASIC_PACKAGE_VERSION_FILE("${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake" VERSION "${Kokkos_VERSION}" - COMPATIBILITY SameMajorVersion) + COMPATIBILITY AnyNewerVersion) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosConfigVersion.cmake DESTINATION "${${PROJECT_NAME}_INSTALL_LIB_DIR}/cmake/${PACKAGE_NAME}") diff --git a/cmake/kokkos_test_cxx_std.cmake b/cmake/kokkos_test_cxx_std.cmake index eda3124586d..5f8e15cd673 100644 --- a/cmake/kokkos_test_cxx_std.cmake +++ b/cmake/kokkos_test_cxx_std.cmake @@ -29,7 +29,11 @@ FUNCTION(kokkos_set_cxx_standard_feature standard) ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME}) MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature") IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL GNU OR KOKKOS_CXX_HOST_COMPILER_ID STREQUAL Clang)) - SET(SUPPORTED_NVCC_FLAGS "-std=c++17") + IF(${KOKKOS_CXX_COMPILER_VERSION} VERSION_LESS 12.0.0) + SET(SUPPORTED_NVCC_FLAGS "-std=c++17") + ELSE() + SET(SUPPORTED_NVCC_FLAGS "-std=c++17" "-std=c++20") + ENDIF() IF (NOT ${${STANDARD_NAME}} IN_LIST SUPPORTED_NVCC_FLAGS) MESSAGE(FATAL_ERROR "CMake wants to use ${${STANDARD_NAME}} which is not supported by NVCC. Using a more recent host compiler or a more recent CMake version might help.") ENDIF() diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 0557db2098d..0f39551423f 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -353,6 +353,7 @@ MACRO(KOKKOS_INSTALL_ADDITIONAL_FILES) DESTINATION ${KOKKOS_HEADER_DIR}) ENDMACRO() + FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE "PLAIN_STYLE" @@ -441,6 +442,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) ENDIF() ENDFUNCTION() + FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE "STATIC;SHARED" @@ -503,19 +505,11 @@ FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) # preserving the directory structure, e.g. impl # If headers got installed in both locations, it breaks some # downstream packages - TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS}) - #Stolen from Tribits - it can add prefixes - SET(TRIBITS_LIBRARY_NAME_PREFIX "${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}") - SET(TRIBITS_LIBRARY_NAME ${TRIBITS_LIBRARY_NAME_PREFIX}${LIBRARY_NAME}) - #Tribits has way too much techinical debt and baggage to even - #allow PUBLIC target_compile_options to be used. It forces C++ flags on projects - #as a giant blob of space-separated strings. We end up with duplicated - #flags between the flags implicitly forced on Kokkos-dependent and those Kokkos - #has in its public INTERFACE_COMPILE_OPTIONS. - #These do NOT get de-deduplicated because Tribits - #creates flags as a giant monolithic space-separated string - #Do not set any transitive properties and keep everything working as before - #KOKKOS_SET_LIBRARY_PROPERTIES(${TRIBITS_LIBRARY_NAME} PLAIN_STYLE) + TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${PARSE_UNPARSED_ARGUMENTS} + ADDED_LIB_TARGET_NAME_OUT ${LIBRARY_NAME}_TARGET_NAME ) + IF (PARSE_ADD_BUILD_OPTIONS) + KOKKOS_SET_LIBRARY_PROPERTIES(${${LIBRARY_NAME}_TARGET_NAME}) + ENDIF() ELSE() # Forward the headers, we want to know about all headers # to make sure they appear correctly in IDEs @@ -527,15 +521,17 @@ FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) ENDIF() ENDFUNCTION() + FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) -IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) -ELSE() - ADD_LIBRARY(${NAME} INTERFACE) - KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) -ENDIF() + IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) + ELSE() + ADD_LIBRARY(${NAME} INTERFACE) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) + ENDIF() ENDFUNCTION() + FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) #ignore the target, tribits doesn't do anything directly with targets @@ -549,13 +545,8 @@ FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) ENDFUNCTION() FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) - IF(KOKKOS_HAS_TRILINOS) - #don't trust tribits to do this correctly - KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) - ELSE() - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) - ENDIF() + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) ENDFUNCTION() MACRO(KOKKOS_ADD_TEST_DIRECTORIES) diff --git a/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp index e2fe5a6d838..8e8895f65a9 100644 --- a/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp +++ b/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp @@ -415,7 +415,8 @@ struct HIPParallelLaunchKernelInvokerconstantMemHostStaging; - std::memcpy(staging, &driver, sizeof(DriverType)); + std::memcpy(static_cast(staging), + static_cast(&driver), sizeof(DriverType)); // Copy functor asynchronously from there to constant memory on the device KOKKOS_IMPL_HIP_SAFE_CALL(hipMemcpyToSymbolAsync( diff --git a/core/src/SYCL/Kokkos_SYCL_Team.hpp b/core/src/SYCL/Kokkos_SYCL_Team.hpp index b01d06b928e..674037ed959 100644 --- a/core/src/SYCL/Kokkos_SYCL_Team.hpp +++ b/core/src/SYCL/Kokkos_SYCL_Team.hpp @@ -337,10 +337,11 @@ class SYCLTeamMember { // Private for the driver KOKKOS_INLINE_FUNCTION - SYCLTeamMember(sycl::local_ptr shared, const int shared_begin, - const int shared_size, + SYCLTeamMember(sycl::local_ptr shared, const std::size_t shared_begin, + const std::size_t shared_size, sycl::device_ptr scratch_level_1_ptr, - const int scratch_level_1_size, const sycl::nd_item<2> item) + const std::size_t scratch_level_1_size, + const sycl::nd_item<2> item) : m_team_reduce(shared), m_team_shared(static_cast>(shared) + shared_begin, shared_size, scratch_level_1_ptr, scratch_level_1_size), diff --git a/core/src/impl/Kokkos_Core.cpp b/core/src/impl/Kokkos_Core.cpp index 5e53e426596..a35510dd64e 100644 --- a/core/src/impl/Kokkos_Core.cpp +++ b/core/src/impl/Kokkos_Core.cpp @@ -743,6 +743,8 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_AMPERE86) declare_configuration_metadata("architecture", "GPU architecture", "AMPERE86"); +#elif defined(KOKKOS_ARCH_ADA89) + declare_configuration_metadata("architecture", "GPU architecture", "ADA89"); #elif defined(KOKKOS_ARCH_HOPPER90) declare_configuration_metadata("architecture", "GPU architecture", "HOPPER90"); @@ -757,6 +759,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_NAVI1030) declare_configuration_metadata("architecture", "GPU architecture", "NAVI1030"); +#elif defined(KOKKOS_ARCH_NAVI1100) + declare_configuration_metadata("architecture", "GPU architecture", + "NAVI1100"); #else declare_configuration_metadata("architecture", "GPU architecture", "none"); diff --git a/generate_makefile.bash b/generate_makefile.bash index 60791d1c018..018426c9b8e 100755 --- a/generate_makefile.bash +++ b/generate_makefile.bash @@ -161,6 +161,7 @@ display_help_text() { echo " VEGA908 = AMD GPU MI100 GFX908" echo " VEGA90A = AMD GPU MI200 GFX90A" echo " NAVI1030 = AMD GPU V620/W6800 GFX1030" + echo " NAVI1100 = AMD GPU RX 7900 XT(X) GFX1100" echo " [ARM]" echo " ARMV80 = ARMv8.0 Compatible CPU" echo " ARMV81 = ARMv8.1 Compatible CPU" diff --git a/master_history.txt b/master_history.txt index abe65ff4503..73e48268b5a 100644 --- a/master_history.txt +++ b/master_history.txt @@ -30,4 +30,5 @@ tag: 3.6.00 date: 04:14:2022 master: 2834f94a release: 6ea708ff tag: 3.6.01 date: 06:16:2022 master: b52f8c83 release: afe9b404 tag: 3.7.00 date: 08:25:2022 master: d19aab99 release: 0018e5fb tag: 3.7.01 date: 12:01:2022 master: 61d7db55 release: d3bb8cfe -tag: 4.0.0 date: 02:23:2023 master: 5ad60966 release: 52ea2953 +tag: 4.0.00 date: 02:23:2023 master: 5ad60966 release: 52ea2953 +tag: 4.0.01 date: 04:26:2023 master: aa1f48f3 release: 5893754f diff --git a/simd/src/Kokkos_SIMD_AVX2.hpp b/simd/src/Kokkos_SIMD_AVX2.hpp index 1732c33ca58..86b944efa54 100644 --- a/simd/src/Kokkos_SIMD_AVX2.hpp +++ b/simd/src/Kokkos_SIMD_AVX2.hpp @@ -804,7 +804,7 @@ class simd> { KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { - return 8; + return 4; } template , bool> = false> diff --git a/simd/unit_tests/TestSIMD.cpp b/simd/unit_tests/TestSIMD.cpp index 7a4ecf19ed1..92c77033b9a 100644 --- a/simd/unit_tests/TestSIMD.cpp +++ b/simd/unit_tests/TestSIMD.cpp @@ -486,3 +486,32 @@ TEST(simd, device) { Kokkos::parallel_for(Kokkos::RangePolicy>(0, 1), simd_device_functor()); } + +TEST(simd, test_size) { +#if defined(KOKKOS_ARCH_AVX512XEON) + constexpr auto width = 8; + using Abi = Kokkos::Experimental::simd_abi::avx512_fixed_size; + static_assert(width == + Kokkos::Experimental::simd::size()); + +#elif defined(KOKKOS_ARCH_AVX2) + constexpr auto width = 4; + using Abi = Kokkos::Experimental::simd_abi::avx2_fixed_size; + +#elif defined(__ARM_NEON) + constexpr auto width = 2; + using Abi = Kokkos::Experimental::simd_abi::neon_fixed_size; + +#else + constexpr auto width = 1; + using Abi = Kokkos::Experimental::simd_abi::scalar; + static_assert(width == + Kokkos::Experimental::simd::size()); +#endif + + static_assert(width == Kokkos::Experimental::simd::size()); + static_assert(width == Kokkos::Experimental::simd::size()); + static_assert(width == + Kokkos::Experimental::simd::size()); + static_assert(width == Kokkos::Experimental::simd::size()); +} diff --git a/tpls/desul/include/desul/atomics/Common.hpp b/tpls/desul/include/desul/atomics/Common.hpp index b8dfcb8acdf..b7353e7dba8 100644 --- a/tpls/desul/include/desul/atomics/Common.hpp +++ b/tpls/desul/include/desul/atomics/Common.hpp @@ -83,11 +83,11 @@ struct numeric_limits_max; template <> struct numeric_limits_max { - static constexpr uint32_t value = 0xffffffffu; + static constexpr auto value = static_cast(-1); }; template <> struct numeric_limits_max { - static constexpr uint64_t value = 0xfffffffflu; + static constexpr auto value = static_cast(-1); }; constexpr bool atomic_always_lock_free(std::size_t size) {