diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index 6f5b07f287..ef8fea78b8 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -53,6 +53,7 @@ /* Whether to build kernels for execution space Kokkos::HIP */ #cmakedefine KOKKOSKERNELS_INST_EXECSPACE_HIP #cmakedefine KOKKOSKERNELS_INST_MEMSPACE_HIPSPACE +#cmakedefine KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE /* Whether to build kernels for execution space Kokkos::Experimental::SYCL */ #cmakedefine KOKKOSKERNELS_INST_EXECSPACE_SYCL #cmakedefine KOKKOSKERNELS_INST_MEMSPACE_SYCLSPACE diff --git a/cmake/kokkoskernels_eti_devices.cmake b/cmake/kokkoskernels_eti_devices.cmake index 8c6cb540ae..8bd131f2a4 100644 --- a/cmake/kokkoskernels_eti_devices.cmake +++ b/cmake/kokkoskernels_eti_devices.cmake @@ -23,6 +23,7 @@ SET(MEM_SPACES MEMSPACE_CUDASPACE MEMSPACE_CUDAUVMSPACE MEMSPACE_HIPSPACE + MEMSPACE_HIPMANAGEDSPACE MEMSPACE_SYCLSPACE MEMSPACE_SYCLSHAREDSPACE MEMSPACE_OPENMPTARGET @@ -32,6 +33,7 @@ SET(MEM_SPACES SET(MEMSPACE_CUDASPACE_CPP_TYPE Kokkos::CudaSpace) SET(MEMSPACE_CUDAUVMSPACE_CPP_TYPE Kokkos::CudaUVMSpace) SET(MEMSPACE_HIPSPACE_CPP_TYPE Kokkos::HIPSpace) +SET(MEMSPACE_HIPMANAGEDSPACE_CPP_TYPE Kokkos::HIPManagedSpace) SET(MEMSPACE_SYCLSPACE_CPP_TYPE Kokkos::Experimental::SYCLDeviceUSMSpace) SET(MEMSPACE_SYCLSHAREDSPACE_CPP_TYPE Kokkos::Experimental::SYCLSharedUSMSpace) SET(MEMSPACE_OPENMPTARGETSPACE_CPP_TYPE Kokkos::Experimental::OpenMPTargetSpace) @@ -85,10 +87,19 @@ IF(KOKKOS_ENABLE_HIP) BOOL "Whether to pre instantiate kernels for the memory space Kokkos::HIPSpace. Disabling this when Kokkos_ENABLE_HIP is enabled may increase build times. Default: ON if Kokkos is HIP-enabled, OFF otherwise." ) + KOKKOSKERNELS_ADD_OPTION( + INST_MEMSPACE_HIPMANAGEDSPACE + OFF + BOOL + "Whether to pre instantiate kernels for the memory space Kokkos::HIPManagedSpace. Disabling this when Kokkos_ENABLE_HIP is enabled may increase build times. Default: OFF." 
+ ) IF(KOKKOSKERNELS_INST_EXECSPACE_HIP AND KOKKOSKERNELS_INST_MEMSPACE_HIPSPACE) LIST(APPEND DEVICE_LIST "") ENDIF() + IF(KOKKOSKERNELS_INST_EXECSPACE_HIP AND KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) + LIST(APPEND DEVICE_LIST "") + ENDIF() IF( Trilinos_ENABLE_COMPLEX_DOUBLE AND ((NOT DEFINED CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS) OR (NOT CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS)) ) MESSAGE( WARNING "The CMake option CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS is either undefined or OFF. Please set CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS:BOOL=ON when building with HIP and complex double enabled.") @@ -197,7 +208,7 @@ KOKKOSKERNELS_ADD_OPTION( ) SET(EXECSPACE_CUDA_VALID_MEM_SPACES CUDASPACE CUDAUVMSPACE) -SET(EXECSPACE_HIP_VALID_MEM_SPACES HIPSPACE) +SET(EXECSPACE_HIP_VALID_MEM_SPACES HIPSPACE HIPMANAGEDSPACE) SET(EXECSPACE_SYCL_VALID_MEM_SPACES SYCLSPACE SYCLSHAREDSPACE) SET(EXECSPACE_OPENMPTARGET_VALID_MEM_SPACES OPENMPTARGETSPACE) SET(EXECSPACE_SERIAL_VALID_MEM_SPACES HBWSPACE HOSTSPACE) diff --git a/common/src/KokkosKernels_ExecSpaceUtils.hpp b/common/src/KokkosKernels_ExecSpaceUtils.hpp index 2ec09f4069..4d3a3002b4 100644 --- a/common/src/KokkosKernels_ExecSpaceUtils.hpp +++ b/common/src/KokkosKernels_ExecSpaceUtils.hpp @@ -215,10 +215,21 @@ inline void kk_get_free_total_memory(size_t& free_mem, total_mem /= n_streams; } template <> +inline void kk_get_free_total_memory(size_t& free_mem, + size_t& total_mem, + int n_streams) { + kk_get_free_total_memory(free_mem, total_mem, n_streams); +} +template <> inline void kk_get_free_total_memory(size_t& free_mem, size_t& total_mem) { kk_get_free_total_memory(free_mem, total_mem, 1); } +template <> +inline void kk_get_free_total_memory( + size_t& free_mem, size_t& total_mem) { + kk_get_free_total_memory(free_mem, total_mem, 1); +} #endif // FIXME_SYCL Use compiler extension instead of low level interface when diff --git a/graph/unit_test/Test_Graph_graph_color.hpp b/graph/unit_test/Test_Graph_graph_color.hpp index 
5d4eec03ca..101c489bc0 100644 --- a/graph/unit_test/Test_Graph_graph_color.hpp +++ b/graph/unit_test/Test_Graph_graph_color.hpp @@ -110,10 +110,15 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, COLORING_DEFAULT, COLORING_SERIAL, COLORING_VB, COLORING_VBBIT, COLORING_VBCS}; -#ifdef KOKKOS_ENABLE_CUDA + // FIXME: VBD sometimes fails on CUDA and HIP +#if defined(KOKKOS_ENABLE_CUDA) if (!std::is_same::value) { coloring_algorithms.push_back(COLORING_VBD); } +#elif defined(KOKKOS_ENABLE_HIP) + if (!std::is_same::value) { + coloring_algorithms.push_back(COLORING_VBD); + } #else coloring_algorithms.push_back(COLORING_VBD); #endif @@ -174,9 +179,15 @@ void test_coloring(lno_t numRows, size_type nnz, lno_t bandwidth, } } } - EXPECT_TRUE((num_conflict == conf)); - - EXPECT_TRUE((num_conflict == 0)); + EXPECT_TRUE((num_conflict == conf)) + << "Coloring algo " << (int)coloring_algorithm + << ": kk_is_d1_coloring_valid returned incorrect number of conflicts (" + << num_conflict << ", should be " << conf << ")"; + + EXPECT_TRUE((num_conflict == 0)) + << "Coloring algo " << (int)coloring_algorithm + << ": D1 coloring produced invalid coloring (" << num_conflict + << " conflicts)"; } // device::execution_space::finalize(); } diff --git a/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp b/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp index 6188d001b1..a64a4d23bc 100644 --- a/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp +++ b/sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp @@ -664,8 +664,6 @@ struct LowerTriLvlSchedTP2SolverFunctor { // Helper functors for Lower-triangular solve with SpMV template struct SparseTriSupernodalSpMVFunctor { - // using execution_space = typename LHSType::execution_space; - // using memory_space = typename execution_space::memory_space; using execution_space = typename TriSolveHandle::HandleExecSpace; using memory_space = typename TriSolveHandle::HandleTempMemorySpace; @@ -2913,7 +2911,7 @@ void lower_tri_solve(ExecutionSpace &space, 
TriSolveHandle &thandle, #if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV) using namespace KokkosSparse::Experimental; - using memory_space = typename ExecutionSpace::memory_space; + using memory_space = typename TriSolveHandle::HandleTempMemorySpace; using device_t = Kokkos::Device; using integer_view_t = typename TriSolveHandle::integer_view_t; using integer_view_host_t = typename TriSolveHandle::integer_view_host_t; @@ -3311,7 +3309,7 @@ void upper_tri_solve(ExecutionSpace &space, TriSolveHandle &thandle, #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSPSTRSV_SOLVE_IMPL_PROFILE) cudaProfilerStop(); #endif - using memory_space = typename ExecutionSpace::memory_space; + using memory_space = typename TriSolveHandle::HandleTempMemorySpace; using device_t = Kokkos::Device; typedef typename TriSolveHandle::size_type size_type; typedef typename TriSolveHandle::nnz_lno_view_t NGBLType; diff --git a/test_common/Test_HIP.hpp b/test_common/Test_HIP.hpp index c9e02698c5..dfb8e1d687 100644 --- a/test_common/Test_HIP.hpp +++ b/test_common/Test_HIP.hpp @@ -31,7 +31,18 @@ class hip : public ::testing::Test { static void TearDownTestCase() {} }; +using HIPSpaceDevice = Kokkos::Device; +using HIPManagedSpaceDevice = + Kokkos::Device; + #define TestCategory hip -#define TestDevice Kokkos::HIP + +// Prefer HIPSpace for any testing where only one exec space is used +#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) && \ + !defined(KOKKOSKERNELS_INST_MEMSPACE_HIPSPACE) +#define TestDevice HIPManagedSpaceDevice +#else +#define TestDevice HIPSpaceDevice +#endif #endif // TEST_HIP_HPP