diff --git a/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp index fd0056ddf8..d02bfd96f3 100644 --- a/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp +++ b/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp @@ -97,10 +97,12 @@ struct SerialLaswpVectorBackwardInternal { // On H100 with Cuda 12.0.0, the compiler seems to apply // an aggressive optimization which crashes this function // Disabling loop unrolling fixes the issue +#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL) #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ARCH_HOPPER90) #if CUDA_VERSION >= 12000 && CUDA_VERSION < 12100 #pragma unroll 1 #endif +#endif #endif for (int i = (plen - 1); i >= 0; --i) { const int piv = p[i * ps0];