From 3e27b0d43f989781432e92ad3807fa9fa9b0a14f Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Wed, 29 Jan 2025 06:53:06 +0900 Subject: [PATCH] disallow optimization to fix serial getrs Signed-off-by: Yuuichi Asahi --- batched/dense/impl/KokkosBatched_Laswp_Serial_Impl.hpp | 2 +- .../dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/batched/dense/impl/KokkosBatched_Laswp_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_Laswp_Serial_Impl.hpp index 445251a647..9a59d74832 100644 --- a/batched/dense/impl/KokkosBatched_Laswp_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_Laswp_Serial_Impl.hpp @@ -83,7 +83,7 @@ struct SerialLaswp { template <> struct SerialLaswp { template - KOKKOS_INLINE_FUNCTION static int invoke(const PivViewType piv, const AViewType &A) { + KOKKOS_INLINE_FUNCTION static int invoke(const PivViewType &piv, const AViewType &A) { auto info = KokkosBatched::Impl::checkLaswpInput(piv, A); if (info) return info; diff --git a/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp b/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp index dc49f367b1..3516cca430 100644 --- a/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp +++ b/batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp @@ -96,6 +96,15 @@ struct SerialLaswpVectorBackwardInternal { /* */ ValueType *KOKKOS_RESTRICT A, const int as0) { for (int i = (plen - 1); i >= 0; --i) { const int piv = p[i * ps0]; + +// On H100 with CUDA 12.0.0, the compiler seems to apply +// an aggressive optimization which crashes this function. +// Insert an otherwise unnecessary operation to disallow that optimization. +#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ARCH_HOPPER90) +#if CUDA_VERSION == 12000 + if (piv < 0) return 0; +#endif +#endif if (piv != i) { const int idx_i = i * as0, idx_p = piv * as0; const ValueType tmp = A[idx_i];