Skip to content

Commit

Permalink
disallow optimization to fix serial getrs
Browse files Browse the repository at this point in the history
Signed-off-by: Yuuichi Asahi <y.asahi@nr.titech.ac.jp>
  • Loading branch information
Yuuichi Asahi committed Jan 28, 2025
1 parent 9c8eedf commit 3e27b0d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion batched/dense/impl/KokkosBatched_Laswp_Serial_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ struct SerialLaswp<Direct::Forward> {
template <>
struct SerialLaswp<Direct::Backward> {
template <typename PivViewType, typename AViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const PivViewType piv, const AViewType &A) {
KOKKOS_INLINE_FUNCTION static int invoke(const PivViewType &piv, const AViewType &A) {
auto info = KokkosBatched::Impl::checkLaswpInput(piv, A);
if (info) return info;

Expand Down
9 changes: 9 additions & 0 deletions batched/dense/impl/KokkosBatched_Laswp_Serial_Internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,15 @@ struct SerialLaswpVectorBackwardInternal {
/* */ ValueType *KOKKOS_RESTRICT A, const int as0) {
for (int i = (plen - 1); i >= 0; --i) {
const int piv = p[i * ps0];

// On H100 with CUDA 12.0.0, the compiler seems to apply an
// aggressive optimization that causes this function to crash.
// Insert an otherwise unnecessary check to inhibit that optimization.
#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ARCH_HOPPER90)
#if CUDA_VERSION == 12000
if (piv < 0) return 0;
#endif
#endif
if (piv != i) {
const int idx_i = i * as0, idx_p = piv * as0;
const ValueType tmp = A[idx_i];
Expand Down

0 comments on commit 3e27b0d

Please sign in to comment.