From 8efb8c24848d2860700ed77d4ffbaf2620a1dbd7 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 9 Jan 2024 09:43:59 -0700 Subject: [PATCH] Lapack - SVD: adding initial files that do not implement anything Adding SVD feature to Lapack component, the interface is similar to classic Lapack and the implementation relies on the TPL layer to provide initial capabilities. The TPL supported are LAPACK, MKL, cuSOLVER and rocSOLVER. Testing three analytical cases 2x2, 2x3 and 3x2 and then some randomly generated matrices. --- .gitignore | 5 +- lapack/CMakeLists.txt | 7 + .../svd/KokkosLapack_svd_eti_spec_inst.cpp.in | 26 + .../KokkosLapack_svd_eti_spec_avail.hpp.in | 24 + lapack/impl/KokkosLapack_svd_impl.hpp | 34 + lapack/impl/KokkosLapack_svd_spec.hpp | 156 ++++ lapack/src/KokkosLapack_svd.hpp | 238 ++++++ lapack/tpls/KokkosLapack_Host_tpl.cpp | 66 ++ lapack/tpls/KokkosLapack_Host_tpl.hpp | 6 + .../tpls/KokkosLapack_svd_tpl_spec_avail.hpp | 171 +++++ .../tpls/KokkosLapack_svd_tpl_spec_decl.hpp | 679 ++++++++++++++++++ lapack/unit_test/Test_Lapack.hpp | 1 + lapack/unit_test/Test_Lapack_svd.hpp | 639 ++++++++++++++++ 13 files changed, 2051 insertions(+), 1 deletion(-) create mode 100644 lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in create mode 100644 lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in create mode 100644 lapack/impl/KokkosLapack_svd_impl.hpp create mode 100644 lapack/impl/KokkosLapack_svd_spec.hpp create mode 100644 lapack/src/KokkosLapack_svd.hpp create mode 100644 lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp create mode 100644 lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp create mode 100644 lapack/unit_test/Test_Lapack_svd.hpp diff --git a/.gitignore b/.gitignore index d64726e92e..6dcc5d6a5d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,7 @@ TAGS #Clangd indexing compile_commands.json .cache/ -.vscode/ \ No newline at end of file +.vscode/ + +#MacOS hidden files +.DS_Store diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index ee91079378..f825a2184a 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -58,3 +58,10 @@ KOKKOSKERNELS_GENERATE_ETI(Lapack_trtri trtri SOURCE_LIST SOURCES TYPE_LISTS FLOATS LAYOUTS DEVICES ) + +KOKKOSKERNELS_GENERATE_ETI(Lapack_svd svd + COMPONENTS lapack + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) diff --git a/lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..62dd75475f --- /dev/null +++ b/lapack/eti/generated_specializations_cpp/svd/KokkosLapack_svd_eti_spec_inst.cpp.in @@ -0,0 +1,26 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosLapack_svd_spec.hpp" + +namespace KokkosLapack { +namespace Impl { +@LAPACK_SVD_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..49e526b7e8 --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_svd_eti_spec_avail.hpp.in @@ -0,0 +1,24 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_SVD_ETI_AVAIL_BLOCK@ + } //IMPL +} //Kokkos +#endif diff --git a/lapack/impl/KokkosLapack_svd_impl.hpp b/lapack/impl/KokkosLapack_svd_impl.hpp new file mode 100644 index 0000000000..49df758936 --- /dev/null +++ b/lapack/impl/KokkosLapack_svd_impl.hpp @@ -0,0 +1,34 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_IMPL_SVD_HPP_ +#define KOKKOSLAPACK_IMPL_SVD_HPP_ + +/// \file KokkosLapack_svd_impl.hpp +/// \brief Implementation(s) of singular value decomposition of a dense matrix. + +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// NOTE: Might add the implementation of KokkosLapack::svd later + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_IMPL_SVD_HPP diff --git a/lapack/impl/KokkosLapack_svd_spec.hpp b/lapack/impl/KokkosLapack_svd_spec.hpp new file mode 100644 index 0000000000..fc0a34f790 --- /dev/null +++ b/lapack/impl/KokkosLapack_svd_spec.hpp @@ -0,0 +1,156 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSLAPACK_IMPL_SVD_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_SVD_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct svd_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization availability +// KokkosLapack::Impl::SVD. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_SVD_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct svd_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// Unification layer +/// \brief Implementation of KokkosLapack::svd. + +template ::value, + bool eti_spec_avail = svd_eti_spec_avail< + ExecutionSpace, AMatrix, SVector, UMatrix, VMatrix>::value> +struct SVD { + static void svd(const ExecutionSpace &space, const char jobu[], + const char jobvt[], const AMatrix &A, const SVector &S, + const UMatrix &U, const VMatrix &Vt); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +//! Full specialization of svd +// Unification layer +template +struct SVD { + static void svd(const ExecutionSpace & /* space */, const char * /* jobu */, + const char * /* jobvt */, const AMatrix & /* A */, + const SVector & /* S */, const UMatrix & /* U */, + const VMatrix & /* Vt */) { + // NOTE: Might add the implementation of KokkosLapack::svd later + throw std::runtime_error( + "No fallback implementation of SVD (singular value decomposition) " + "exists. Enable LAPACK, CUSOLVER or ROCSOLVER TPL to use this " + "function."); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization of +// KokkosLapack::Impl::SVD. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_SVD_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct SVD< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#define KOKKOSLAPACK_SVD_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct SVD< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include + +#endif // KOKKOSLAPACK_IMPL_SVD_SPEC_HPP_ diff --git a/lapack/src/KokkosLapack_svd.hpp b/lapack/src/KokkosLapack_svd.hpp new file mode 100644 index 0000000000..1cc02631cc --- /dev/null +++ b/lapack/src/KokkosLapack_svd.hpp @@ -0,0 +1,238 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +/// \file KokkosLapack_svd.hpp +/// \brief Singular Value Decomposition (SVD) +/// +/// This file provides KokkosLapack::svd. This function performs a +/// local (no MPI) singular value decomposition of the input matrix A +/// and returns the singular values and vectors dedending on input flags. + +#ifndef KOKKOSLAPACK_SVD_HPP_ +#define KOKKOSLAPACK_SVD_HPP_ + +#include + +#include "KokkosLapack_svd_spec.hpp" +#include "KokkosKernels_Error.hpp" + +namespace KokkosLapack { + +/// \brief Compute the Singular Value Decomposition of A = U*S*Vt +/// +/// \tparam ExecutionSpace the space where the kernel will run. +/// \tparam AMatrix (mxn) matrix as a rank-2 Kokkos::View. +/// \tparam SVector min(m,n) vector as a rank-1 Kokkos::View +/// \tparam UMatrix (mxm) matrix as a rank-2 Kokkos::View +/// \tparam VMatrix () matrix as a rank-2 Kokkos::View +/// +/// \param space [in] execution space instance used to specified how to execute +/// the svd kernels. +/// \param jobu [in] flag to control the computation of the left singular +/// vectors when set to: 'A' all vectors are computed, 'S' the first min(m,n) +/// singular vectors are computed, 'O' the first min(m,n) singular vectors are +/// overwritten into A, 'N' no singular vectors are computed. \param jobvt [in] +/// flag to control the computation of the right singular vectors when set to: +/// 'A' all vectors are computed, 'S' the first min(m,n) singular vectors are +/// computed, 'O' the first min(m,n) singular vectors are overwritten into A, +/// 'N' no singular vectors are computed. \param jobvt [in] \param A [in] An +/// m-by-n matrix to be decomposed using its singular values. \param S [out] +/// Vector of the min(m, n) singular values of A. \param U [out] the first +/// min(m, n) columns of U are the left singular vectors of A. \param Vt [out] +/// the first min(m, n) columns of Vt are the right singular vectors of A. +/// +template +void svd(const ExecutionSpace& space, const char jobu[], const char jobvt[], + const AMatrix& A, const SVector& S, const UMatrix& U, + const VMatrix& Vt) { + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert( + Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: S must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: U must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosLapack::svd: Vt must be a Kokkos::View."); + static_assert(AMatrix::rank() == 2, "KokkosLapack::svd: A must have rank 2."); + static_assert(SVector::rank() == 1, "KokkosLapack::svd: S must have rank 1."); + static_assert(UMatrix::rank() == 2, "KokkosLapack::svd: U must have rank 2."); + static_assert(VMatrix::rank() == 2, + "KokkosLapack::svd: Vt must have rank 2."); + + int64_t m = A.extent(0); + int64_t n = A.extent(1); + int64_t rankA = Kokkos::min(m, n); + + // No work to do since the matrix is empty... + // Also do not send a matrix with size zero + // to Lapack TPLs or they will complain! + if ((m == 0) || (n == 0)) { + return; + } + + // Check the jobu and jobvt control flags + // The only valid options there are 'A', 'S', 'O' and 'N' + const bool is_jobu_invalid = + !((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || + (jobu[0] == 's') || (jobu[0] == 'O') || (jobu[0] == 'o') || + (jobu[0] == 'N') || (jobu[0] == 'n')); + + const bool is_jobvt_invalid = + !((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || + (jobvt[0] == 's') || (jobvt[0] == 'O') || (jobvt[0] == 'o') || + (jobvt[0] == 'N') || (jobvt[0] == 'n')); + + if (is_jobu_invalid && is_jobvt_invalid) { + std::ostringstream oss; + oss << "KokkosLapack::svd: both jobu and jobvt are invalid!\n" + << "Possible values are A, S, O or N, submitted values are " << jobu[0] + << " and " << jobvt[0] << "\n"; + KokkosKernels::Impl::throw_runtime_exception(oss.str()); + } + if (is_jobu_invalid) { + std::ostringstream oss; + oss << "KokkosLapack::svd: jobu is invalid!\n" + << "Possible values are A, S, O or N, submitted value is " << jobu[0] + << "\n"; + KokkosKernels::Impl::throw_runtime_exception(oss.str()); + } + if (is_jobvt_invalid) { + std::ostringstream oss; + oss << "KokkosLapack::svd: jobvt is invalid!\n" + << "Possible values are A, S, O or N, submitted value is " << jobvt[0] + << "\n"; + KokkosKernels::Impl::throw_runtime_exception(oss.str()); + } + + if (((jobu[0] == 'O') || (jobu[0] == 'o')) && + ((jobvt[0] == 'O') || (jobvt[0] == 'o'))) { + std::ostringstream oss; + oss << "KokkosLapack::svd: jobu and jobvt cannot be O at the same time!\n"; + KokkosKernels::Impl::throw_runtime_exception(oss.str()); + } + + // Check validity of output views sizes + // Note that of jobu/jobvt are set to O or N + // then the associated matrix does not need storage + bool is_extent_invalid = false; + std::ostringstream os; + if (S.extent_int(0) != rankA) { + is_extent_invalid = true; + os << "KokkosLapack::svd: S has extent " << S.extent(0) << ", instead of " + << rankA << ".\n"; + } + if ((jobu[0] == 'A') || (jobu[0] == 'a') || (jobu[0] == 'S') || + (jobu[0] == 's')) { + if (U.extent_int(0) != m || U.extent_int(1) != m) { + is_extent_invalid = true; + os << "KokkosLapack::svd: U has extents (" << U.extent(0) << ", " + << U.extent(1) << ") instead of (" << m << ", " << m << ").\n"; + } + } + if ((jobvt[0] == 'A') || (jobvt[0] == 'a') || (jobvt[0] == 'S') || + (jobvt[0] == 's')) { + if (Vt.extent_int(0) != n || Vt.extent_int(1) != n) { + is_extent_invalid = true; + os << "KokkosLapack::svd: V has extents (" << Vt.extent(0) << ", " + << Vt.extent(1) << ") instead of (" << n << ", " << n << ").\n"; + } + } + if (is_extent_invalid) { + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + if (std::is_same_v && + (A.extent(0) < A.extent(1))) { + throw std::runtime_error( + "CUSOLVER does not support SVD for matrices with more columns " + "than rows, you can transpose you matrix first then compute " + "SVD of that transpose: At=VSUt, and swap the output U and Vt" + " and transpose them to recover the desired SVD."); + } +#endif + + using AMatrix_Internal = Kokkos::View< + typename AMatrix::non_const_value_type**, typename AMatrix::array_layout, + typename AMatrix::device_type, Kokkos::MemoryTraits>; + + using SVector_Internal = Kokkos::View< + typename SVector::non_const_value_type*, typename SVector::array_layout, + typename SVector::device_type, Kokkos::MemoryTraits>; + + using UMatrix_Internal = Kokkos::View< + typename UMatrix::non_const_value_type**, typename UMatrix::array_layout, + typename UMatrix::device_type, Kokkos::MemoryTraits>; + + using VMatrix_Internal = Kokkos::View< + typename VMatrix::non_const_value_type**, typename VMatrix::array_layout, + typename VMatrix::device_type, Kokkos::MemoryTraits>; + + AMatrix_Internal A_i = A; + SVector_Internal S_i = S; + UMatrix_Internal U_i = U; + VMatrix_Internal Vt_i = Vt; + + KokkosLapack::Impl::SVD::svd(space, jobu, + jobvt, A_i, + S_i, U_i, + Vt_i); +} + +/// \brief Compute the Singular Value Decomposition of A = U*S*Vt +/// +/// \tparam AMatrix (mxn) matrix as a rank-2 Kokkos::View. +/// \tparam SVector min(m,n) vector as a rank-1 Kokkos::View +/// \tparam UMatrix (mxm) matrix as a rank-2 Kokkos::View +/// \tparam VMatrix () matrix as a rank-2 Kokkos::View +/// +/// \param jobu [in] flag to control the computation of the left singular +/// vectors when set to: 'A' all vectors are computed, 'S' the first min(m,n) +/// singular vectors are computed, 'O' the first min(m,n) singular vectors are +/// overwritten into A, 'N' no singular vectors are computed. \param jobvt [in] +/// flag to control the computation of the right singular vectors when set to: +/// 'A' all vectors are computed, 'S' the first min(m,n) singular vectors are +/// computed, 'O' the first min(m,n) singular vectors are overwritten into A, +/// 'N' no singular vectors are computed. \param A [in] An m-by-n matrix to be +/// decomposed using its singular values. \param S [out] Vector of the min(m, n) +/// singular values of A. \param U [out] the first min(m, n) columns of U are +/// the left singular vectors of A. \param Vt [out] the first min(m, n) columns +/// of Vt are the right singular vectors of A. +/// +template +void svd(const char jobu[], const char jobvt[], const AMatrix& A, + const SVector& S, const UMatrix& U, const VMatrix& Vt) { + typename AMatrix::execution_space space{}; + svd(space, jobu, jobvt, A, S, U, Vt); +} + +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_SVD_HPP_ diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index d629a17f1d..add0a802bd 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -38,6 +38,31 @@ void F77_BLAS_MANGLE(cgesv, CGESV)(int*, int*, std::complex*, int*, int*, void F77_BLAS_MANGLE(zgesv, ZGESV)(int*, int*, std::complex*, int*, int*, std::complex*, int*, int*); +/// +/// Gesvd +/// + +void F77_BLAS_MANGLE(sgesvd, SGESVD)(const char*, const char*, const int*, + const int*, float*, const int*, float*, + float*, const int*, float*, const int*, + float*, int*, int*); +void F77_BLAS_MANGLE(dgesvd, DGESVD)(const char*, const char*, const int*, + const int*, double*, const int*, double*, + double*, const int*, double*, const int*, + double*, int*, int*); +void F77_BLAS_MANGLE(cgesvd, CGESVD)(const char*, const char*, const int*, + const int*, std::complex*, + const int*, float*, std::complex*, + const int*, std::complex*, + const int*, std::complex*, int*, + float*, int*); +void F77_BLAS_MANGLE(zgesvd, ZGESVD)(const char*, const char*, const int*, + const int*, std::complex*, + const int*, double*, std::complex*, + const int*, std::complex*, + const int*, std::complex*, int*, + double*, int*); + /// /// Trtri /// @@ -64,6 +89,11 @@ void F77_BLAS_MANGLE(ztrtri, ZTRTRI)(const char*, const char*, int*, #define F77_FUNC_CGESV F77_BLAS_MANGLE(cgesv, CGESV) #define F77_FUNC_ZGESV F77_BLAS_MANGLE(zgesv, ZGESV) +#define F77_FUNC_SGESVD F77_BLAS_MANGLE(sgesvd, SGESVD) +#define F77_FUNC_DGESVD F77_BLAS_MANGLE(dgesvd, DGESVD) +#define F77_FUNC_CGESVD F77_BLAS_MANGLE(cgesvd, CGESVD) +#define F77_FUNC_ZGESVD F77_BLAS_MANGLE(zgesvd, ZGESVD) + #define F77_FUNC_STRTRI F77_BLAS_MANGLE(strtri, STRTRI) #define F77_FUNC_DTRTRI F77_BLAS_MANGLE(dtrtri, DTRTRI) #define F77_FUNC_CTRTRI F77_BLAS_MANGLE(ctrtri, CTRTRI) @@ -82,6 +112,15 @@ void HostLapack::gesv(int n, int rhs, float* a, int lda, int* ipiv, F77_FUNC_SGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack::gesvd(const char jobu, const char jobvt, const int m, + const int n, float* a, const int lda, float* s, + float* u, const int ldu, float* vt, + const int ldvt, float* work, int lwork, + float* /*rwork*/, int info) { + F77_FUNC_SGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, &info); +} +template <> int HostLapack::trtri(const char uplo, const char diag, int n, const float* a, int lda) { int info = 0; @@ -99,6 +138,15 @@ void HostLapack::gesv(int n, int rhs, double* a, int lda, int* ipiv, F77_FUNC_DGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack::gesvd(const char jobu, const char jobvt, const int m, + const int n, double* a, const int lda, double* s, + double* u, const int ldu, double* vt, + const int ldvt, double* work, int lwork, + double* /*rwork*/, int info) { + F77_FUNC_DGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, &info); +} +template <> int HostLapack::trtri(const char uplo, const char diag, int n, const double* a, int lda) { int info = 0; @@ -118,6 +166,15 @@ void HostLapack >::gesv(int n, int rhs, F77_FUNC_CGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack >::gesvd( + const char jobu, const char jobvt, const int m, const int n, + std::complex* a, const int lda, float* s, std::complex* u, + const int ldu, std::complex* vt, const int ldvt, + std::complex* work, int lwork, float* rwork, int info) { + F77_FUNC_CGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, rwork, &info); +} +template <> int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, int lda) { @@ -138,6 +195,15 @@ void HostLapack >::gesv(int n, int rhs, F77_FUNC_ZGESV(&n, &rhs, a, &lda, ipiv, b, &ldb, &info); } template <> +void HostLapack >::gesvd( + const char jobu, const char jobvt, const int m, const int n, + std::complex* a, const int lda, double* s, std::complex* u, + const int ldu, std::complex* vt, const int ldvt, + std::complex* work, int lwork, double* rwork, int info) { + F77_FUNC_ZGESVD(&jobu, &jobvt, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, + &lwork, rwork, &info); +} +template <> int HostLapack >::trtri(const char uplo, const char diag, int n, const std::complex* a, diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index d74099aaec..9eca83afea 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -33,6 +33,12 @@ struct HostLapack { static void gesv(int n, int rhs, T *a, int lda, int *ipiv, T *b, int ldb, int info); + static void gesvd(const char jobu, const char jobvt, const int m, const int n, + T *A, const int lda, + typename Kokkos::ArithTraits::mag_type *S, T *U, + const int ldu, T *Vt, const int ldvt, T *work, int lwork, + typename Kokkos::ArithTraits::mag_type *rwork, int info); + static int trtri(const char uplo, const char diag, int n, const T *a, int lda); }; diff --git a/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp new file mode 100644 index 0000000000..7a7403209f --- /dev/null +++ b/lapack/tpls/KokkosLapack_svd_tpl_spec_avail.hpp @@ -0,0 +1,171 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct svd_tpl_spec_avail { + enum : bool { value = false }; +}; + +// LAPACK +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_MKL) +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Serial) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::OpenMP) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::Threads) +#endif + +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK || KOKKOSKERNELS_ENABLE_TPL_MKL + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif // CUDAUVMSPACE +#endif // CUSOLVER + +// ROCSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#define KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct svd_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HIPSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +#endif // HIPMANAGEDSPACE +#endif // ROCSOLVER + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_SVD_TPL_SPEC_AVAIL_HPP_ diff --git a/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp new file mode 100644 index 0000000000..715de4b5a6 --- /dev/null +++ b/lapack/tpls/KokkosLapack_svd_tpl_spec_decl.hpp @@ -0,0 +1,679 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSLAPACK_SVD_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_SVD_TPL_SPEC_DECL_HPP_ + +#include "KokkosKernels_Error.hpp" +#include "Kokkos_ArithTraits.hpp" + +namespace KokkosLapack { +namespace Impl { +template +inline void svd_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + if constexpr (std::is_same_v) { + printf( + "KokkosLapack::svd<> TPL Cusolver specialization for < %s , %s, %s, %s " + ">\n", + typeid(AMatrix).name(), typeid(SVector).name(), typeid(UMatrix).name(), + typeid(VMatrix).name()); + } +#endif +#endif +} +} // namespace Impl +} // namespace KokkosLapack + +// LAPACK +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK +#include "KokkosLapack_Host_tpl.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void lapackSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = std::is_same_v ? A.stride(0) + : A.stride(1); + const int ldu = std::is_same_v ? U.stride(0) + : U.stride(1); + const int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + int lwork = -1, info = 0; + Kokkos::View rwork("svd rwork buffer", + 5 * Kokkos::min(m, n)); + Kokkos::View work("svd work buffer", 1); + if constexpr (Kokkos::ArithTraits::is_complex) { + HostLapack>::gesvd( + jobu[0], jobvt[0], m, n, + reinterpret_cast*>(A.data()), lda, S.data(), + reinterpret_cast*>(U.data()), ldu, + reinterpret_cast*>(Vt.data()), ldvt, + reinterpret_cast*>(work.data()), lwork, + rwork.data(), info); + + lwork = static_cast(work(0).real()); + + work = Kokkos::View("svd work buffer", lwork); + HostLapack>::gesvd( + jobu[0], jobvt[0], m, n, + reinterpret_cast*>(A.data()), lda, S.data(), + reinterpret_cast*>(U.data()), ldu, + reinterpret_cast*>(Vt.data()), ldvt, + reinterpret_cast*>(work.data()), lwork, + rwork.data(), info); + } else { + HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, work.data(), + lwork, rwork.data(), info); + + lwork = static_cast(work(0)); + + work = Kokkos::View("svd work buffer", lwork); + HostLapack::gesvd(jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), + U.data(), ldu, Vt.data(), ldvt, work.data(), + lwork, rwork.data(), info); + } +} + +#define KOKKOSLAPACK_SVD_LAPACK(SCALAR, LAYOUT, EXEC_SPACE) \ + template <> \ + struct SVD< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const EXEC_SPACE& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + lapackSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_SVD_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +#ifdef KOKKOSKERNELS_ENABLE_TPL_MKL +#include "mkl.h" + +namespace KokkosLapack { +namespace Impl { + +template +void mklSvdWrapper(const ExecutionSpace& /* space */, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const lapack_int m = A.extent_int(0); + const lapack_int n = A.extent_int(1); + const lapack_int lda = std::is_same_v + ? A.stride(0) + : A.stride(1); + const lapack_int ldu = std::is_same_v + ? U.stride(0) + : U.stride(1); + const lapack_int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + Kokkos::View rwork("svd rwork buffer", + Kokkos::min(m, n) - 1); + lapack_int ret = 0; + if constexpr (std::is_same_v) { + ret = + LAPACKE_sgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, A.data(), lda, + S.data(), U.data(), ldu, Vt.data(), ldvt, rwork.data()); + } + if constexpr (std::is_same_v) { + ret = + LAPACKE_dgesvd(LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, A.data(), lda, + S.data(), U.data(), ldu, Vt.data(), ldvt, rwork.data()); + } + if constexpr (std::is_same_v>) { + ret = LAPACKE_cgesvd( + LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, rwork.data()); + } + if constexpr (std::is_same_v>) { + ret = LAPACKE_zgesvd( + LAPACK_COL_MAJOR, jobu[0], jobvt[0], m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, + rwork.data()); + } + + if (ret != 0) { + std::ostringstream os; + os << "KokkosLapack::svd: MKL failed with return value: " << ret << "\n"; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } +} + +#define KOKKOSLAPACK_SVD_MKL(SCALAR, LAYOUT, EXEC_SPACE) \ + template <> \ + struct SVD< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + EXEC_SPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const EXEC_SPACE& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_LAPACK," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + mklSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_SVD_MKL(float, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_MKL(double, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Serial) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_SVD_MKL(float, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_MKL(double, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::OpenMP) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_SVD_MKL(float, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_MKL(double, Kokkos::LayoutLeft, Kokkos::Threads) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads) +KOKKOSLAPACK_SVD_MKL(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::Threads) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverSvdWrapper(const ExecutionSpace& space, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const int m = A.extent_int(0); + const int n = A.extent_int(1); + const int lda = std::is_same_v ? A.stride(0) + : A.stride(1); + const int ldu = std::is_same_v ? U.stride(0) + : U.stride(1); + const int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + int lwork = 0; + Kokkos::View info("svd info"); + Kokkos::View rwork("svd rwork buffer", + Kokkos::min(m, n) - 1); + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnSgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSgesvd( + s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnDgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDgesvd( + s.handle, jobu[0], jobvt[0], m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, work.data(), lwork, rwork.data(), info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnCgesvd(s.handle, jobu[0], jobvt[0], m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, + reinterpret_cast(work.data()), lwork, + rwork.data(), info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnZgesvd_bufferSize(s.handle, m, n, &lwork)); + Kokkos::View work("svd work buffer", lwork); + + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( + cusolverDnZgesvd(s.handle, jobu[0], jobvt[0], m, n, + reinterpret_cast(A.data()), lda, + S.data(), reinterpret_cast(U.data()), + ldu, reinterpret_cast(Vt.data()), + ldvt, reinterpret_cast(work.data()), + lwork, rwork.data(), info.data())); + } + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_SVD_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct SVD< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const Kokkos::Cuda& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_CUSOLVER," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + cusolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_SVD_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_SVD_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSLAPACK_SVD_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include +#include + +namespace KokkosLapack { +namespace Impl { + +template +void rocsolverSvdWrapper(const ExecutionSpace& space, const char jobu[], + const char jobvt[], const AMatrix& A, const SVector& S, + const UMatrix& U, const VMatrix& Vt) { + using memory_space = typename AMatrix::memory_space; + using Scalar = typename AMatrix::non_const_value_type; + using Magnitude = typename SVector::non_const_value_type; + using ALayout_t = typename AMatrix::array_layout; + using ULayout_t = typename UMatrix::array_layout; + using VLayout_t = typename VMatrix::array_layout; + + const rocblas_int m = A.extent_int(0); + const rocblas_int n = A.extent_int(1); + const rocblas_int lda = std::is_same_v + ? A.stride(0) + : A.stride(1); + const rocblas_int ldu = std::is_same_v + ? U.stride(0) + : U.stride(1); + const rocblas_int ldvt = std::is_same_v + ? Vt.stride(0) + : Vt.stride(1); + + rocblas_svect UVecMode = rocblas_svect_all; + if ((jobu[0] == 'S') || (jobu[0] == 's')) { + UVecMode = rocblas_svect_singular; + } else if ((jobu[0] == 'O') || (jobu[0] == 'o')) { + UVecMode = rocblas_svect_overwrite; + } else if ((jobu[0] == 'N') || (jobu[0] == 'n')) { + UVecMode = rocblas_svect_none; + } + rocblas_svect VVecMode = rocblas_svect_all; + if ((jobvt[0] == 'S') || (jobvt[0] == 's')) { + VVecMode = rocblas_svect_singular; + } else if ((jobvt[0] == 'O') || (jobvt[0] == 'o')) { + VVecMode = rocblas_svect_overwrite; + } else if ((jobvt[0] == 'N') || (jobvt[0] == 'n')) { + VVecMode = rocblas_svect_none; + } + + const rocblas_workmode WorkMode = rocblas_outofplace; + + Kokkos::View info("svd info"); + Kokkos::View rwork("svd rwork buffer", + Kokkos::min(m, n) - 1); + + KokkosBlas::Impl::RocBlasSingleton& s = + KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOS_ROCBLAS_SAFE_CALL_IMPL( + rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_sgesvd( + s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, rwork.data(), WorkMode, info.data())); + } + if constexpr (std::is_same_v) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_dgesvd( + s.handle, UVecMode, VVecMode, m, n, A.data(), lda, S.data(), U.data(), + ldu, Vt.data(), ldvt, rwork.data(), WorkMode, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_cgesvd( + s.handle, UVecMode, VVecMode, m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, rwork.data(), + WorkMode, info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocsolver_zgesvd( + s.handle, UVecMode, VVecMode, m, n, + reinterpret_cast(A.data()), lda, S.data(), + reinterpret_cast(U.data()), ldu, + reinterpret_cast(Vt.data()), ldvt, + rwork.data(), WorkMode, info.data())); + } + KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_SVD_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct SVD< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, \ + svd_eti_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using SVector = \ + Kokkos::View::mag_type*, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using UMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using VMatrix = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + \ + static void svd(const Kokkos::HIP& space, const char jobu[], \ + const char jobvt[], const AMatrix& A, const SVector& S, \ + const UMatrix& U, const VMatrix& Vt) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::svd[TPL_ROCSOLVER," #SCALAR \ + "]"); \ + svd_print_specialization(); \ + \ + rocsolverSvdWrapper(space, jobu, jobvt, A, S, U, Vt); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_SVD_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_HIPMANAGEDSPACE) +KOKKOSLAPACK_SVD_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +KOKKOSLAPACK_SVD_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, + Kokkos::HIPManagedSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif // KOKKOSLAPACK_SVD_TPL_SPEC_DECL_HPP_ diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp index 815c442884..1a717521f8 100644 --- a/lapack/unit_test/Test_Lapack.hpp +++ b/lapack/unit_test/Test_Lapack.hpp @@ -18,5 +18,6 @@ #include "Test_Lapack_gesv.hpp" #include "Test_Lapack_trtri.hpp" +#include "Test_Lapack_svd.hpp" #endif // TEST_LAPACK_HPP diff --git a/lapack/unit_test/Test_Lapack_svd.hpp b/lapack/unit_test/Test_Lapack_svd.hpp new file mode 100644 index 0000000000..2453df54ed --- /dev/null +++ b/lapack/unit_test/Test_Lapack_svd.hpp @@ -0,0 +1,639 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include +#include +#include +#include + +#include + +namespace Test { + +template +void check_triple_product( + const AMatrix& A, const SVector& S, const UMatrix& U, const VMatrix& Vt, + typename Kokkos::ArithTraits< + typename AMatrix::non_const_value_type>::mag_type tol) { + // After a successful SVD decomposition we have A=U*S*V + // So using gemm we should be able to compare the above + // triple product to the original matrix A. + using execution_space = typename AMatrix::execution_space; + + AMatrix temp("intermediate U*S product", A.extent(0), A.extent(1)); + AMatrix M("U*S*V product", A.extent(0), A.extent(1)); + + // First compute the left side of the product: temp = U*S + Kokkos::parallel_for( + Kokkos::RangePolicy(0, U.extent_int(0)), + KOKKOS_LAMBDA(const int& rowIdx) { + for (int colIdx = 0; colIdx < U.extent_int(1); ++colIdx) { + if (colIdx < S.extent_int(0)) { + temp(rowIdx, colIdx) = U(rowIdx, colIdx) * S(colIdx); + } + } + }); + + // Second compute the right side of the product: M = temp*V = U*S*V + KokkosBlas::gemm("N", "N", 1, temp, Vt, 0, M); + + typename AMatrix::HostMirror A_h = Kokkos::create_mirror_view(A); + typename AMatrix::HostMirror M_h = Kokkos::create_mirror_view(M); + Kokkos::deep_copy(A_h, A); + Kokkos::deep_copy(M_h, M); + for (int rowIdx = 0; rowIdx < A.extent_int(0); ++rowIdx) { + for (int colIdx = 0; colIdx < A.extent_int(1); ++colIdx) { + if (tol < Kokkos::abs(A_h(rowIdx, colIdx))) { + EXPECT_NEAR_KK_REL(A_h(rowIdx, colIdx), M_h(rowIdx, colIdx), tol); + } else { + EXPECT_NEAR_KK(A_h(rowIdx, colIdx), M_h(rowIdx, colIdx), tol); + } + } + } +} + +template +void check_unitary_orthogonal_matrix( + const Matrix& M, typename Kokkos::ArithTraits< + typename Matrix::non_const_value_type>::mag_type tol) { + // After a successful SVD decomposition the matrices + // U and V are unitary matrices. Thus we can check + // the property UUt=UtU=I and VVt=VtV=I using gemm. + using scalar_type = typename Matrix::non_const_value_type; + + Matrix I0("M*Mt", M.extent(0), M.extent(0)); + KokkosBlas::gemm("N", "C", 1, M, M, 0, I0); + typename Matrix::HostMirror I0_h = Kokkos::create_mirror_view(I0); + Kokkos::deep_copy(I0_h, I0); + for (int rowIdx = 0; rowIdx < M.extent_int(0); ++rowIdx) { + for (int colIdx = 0; colIdx < M.extent_int(0); ++colIdx) { + if (rowIdx == colIdx) { + EXPECT_NEAR_KK_REL(I0_h(rowIdx, colIdx), + Kokkos::ArithTraits::one(), tol); + } else { + EXPECT_NEAR_KK(I0_h(rowIdx, colIdx), + Kokkos::ArithTraits::zero(), tol); + } + } + } + + Matrix I1("Mt*M", M.extent(1), M.extent(1)); + KokkosBlas::gemm("C", "N", 1, M, M, 0, I1); + typename Matrix::HostMirror I1_h = Kokkos::create_mirror_view(I1); + Kokkos::deep_copy(I1_h, I1); + for (int rowIdx = 0; rowIdx < M.extent_int(1); ++rowIdx) { + for (int colIdx = 0; colIdx < M.extent_int(1); ++colIdx) { + if (rowIdx == colIdx) { + EXPECT_NEAR_KK_REL(I1_h(rowIdx, colIdx), + Kokkos::ArithTraits::one(), tol); + } else { + EXPECT_NEAR_KK(I1_h(rowIdx, colIdx), + Kokkos::ArithTraits::zero(), tol); + } + } + } +} + +template +int impl_analytic_2x2_svd() { + using scalar_type = typename AMatrix::value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + using vector_type = + Kokkos::View; + using KAT_S = Kokkos::ArithTraits; + + const mag_type eps = KAT_S::eps(); + + AMatrix A("A", 2, 2), U("U", 2, 2), Vt("Vt", 2, 2), Aref("A ref", 2, 2); + vector_type S("S", 2); + + typename AMatrix::HostMirror A_h = Kokkos::create_mirror_view(A); + + // A = [3 0] + // [4 5] + // USV = 1/sqrt(10) [1 -3] * sqrt(5) [3 0] * 1/sqrt(2) [ 1 1] + // [3 1] [0 1] [-1 1] + A_h(0, 0) = 3; + A_h(1, 0) = 4; + A_h(1, 1) = 5; + + Kokkos::deep_copy(A, A_h); + Kokkos::deep_copy(Aref, A_h); + + KokkosLapack::svd("A", "A", A, S, U, Vt); + // Don't really need to fence here as we deep_copy right after... + + typename vector_type::HostMirror S_h = Kokkos::create_mirror_view(S); + Kokkos::deep_copy(S_h, S); + typename AMatrix::HostMirror U_h = Kokkos::create_mirror_view(U); + Kokkos::deep_copy(U_h, U); + typename AMatrix::HostMirror Vt_h = Kokkos::create_mirror_view(Vt); + Kokkos::deep_copy(Vt_h, Vt); + + // The singular values for this problem + // are known: sqrt(45) and sqrt(5) + EXPECT_NEAR_KK_REL(S_h(0), static_cast(Kokkos::sqrt(45)), + 100 * eps); + EXPECT_NEAR_KK_REL(S_h(1), static_cast(Kokkos::sqrt(5)), 100 * eps); + + // The singular vectors should be identical + // or of oposite sign we check the first + // component of the vectors to determine + // the proper signed comparison. + std::vector Uref = { + static_cast(1 / Kokkos::sqrt(10)), + static_cast(3 / Kokkos::sqrt(10)), + static_cast(-3 / Kokkos::sqrt(10)), + static_cast(1 / Kokkos::sqrt(10))}; + std::vector Vtref = { + static_cast(1 / Kokkos::sqrt(2)), + static_cast(-1 / Kokkos::sqrt(2)), + static_cast(1 / Kokkos::sqrt(2)), + static_cast(1 / Kokkos::sqrt(2))}; + + // Both rotations and reflections are valid + // vector basis so we need to check both signs + // to confirm proper SVD was achieved. + Kokkos::View U_real("U real", 2, 2), + Vt_real("Vt real", 2, 2); + if constexpr (KAT_S::is_complex) { + U_real(0, 0) = U_h(0, 0).real(); + U_real(0, 1) = U_h(0, 1).real(); + U_real(1, 0) = U_h(1, 0).real(); + U_real(1, 1) = U_h(1, 1).real(); + + Vt_real(0, 0) = Vt_h(0, 0).real(); + Vt_real(0, 1) = Vt_h(0, 1).real(); + Vt_real(1, 0) = Vt_h(1, 0).real(); + Vt_real(1, 1) = Vt_h(1, 1).real(); + } else { + U_real(0, 0) = U_h(0, 0); + U_real(0, 1) = U_h(0, 1); + U_real(1, 0) = U_h(1, 0); + U_real(1, 1) = U_h(1, 1); + + Vt_real(0, 0) = Vt_h(0, 0); + Vt_real(0, 1) = Vt_h(0, 1); + Vt_real(1, 0) = Vt_h(1, 0); + Vt_real(1, 1) = Vt_h(1, 1); + } + + const mag_type tol = 100 * KAT_S::eps(); + const mag_type one_sqrt10 = static_cast(1 / Kokkos::sqrt(10)); + const mag_type one_sqrt2 = static_cast(1 / Kokkos::sqrt(2)); + + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 0)), one_sqrt10, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 1)), 3 * one_sqrt10, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 0)), 3 * one_sqrt10, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 1)), one_sqrt10, tol); + + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(0, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(0, 1)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 1)), one_sqrt2, tol); + + check_unitary_orthogonal_matrix(U, tol); + check_unitary_orthogonal_matrix(Vt, tol); + + check_triple_product(Aref, S, U, Vt, tol); + + return 0; +} + +template +int impl_analytic_2x3_svd() { + using scalar_type = typename AMatrix::value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + using vector_type = + Kokkos::View; + using KAT_S = Kokkos::ArithTraits; + + const mag_type tol = 100 * KAT_S::eps(); + + AMatrix A("A", 2, 3), U("U", 2, 2), Vt("Vt", 3, 3), Aref("A ref", 2, 3); + vector_type S("S", 2); + + typename AMatrix::HostMirror A_h = Kokkos::create_mirror_view(A); + + // A = [3 2 2] + // [2 3 -2] + // USVt = 1/sqrt(2) [1 1] * [5 0 0] * 1/(3*sqrt(2)) [ 3 3 0] + // [1 -1] [0 3 0] [ 1 -1 4] + // [2*sqrt(2) -2*sqrt(2) + // -sqrt(2)] + A_h(0, 0) = 3; + A_h(0, 1) = 2; + A_h(0, 2) = 2; + A_h(1, 0) = 2; + A_h(1, 1) = 3; + A_h(1, 2) = -2; + + Kokkos::deep_copy(A, A_h); + Kokkos::deep_copy(Aref, A_h); + + try { + KokkosLapack::svd("A", "A", A, S, U, Vt); + } catch (const std::runtime_error& e) { + std::string test_string = e.what(); + std::string cusolver_m_less_than_n = + "CUSOLVER does not support SVD for matrices with more columns " + "than rows, you can transpose you matrix first then compute " + "SVD of that transpose: At=VSUt, and swap the output U and Vt" + " and transpose them to recover the desired SVD."; + + if (test_string == cusolver_m_less_than_n) { + return 0; + } + } + // Don't really need to fence here as we deep_copy right after... + + typename vector_type::HostMirror S_h = Kokkos::create_mirror_view(S); + Kokkos::deep_copy(S_h, S); + typename AMatrix::HostMirror U_h = Kokkos::create_mirror_view(U); + Kokkos::deep_copy(U_h, U); + typename AMatrix::HostMirror Vt_h = Kokkos::create_mirror_view(Vt); + Kokkos::deep_copy(Vt_h, Vt); + + // The singular values for this problem + // are known: sqrt(45) and sqrt(5) + EXPECT_NEAR_KK_REL(S_h(0), static_cast(5), tol); + EXPECT_NEAR_KK_REL(S_h(1), static_cast(3), tol); + + // Both rotations and reflections are valid + // vector basis so we need to check both signs + // to confirm proper SVD was achieved. + Kokkos::View U_real("U real", 2, 2), + Vt_real("Vt real", 3, 3); + if constexpr (KAT_S::is_complex) { + U_real(0, 0) = U_h(0, 0).real(); + U_real(0, 1) = U_h(0, 1).real(); + U_real(1, 0) = U_h(1, 0).real(); + U_real(1, 1) = U_h(1, 1).real(); + + Vt_real(0, 0) = Vt_h(0, 0).real(); + Vt_real(0, 1) = Vt_h(0, 1).real(); + Vt_real(0, 2) = Vt_h(0, 2).real(); + Vt_real(1, 0) = Vt_h(1, 0).real(); + Vt_real(1, 1) = Vt_h(1, 1).real(); + Vt_real(1, 2) = Vt_h(1, 2).real(); + Vt_real(2, 0) = Vt_h(2, 0).real(); + Vt_real(2, 1) = Vt_h(2, 1).real(); + Vt_real(2, 2) = Vt_h(2, 2).real(); + } else { + U_real(0, 0) = U_h(0, 0); + U_real(0, 1) = U_h(0, 1); + U_real(1, 0) = U_h(1, 0); + U_real(1, 1) = U_h(1, 1); + + Vt_real(0, 0) = Vt_h(0, 0); + Vt_real(0, 1) = Vt_h(0, 1); + Vt_real(0, 2) = Vt_h(0, 2); + Vt_real(1, 0) = Vt_h(1, 0); + Vt_real(1, 1) = Vt_h(1, 1); + Vt_real(1, 2) = Vt_h(1, 2); + Vt_real(2, 0) = Vt_h(2, 0); + Vt_real(2, 1) = Vt_h(2, 1); + Vt_real(2, 2) = Vt_h(2, 2); + } + + const mag_type one_sqrt2 = static_cast(1 / Kokkos::sqrt(2)); + const mag_type one_sqrt18 = static_cast(1 / Kokkos::sqrt(18)); + const mag_type one_third = static_cast(1. / 3.); + + // Check values of U + // Don't worry about the sign + // it will be check with the + // triple product + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 1)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 1)), one_sqrt2, tol); + + // Check values of Vt + // Don't worry about the sign + // it will be check with the + // triple product + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(0, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(0, 1)), one_sqrt2, tol); + EXPECT_NEAR_KK(Kokkos::abs(Vt_real(0, 2)), 0, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 0)), one_sqrt18, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 1)), one_sqrt18, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 2)), 4 * one_sqrt18, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(2, 0)), 2 * one_third, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(2, 1)), 2 * one_third, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(2, 2)), one_third, tol); + + check_unitary_orthogonal_matrix(U, tol); + check_unitary_orthogonal_matrix(Vt, tol); + + check_triple_product(Aref, S, U, Vt, tol); + + return 0; +} + +template +int impl_analytic_3x2_svd() { + using scalar_type = typename AMatrix::value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + using vector_type = + Kokkos::View; + using KAT_S = Kokkos::ArithTraits; + + const mag_type tol = 100 * KAT_S::eps(); + + AMatrix A("A", 3, 2), U("U", 3, 3), Vt("Vt", 2, 2), Aref("A ref", 3, 2); + vector_type S("S", 2); + + typename AMatrix::HostMirror A_h = Kokkos::create_mirror_view(A); + + // Note this is simply the transpose of the 2x3 matrix in the test above + // A = [3 2] + // [2 3] + // [2 -2] + // USVt = 1/(3*sqrt(2)) [3 1 2*sqrt(2)] * [5 0] * 1/sqrt(2) [1 1] + // [3 -1 -2*sqrt(2)] [0 3] [1 -1] + // [0 4 sqrt(2)] [0 0] + A_h(0, 0) = 3; + A_h(0, 1) = 2; + A_h(1, 0) = 2; + A_h(1, 1) = 3; + A_h(2, 0) = 2; + A_h(2, 1) = -2; + + Kokkos::deep_copy(A, A_h); + Kokkos::deep_copy(Aref, A_h); + + KokkosLapack::svd("A", "A", A, S, U, Vt); + // Don't really need to fence here as we deep_copy right after... + + typename vector_type::HostMirror S_h = Kokkos::create_mirror_view(S); + Kokkos::deep_copy(S_h, S); + typename AMatrix::HostMirror U_h = Kokkos::create_mirror_view(U); + Kokkos::deep_copy(U_h, U); + typename AMatrix::HostMirror Vt_h = Kokkos::create_mirror_view(Vt); + Kokkos::deep_copy(Vt_h, Vt); + + // The singular values for this problem + // are known: sqrt(45) and sqrt(5) + EXPECT_NEAR_KK_REL(S_h(0), static_cast(5), tol); + EXPECT_NEAR_KK_REL(S_h(1), static_cast(3), tol); + + // Both rotations and reflections are valid + // vector basis so we need to check both signs + // to confirm proper SVD was achieved. + Kokkos::View U_real("U real", 3, 3), + Vt_real("Vt real", 2, 2); + if constexpr (KAT_S::is_complex) { + U_real(0, 0) = U_h(0, 0).real(); + U_real(0, 1) = U_h(0, 1).real(); + U_real(0, 2) = U_h(0, 2).real(); + U_real(1, 0) = U_h(1, 0).real(); + U_real(1, 1) = U_h(1, 1).real(); + U_real(1, 2) = U_h(1, 2).real(); + U_real(2, 0) = U_h(2, 0).real(); + U_real(2, 1) = U_h(2, 1).real(); + U_real(2, 2) = U_h(2, 2).real(); + + Vt_real(0, 0) = Vt_h(0, 0).real(); + Vt_real(0, 1) = Vt_h(0, 1).real(); + Vt_real(1, 0) = Vt_h(1, 0).real(); + Vt_real(1, 1) = Vt_h(1, 1).real(); + } else { + U_real(0, 0) = U_h(0, 0); + U_real(0, 1) = U_h(0, 1); + U_real(0, 2) = U_h(0, 2); + U_real(1, 0) = U_h(1, 0); + U_real(1, 1) = U_h(1, 1); + U_real(1, 2) = U_h(1, 2); + U_real(2, 0) = U_h(2, 0); + U_real(2, 1) = U_h(2, 1); + U_real(2, 2) = U_h(2, 2); + + Vt_real(0, 0) = Vt_h(0, 0); + Vt_real(0, 1) = Vt_h(0, 1); + Vt_real(1, 0) = Vt_h(1, 0); + Vt_real(1, 1) = Vt_h(1, 1); + } + + const mag_type one_sqrt2 = static_cast(1 / Kokkos::sqrt(2)); + const mag_type one_sqrt18 = static_cast(1 / Kokkos::sqrt(18)); + const mag_type one_third = static_cast(1. / 3.); + + // Check values of U + // Don't worry about the sign + // it will be check with the + // triple product + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 1)), one_sqrt18, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(0, 2)), 2 * one_third, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 1)), one_sqrt18, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(1, 2)), 2 * one_third, tol); + EXPECT_NEAR_KK(Kokkos::abs(U_real(2, 0)), 0, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(2, 1)), 4 * one_sqrt18, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(U_real(2, 2)), one_third, tol); + + // Check values of Vt + // Don't worry about the sign + // it will be check with the + // triple product + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(0, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(0, 1)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 0)), one_sqrt2, tol); + EXPECT_NEAR_KK_REL(Kokkos::abs(Vt_real(1, 1)), one_sqrt2, tol); + + check_unitary_orthogonal_matrix(U, tol); + check_unitary_orthogonal_matrix(Vt, tol); + + check_triple_product(Aref, S, U, Vt, tol); + + return 0; +} + +template +int impl_test_svd(const int m, const int n) { + using execution_space = typename Device::execution_space; + using scalar_type = typename AMatrix::value_type; + using KAT_S = Kokkos::ArithTraits; + using mag_type = typename KAT_S::mag_type; + using vector_type = + Kokkos::View; + + const mag_type tol = 1000 * KAT_S::eps(); + + AMatrix A("A", m, n), U("U", m, m), Vt("Vt", n, n), Aref("A ref", m, n); + vector_type S("S", Kokkos::min(m, n)); + + const uint64_t seed = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); + Kokkos::Random_XorShift64_Pool rand_pool(seed); + + // Initialize A with random numbers + scalar_type randStart = 0, randEnd = 0; + Test::getRandomBounds(10.0, randStart, randEnd); + Kokkos::fill_random(A, rand_pool, randStart, randEnd); + Kokkos::deep_copy(Aref, A); + + KokkosLapack::svd("A", "A", A, S, U, Vt); + + check_unitary_orthogonal_matrix(U, tol); + check_unitary_orthogonal_matrix(Vt, tol); + + // For larger sizes with the triple product + // we accumulate a bit more error apparently? + check_triple_product(Aref, S, U, Vt, 1000 * tol); + + return 0; +} + +} // namespace Test + +template +int test_svd() { + int ret; + +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_left = + Kokkos::View; + + ret = Test::impl_analytic_2x2_svd(); + + ret = Test::impl_analytic_2x3_svd(); + + ret = Test::impl_test_svd(0, 0); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(1, 1); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(15, 15); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(100, 100); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(100, 70); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(70, 100); + EXPECT_EQ(ret, 0); +#endif + +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_layout_right = + Kokkos::View; + + ret = Test::impl_analytic_2x2_svd(); + + ret = Test::impl_analytic_2x3_svd(); + + ret = Test::impl_test_svd(0, 0); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(1, 1); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(15, 15); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(100, 100); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(100, 70); + EXPECT_EQ(ret, 0); + + ret = Test::impl_test_svd(70, 100); + EXPECT_EQ(ret, 0); +#endif + + return 1; +} + +template +int test_svd_wrapper() { +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || \ + defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + if constexpr (std::is_same_v) { + // Using a device side space with LAPACK/MKL + return test_svd(); + } +#endif + +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + if constexpr (std::is_same_v) { + // Using a Cuda device with CUSOLVER + return test_svd(); + } +#endif + +#if defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER) + if constexpr (std::is_same_v) { + // Using a HIP device with ROCSOLVER + return test_svd(); + } +#endif + + std::cout << "No TPL support enabled, svd is not tested" << std::endl; + return 0; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_float"); + test_svd_wrapper(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_double"); + test_svd_wrapper(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_complex_float"); + test_svd_wrapper, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && \ + !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, svd_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::svd_complex_double"); + test_svd_wrapper, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif