diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index 334dbef682..15a5522f37 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -32,9 +32,9 @@ namespace KokkosLapack { /// \brief Computes a QR factorization of a matrix A /// -/// \tparam ExecutionSpace the space where the kernel will run. -/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. -/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. +/// \tparam ExecutionSpace The space where the kernel will run. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View. /// /// \param space [in] Execution space instance used to specified how to execute /// the geqrf kernels. @@ -58,6 +58,7 @@ namespace KokkosLapack { /// If min(M,N) != 0, then LWORK must be >= N. /// If the QR factorization is successful, then the first /// position of Work contains the optimal LWORK. +/// /// \return = 0: successfull exit /// < 0: if equal to '-i', the i-th argument had an illegal /// value @@ -148,10 +149,17 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, /// /// \param A [in,out] On entry, the M-by-N matrix to be factorized. /// On exit, the elements on and above the diagonal contain -/// the min(M,N)-by-N upper trapezoidal matrix R (R is -/// upper triangular if M >= N); the elements below the -/// diagonal, with the array Tau, represent the unitary -/// matrix Q as a product of min(M,N) elementary reflectors. +/// the min(M,N)-by-N upper trapezoidal matrix R (R is upper +/// triangular if M >= N); the elements below the diagonal, +/// with the array Tau, represent the unitary matrix Q as a +/// product of min(M,N) elementary reflectors. The matrix Q +/// is represented as a product of elementary reflectors +/// Q = H(1) H(2) . . . H(k), where k = min(M,N). +/// Each H(i) has the form +/// H(i) = I - Tau * v * v**H +/// where tau is a complex scalar, and v is a complex vector +/// with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on +/// exit in A(i+1:M,i), and tau in Tau(i). /// \param Tau [out] One-dimensional array of size min(M,N) that contains /// the scalar factors of the elementary reflectors. /// \param Work [out] One-dimensional array of size max(1,LWORK). @@ -159,6 +167,7 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau, /// If min(M,N) != 0, then LWORK must be >= N. /// If the QR factorization is successful, then the first /// position of Work contains the optimal LWORK. +/// /// \return = 0: successfull exit /// < 0: if equal to '-i', the i-th argument had an illegal /// value diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 0ec9388dd4..dfcc7566e9 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -31,12 +31,62 @@ #include #include -#include -#include +//#include +//#include #include namespace Test { +template +void getQR( int const m + , int const n + , typename ViewTypeA::HostMirror const & //h_A + , typename ViewTypeTW::HostMirror const & //h_tau + , typename ViewTypeTW::HostMirror const & //h_work + , typename ViewTypeA::HostMirror & //h_Q + , typename ViewTypeA::HostMirror & h_R + , typename ViewTypeA::HostMirror & //h_QR + ) +{ + using ScalarA = typename ViewTypeA::value_type; + + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + if constexpr (Kokkos::ArithTraits::is_complex) { + h_R(i,j).real() = 0.; + h_R(i,j).imag() = 0.; + } + else { + h_R(i,j) = 0.; + } + } + } + + ViewTypeA I("I", m, m); + typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I); + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + if constexpr (Kokkos::ArithTraits::is_complex) { + if (i == j) { + h_I(i,j).real() = 1.; + } + else { + h_I(i,j).real() = 0.; + } + h_I(i,j).imag() = 0.; + } + else { + if (i == j) { + h_I(i,j) = 1.; + } + else { + h_I(i,j) = 0.; + } + } + } + } +} + template void impl_test_geqrf(int m, int n) { using execution_space = typename Device::execution_space; @@ -59,43 +109,44 @@ void impl_test_geqrf(int m, int n) { ViewTypeTW Work("Work", lwork); // Create host mirrors of device views. - typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); - typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); - typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); + typename ViewTypeA::HostMirror h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::HostMirror h_Aorig = Kokkos::create_mirror_view(A); + typename ViewTypeTW::HostMirror h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work); // Initialize data. if ((m == 3) && (n == 3)) { if constexpr (Kokkos::ArithTraits::is_complex) { - h_A(0, 0).real() = 12.; - h_A(0, 1).real() = -51.; - h_A(0, 2).real() = 4.; + h_A(0,0).real() = 12.; + h_A(0,1).real() = -51.; + h_A(0,2).real() = 4.; - h_A(1, 0).real() = 6.; - h_A(1, 1).real() = 167.; - h_A(1, 2).real() = -68.; + h_A(1,0).real() = 6.; + h_A(1,1).real() = 167.; + h_A(1,2).real() = -68.; - h_A(2, 0).real() = -4.; - h_A(2, 1).real() = 24.; - h_A(2, 2).real() = -41.; + h_A(2,0).real() = -4.; + h_A(2,1).real() = 24.; + h_A(2,2).real() = -41.; for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { - h_A(i, j).imag() = 0.; + h_A(i,j).imag() = 0.; } } } else { - h_A(0, 0) = 12.; - h_A(0, 1) = -51.; - h_A(0, 2) = 4.; + h_A(0,0) = 12.; + h_A(0,1) = -51.; + h_A(0,2) = 4.; - h_A(1, 0) = 6.; - h_A(1, 1) = 167.; - h_A(1, 2) = -68.; + h_A(1,0) = 6.; + h_A(1,1) = 167.; + h_A(1,2) = -68.; - h_A(2, 0) = -4.; - h_A(2, 1) = 24.; - h_A(2, 2) = -41.; + h_A(2,0) = -4.; + h_A(2,1) = 24.; + h_A(2,2) = -41.; } Kokkos::deep_copy(A, h_A); @@ -108,11 +159,15 @@ void impl_test_geqrf(int m, int n) { Kokkos::deep_copy(h_A, A); } + Kokkos::deep_copy(h_Aorig, h_A); + +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG for (int i(0); i < m; ++i) { for (int j(0); j < n; ++j) { std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl; } } +#endif Kokkos::fence(); @@ -126,13 +181,17 @@ void impl_test_geqrf(int m, int n) { FAIL(); return; } + Kokkos::fence(); + EXPECT_EQ(rc, 0) << "Failed geqrf() test: rc = " << rc; + // Get the results Kokkos::deep_copy(h_A, A); Kokkos::deep_copy(h_tau, Tau); Kokkos::deep_copy(h_work, Work); +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG std::cout << "rc = " << rc << std::endl; for (int i(0); i < minMN; ++i) { for (int j(0); j < n; ++j) { @@ -145,6 +204,38 @@ void impl_test_geqrf(int m, int n) { for (int i(0); i < lwork; ++i) { std::cout << "work(" << i << ") = " << h_work[i] << std::endl; } +#endif + + ViewTypeA Q ("Q", m, m); + ViewTypeA R ("R", m, n); + ViewTypeA QR("QR", m, n); + + typename ViewTypeA::HostMirror h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::HostMirror h_R = Kokkos::create_mirror_view(R); + typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR); + + getQR(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR); + +#if 1 // def HAVE_KOKKOSKERNELS_DEBUG + for (int i(0); i < m; ++i) { + for (int j(0); j < m; ++j) { + std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl; + } + } + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl; + } + } + for (int i(0); i < m; ++i) { + for (int j(0); j < n; ++j) { + std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl; + } + } +#endif + + if ((m == 3) && (n == 3)) { + } // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B). // void gemm( const execution_space & space