Backup

kokkos · May 20, 2024 · 51279f8 · 51279f8
1 parent 32298f0
commit 51279f8
Show file tree

Hide file tree

Showing 2 changed files with 131 additions and 31 deletions.
diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp
@@ -32,9 +32,9 @@ namespace KokkosLapack {
 
 /// \brief Computes a QR factorization of a matrix A
 ///
-/// \tparam ExecutionSpace the space where the kernel will run.
-/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View.
-/// \tparam TWArray Type of arrays Tau and Work, as a 1-D Kokkos::View.
+/// \tparam ExecutionSpace The space where the kernel will run.
+/// \tparam AMatrix        Type of matrix A, as a 2-D Kokkos::View.
+/// \tparam TWArray        Type of arrays Tau and Work, as a 1-D Kokkos::View.
 ///
 /// \param space [in] Execution space instance used to specified how to execute
 ///                   the geqrf kernels.
@@ -58,6 +58,7 @@ namespace KokkosLapack {
 ///                   If min(M,N) != 0, then LWORK must be >= N.
 ///                   If the QR factorization is successful, then the first
 ///                   position of Work contains the optimal LWORK.
+///
 /// \return           = 0: successfull exit
 ///                   < 0: if equal to '-i', the i-th argument had an illegal
 ///                        value
@@ -148,17 +149,25 @@ int geqrf(const ExecutionSpace& space, const AMatrix& A, const TWArray& Tau,
 ///
 /// \param A [in,out] On entry, the M-by-N matrix to be factorized.
 ///                   On exit, the elements on and above the diagonal contain
-///                   the min(M,N)-by-N upper trapezoidal matrix R (R is
-///                   upper triangular if M >= N); the elements below the
-///                   diagonal, with the array Tau, represent the unitary
-///                   matrix Q as a product of min(M,N) elementary reflectors.
+///                   the min(M,N)-by-N upper trapezoidal matrix R (R is upper
+///                   triangular if M >= N); the elements below the diagonal,
+///                   with the array Tau, represent the unitary matrix Q as a
+///                   product of min(M,N) elementary reflectors. The matrix Q
+///                   is represented as a product of elementary reflectors
+///                     Q = H(1) H(2) . . . H(k), where k = min(M,N).
+///                   Each H(i) has the form
+///                     H(i) = I - Tau * v * v**H
+///                   where tau is a complex scalar, and v is a complex vector
+///                   with v(1:i-1) = 0 and v(i) = 1; v(i+1:M) is stored on
+///                   exit in A(i+1:M,i), and tau in Tau(i).
 /// \param Tau [out]  One-dimensional array of size min(M,N) that contains
 ///                   the scalar factors of the elementary reflectors.
 /// \param Work [out] One-dimensional array of size max(1,LWORK).
 ///                   If min(M,N) == 0, then LWORK must be >= 1.
 ///                   If min(M,N) != 0, then LWORK must be >= N.
 ///                   If the QR factorization is successful, then the first
 ///                   position of Work contains the optimal LWORK.
+///
 /// \return           = 0: successfull exit
 ///                   < 0: if equal to '-i', the i-th argument had an illegal
 ///                        value

diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp
@@ -31,12 +31,62 @@
 #include <Kokkos_Random.hpp>
 
 #include <KokkosLapack_geqrf.hpp>
-#include <KokkosBlas2_gemv.hpp>
-#include <KokkosBlas3_gemm.hpp>
+//#include <KokkosBlas2_gemv.hpp>
+//#include <KokkosBlas3_gemm.hpp>
 #include <KokkosKernels_TestUtils.hpp>
 
 namespace Test {
 
+template <class ViewTypeA, class ViewTypeTW>
+void getQR( int                             const   m
+          , int                             const   n
+          , typename ViewTypeA::HostMirror  const & //h_A
+          , typename ViewTypeTW::HostMirror const & //h_tau
+          , typename ViewTypeTW::HostMirror const & //h_work
+          , typename ViewTypeA::HostMirror        & //h_Q
+          , typename ViewTypeA::HostMirror        & h_R
+          , typename ViewTypeA::HostMirror        & //h_QR
+          )
+{
+  using ScalarA = typename ViewTypeA::value_type;
+
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+        h_R(i,j).real() = 0.;
+        h_R(i,j).imag() = 0.;
+      }
+      else {
+        h_R(i,j) = 0.;
+      }
+    }
+  }
+
+  ViewTypeA I("I", m, m);
+  typename ViewTypeA::HostMirror h_I = Kokkos::create_mirror_view(I);
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < m; ++j) {
+      if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
+	if (i == j) {
+          h_I(i,j).real() = 1.;
+	}
+	else {
+          h_I(i,j).real() = 0.;
+	}
+        h_I(i,j).imag() = 0.;
+      }
+      else {
+	if (i == j) {
+          h_I(i,j) = 1.;
+	}
+	else {
+          h_I(i,j) = 0.;
+	}
+      }
+    }
+  }
+}
+
 template <class ViewTypeA, class ViewTypeTW, class Device>
 void impl_test_geqrf(int m, int n) {
   using execution_space = typename Device::execution_space;
@@ -59,43 +109,44 @@ void impl_test_geqrf(int m, int n) {
   ViewTypeTW Work("Work", lwork);
 
   // Create host mirrors of device views.
-  typename ViewTypeA::HostMirror  h_A    = Kokkos::create_mirror_view(A);
-  typename ViewTypeTW::HostMirror h_tau  = Kokkos::create_mirror_view(Tau);
-  typename ViewTypeTW::HostMirror h_work = Kokkos::create_mirror_view(Work);
+  typename ViewTypeA::HostMirror  h_A     = Kokkos::create_mirror_view(A);
+  typename ViewTypeA::HostMirror  h_Aorig = Kokkos::create_mirror_view(A);
+  typename ViewTypeTW::HostMirror h_tau   = Kokkos::create_mirror_view(Tau);
+  typename ViewTypeTW::HostMirror h_work  = Kokkos::create_mirror_view(Work);
 
   // Initialize data.
   if ((m == 3) && (n == 3)) {
     if constexpr (Kokkos::ArithTraits<ScalarA>::is_complex) {
-      h_A(0, 0).real() = 12.;
-      h_A(0, 1).real() = -51.;
-      h_A(0, 2).real() = 4.;
+      h_A(0,0).real() = 12.;
+      h_A(0,1).real() = -51.;
+      h_A(0,2).real() = 4.;
 
-      h_A(1, 0).real() = 6.;
-      h_A(1, 1).real() = 167.;
-      h_A(1, 2).real() = -68.;
+      h_A(1,0).real() = 6.;
+      h_A(1,1).real() = 167.;
+      h_A(1,2).real() = -68.;
 
-      h_A(2, 0).real() = -4.;
-      h_A(2, 1).real() = 24.;
-      h_A(2, 2).real() = -41.;
+      h_A(2,0).real() = -4.;
+      h_A(2,1).real() = 24.;
+      h_A(2,2).real() = -41.;
 
       for (int i(0); i < m; ++i) {
         for (int j(0); j < n; ++j) {
-          h_A(i, j).imag() = 0.;
+          h_A(i,j).imag() = 0.;
 	}
       }
     }
     else {
-      h_A(0, 0) = 12.;
-      h_A(0, 1) = -51.;
-      h_A(0, 2) = 4.;
+      h_A(0,0) = 12.;
+      h_A(0,1) = -51.;
+      h_A(0,2) = 4.;
 
-      h_A(1, 0) = 6.;
-      h_A(1, 1) = 167.;
-      h_A(1, 2) = -68.;
+      h_A(1,0) = 6.;
+      h_A(1,1) = 167.;
+      h_A(1,2) = -68.;
 
-      h_A(2, 0) = -4.;
-      h_A(2, 1) = 24.;
-      h_A(2, 2) = -41.;
+      h_A(2,0) = -4.;
+      h_A(2,1) = 24.;
+      h_A(2,2) = -41.;
     }
 
     Kokkos::deep_copy(A, h_A);
@@ -108,11 +159,15 @@ void impl_test_geqrf(int m, int n) {
     Kokkos::deep_copy(h_A, A);
   }
 
+  Kokkos::deep_copy(h_Aorig, h_A);
+
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   for (int i(0); i < m; ++i) {
     for (int j(0); j < n; ++j) {
       std::cout << "A(" << i << "," << j << ") = " << h_A(i,j) << std::endl;
     }
   }
+#endif
 
   Kokkos::fence();
 
@@ -126,13 +181,17 @@ void impl_test_geqrf(int m, int n) {
     FAIL();
     return;
   }
+
   Kokkos::fence();
 
+  EXPECT_EQ(rc, 0) << "Failed geqrf() test: rc = " << rc;
+
   // Get the results
   Kokkos::deep_copy(h_A, A);
   Kokkos::deep_copy(h_tau, Tau);
   Kokkos::deep_copy(h_work, Work);
 
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
   std::cout << "rc = " << rc << std::endl;
   for (int i(0); i < minMN; ++i) {
     for (int j(0); j < n; ++j) {
@@ -145,6 +204,38 @@ void impl_test_geqrf(int m, int n) {
   for (int i(0); i < lwork; ++i) {
     std::cout << "work(" << i << ") = " << h_work[i] << std::endl;
   }
+#endif
+
+  ViewTypeA Q ("Q",  m, m);
+  ViewTypeA R ("R",  m, n);
+  ViewTypeA QR("QR", m, n);
+
+  typename ViewTypeA::HostMirror h_Q  = Kokkos::create_mirror_view(Q);
+  typename ViewTypeA::HostMirror h_R  = Kokkos::create_mirror_view(R);
+  typename ViewTypeA::HostMirror h_QR = Kokkos::create_mirror_view(QR);
+
+  getQR<ViewTypeA, ViewTypeTW>(m, n, h_A, h_tau, h_work, h_Q, h_R, h_QR);
+
+#if 1 // def HAVE_KOKKOSKERNELS_DEBUG
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < m; ++j) {
+      std::cout << "Q(" << i << "," << j << ") = " << h_Q(i,j) << std::endl;
+    }
+  }
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      std::cout << "R(" << i << "," << j << ") = " << h_R(i,j) << std::endl;
+    }
+  }
+  for (int i(0); i < m; ++i) {
+    for (int j(0); j < n; ++j) {
+      std::cout << "QR(" << i << "," << j << ") = " << h_QR(i,j) << std::endl;
+    }
+  }
+#endif
+
+  if ((m == 3) && (n == 3)) {
+  }
 
   // Dense matrix-matrix multiply: C = beta*C + alpha*op(A)*op(B).
   // void gemm( const execution_space                & space