diff --git a/splines__linear__problem__band_8hpp_source.html b/splines__linear__problem__band_8hpp_source.html index 1b2e14eb9..7a1ee48bf 100644 --- a/splines__linear__problem__band_8hpp_source.html +++ b/splines__linear__problem__band_8hpp_source.html @@ -149,194 +149,195 @@
20#include <lapacke.h>
21#endif
22
-
23#include <KokkosBatched_Gbtrs.hpp>
-
24#include <KokkosBatched_Util.hpp>
-
25
-
26#include "splines_linear_problem.hpp"
-
27
-
28namespace ddc::detail {
-
29
-
30/**
-
31 * @brief A band linear problem dedicated to the computation of a spline approximation.
-
32 *
-
33 * The storage format is dense row-major. Lapack is used to perform every matrix and linear solver-related operations.
-
34 *
-
35 * Given the linear system A*x=b, we assume that A is a square (n by n)
-
36 * with ku superdiagonals and kl subdiagonals.
-
37 * All non-zero elements of A are stored in the rectangular matrix q, using
-
38 * the format required by DGBTRF (LAPACK): diagonals of A are rows of q.
-
39 * q has 2*kl rows for the subdiagonals, 1 row for the diagonal, and ku rows
-
40 * for the superdiagonals. (The kl additional rows are needed for pivoting.)
-
41 *
-
42 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
-
43 */
-
44template <class ExecSpace>
-
45class SplinesLinearProblemBand : public SplinesLinearProblem<ExecSpace>
-
46{
-
47public:
-
48 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
-
49 using SplinesLinearProblem<ExecSpace>::size;
-
50
-
51protected:
-
52 std::size_t m_kl; // no. of subdiagonals
-
53 std::size_t m_ku; // no. of superdiagonals
-
54 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
-
55 m_q; // band matrix representation
-
56 Kokkos::DualView<int*, typename ExecSpace::memory_space> m_ipiv; // pivot indices
-
57
-
58public:
-
59 /**
-
60 * @brief SplinesLinearProblemBand constructor.
-
61 *
-
62 * @param mat_size The size of one of the dimensions of the square matrix.
-
63 * @param kl The number of subdiagonals of the matrix.
-
64 * @param ku The number of superdiagonals of the matrix.
-
65 */
-
66 explicit SplinesLinearProblemBand(
-
67 std::size_t const mat_size,
-
68 std::size_t const kl,
-
69 std::size_t const ku)
-
70 : SplinesLinearProblem<ExecSpace>(mat_size)
-
71 , m_kl(kl)
-
72 , m_ku(ku)
-
73 /*
-
74 * The matrix itself stored in band format requires a (kl + ku + 1)*mat_size
-
75 * allocation, but the LU-factorization requires an additional kl*mat_size block
-
76 */
-
77 , m_q("q", 2 * kl + ku + 1, mat_size)
-
78 , m_ipiv("ipiv", mat_size)
-
79 {
-
80 assert(m_kl <= mat_size);
-
81 assert(m_ku <= mat_size);
-
82
-
83 Kokkos::deep_copy(m_q.h_view, 0.);
-
84 }
-
85
-
86private:
-
87 std::size_t band_storage_row_index(std::size_t const i, std::size_t const j) const
-
88 {
-
89 return m_kl + m_ku + i - j;
-
90 }
-
91
-
92public:
-
93 double get_element(std::size_t const i, std::size_t const j) const override
-
94 {
-
95 assert(i < size());
-
96 assert(j < size());
-
97 /*
-
98 * The "row index" of the band format storage identify the (sub/super)-diagonal
-
99 * while the column index is actually the column index of the matrix. Two layouts
-
100 * are supported by LAPACKE. The m_kl first lines are irrelevant for the storage of
-
101 * the matrix itself but required for the storage of its LU factorization.
-
102 */
-
103 if (i >= std::
-
104 max(static_cast<std::ptrdiff_t>(0),
-
105 static_cast<std::ptrdiff_t>(j) - static_cast<std::ptrdiff_t>(m_ku))
-
106 && i < std::min(size(), j + m_kl + 1)) {
-
107 return m_q.h_view(band_storage_row_index(i, j), j);
-
108 }
-
109
-
110 return 0.0;
-
111 }
-
112
-
113 void set_element(std::size_t const i, std::size_t const j, double const aij) override
-
114 {
-
115 assert(i < size());
-
116 assert(j < size());
-
117 /*
-
118 * The "row index" of the band format storage identify the (sub/super)-diagonal
-
119 * while the column index is actually the column index of the matrix. Two layouts
-
120 * are supported by LAPACKE. The m_kl first lines are irrelevant for the storage of
-
121 * the matrix itself but required for the storage of its LU factorization.
-
122 */
-
123 if (i >= std::
-
124 max(static_cast<std::ptrdiff_t>(0),
-
125 static_cast<std::ptrdiff_t>(j) - static_cast<std::ptrdiff_t>(m_ku))
-
126 && i < std::min(size(), j + m_kl + 1)) {
-
127 m_q.h_view(band_storage_row_index(i, j), j) = aij;
-
128 } else {
-
129 assert(std::fabs(aij) < 1e-20);
-
130 }
-
131 }
-
132
-
133 /**
-
134 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
-
135 *
-
136 * LU-factorize the matrix A and store the pivots using the LAPACK dgbtrf() implementation.
-
137 */
-
138 void setup_solver() override
-
139 {
-
140 int const info = LAPACKE_dgbtrf(
-
141 LAPACK_ROW_MAJOR,
-
142 size(),
+
23#include <KokkosBatched_Util.hpp>
+
24
+
25#include "kokkos-kernels-ext/KokkosBatched_Gbtrs.hpp"
+
26
+
27#include "splines_linear_problem.hpp"
+
28
+
29namespace ddc::detail {
+
30
+
31/**
+
32 * @brief A band linear problem dedicated to the computation of a spline approximation.
+
33 *
+
34 * The storage format is dense row-major. Lapack is used to perform every matrix and linear solver-related operations.
+
35 *
+
36 * Given the linear system A*x=b, we assume that A is a square (n by n)
+
37 * with ku superdiagonals and kl subdiagonals.
+
38 * All non-zero elements of A are stored in the rectangular matrix q, using
+
39 * the format required by DGBTRF (LAPACK): diagonals of A are rows of q.
+
40 * q has 2*kl rows for the subdiagonals, 1 row for the diagonal, and ku rows
+
41 * for the superdiagonals. (The kl additional rows are needed for pivoting.)
+
42 *
+
43 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
+
44 */
+
45template <class ExecSpace>
+
46class SplinesLinearProblemBand : public SplinesLinearProblem<ExecSpace>
+
47{
+
48public:
+
49 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
+
50 using SplinesLinearProblem<ExecSpace>::size;
+
51
+
52protected:
+
53 std::size_t m_kl; // no. of subdiagonals
+
54 std::size_t m_ku; // no. of superdiagonals
+
55 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space>
+
56 m_q; // band matrix representation
+
57 Kokkos::DualView<int*, typename ExecSpace::memory_space> m_ipiv; // pivot indices
+
58
+
59public:
+
60 /**
+
61 * @brief SplinesLinearProblemBand constructor.
+
62 *
+
63 * @param mat_size The size of one of the dimensions of the square matrix.
+
64 * @param kl The number of subdiagonals of the matrix.
+
65 * @param ku The number of superdiagonals of the matrix.
+
66 */
+
67 explicit SplinesLinearProblemBand(
+
68 std::size_t const mat_size,
+
69 std::size_t const kl,
+
70 std::size_t const ku)
+
71 : SplinesLinearProblem<ExecSpace>(mat_size)
+
72 , m_kl(kl)
+
73 , m_ku(ku)
+
74 /*
+
75 * The matrix itself stored in band format requires a (kl + ku + 1)*mat_size
+
76 * allocation, but the LU-factorization requires an additional kl*mat_size block
+
77 */
+
78 , m_q("q", 2 * kl + ku + 1, mat_size)
+
79 , m_ipiv("ipiv", mat_size)
+
80 {
+
81 assert(m_kl <= mat_size);
+
82 assert(m_ku <= mat_size);
+
83
+
84 Kokkos::deep_copy(m_q.h_view, 0.);
+
85 }
+
86
+
87private:
+
88 std::size_t band_storage_row_index(std::size_t const i, std::size_t const j) const
+
89 {
+
90 return m_kl + m_ku + i - j;
+
91 }
+
92
+
93public:
+
94 double get_element(std::size_t const i, std::size_t const j) const override
+
95 {
+
96 assert(i < size());
+
97 assert(j < size());
+
98 /*
+
99 * The "row index" of the band format storage identify the (sub/super)-diagonal
+
100 * while the column index is actually the column index of the matrix. Two layouts
+
101 * are supported by LAPACKE. The m_kl first lines are irrelevant for the storage of
+
102 * the matrix itself but required for the storage of its LU factorization.
+
103 */
+
104 if (i >= std::
+
105 max(static_cast<std::ptrdiff_t>(0),
+
106 static_cast<std::ptrdiff_t>(j) - static_cast<std::ptrdiff_t>(m_ku))
+
107 && i < std::min(size(), j + m_kl + 1)) {
+
108 return m_q.h_view(band_storage_row_index(i, j), j);
+
109 }
+
110
+
111 return 0.0;
+
112 }
+
113
+
114 void set_element(std::size_t const i, std::size_t const j, double const aij) override
+
115 {
+
116 assert(i < size());
+
117 assert(j < size());
+
118 /*
+
119 * The "row index" of the band format storage identify the (sub/super)-diagonal
+
120 * while the column index is actually the column index of the matrix. Two layouts
+
121 * are supported by LAPACKE. The m_kl first lines are irrelevant for the storage of
+
122 * the matrix itself but required for the storage of its LU factorization.
+
123 */
+
124 if (i >= std::
+
125 max(static_cast<std::ptrdiff_t>(0),
+
126 static_cast<std::ptrdiff_t>(j) - static_cast<std::ptrdiff_t>(m_ku))
+
127 && i < std::min(size(), j + m_kl + 1)) {
+
128 m_q.h_view(band_storage_row_index(i, j), j) = aij;
+
129 } else {
+
130 assert(std::fabs(aij) < 1e-20);
+
131 }
+
132 }
+
133
+
134 /**
+
135 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
+
136 *
+
137 * LU-factorize the matrix A and store the pivots using the LAPACK dgbtrf() implementation.
+
138 */
+
139 void setup_solver() override
+
140 {
+
141 int const info = LAPACKE_dgbtrf(
+
142 LAPACK_ROW_MAJOR,
143 size(),
-
144 m_kl,
-
145 m_ku,
-
146 m_q.h_view.data(),
-
147 m_q.h_view.stride(
-
148 0), // m_q.h_view.stride(0) if LAPACK_ROW_MAJOR, m_q.h_view.stride(1) if LAPACK_COL_MAJOR
-
149 m_ipiv.h_view.data());
-
150 if (info != 0) {
-
151 throw std::runtime_error(
-
152 "LAPACKE_dgbtrf failed with error code " + std::to_string(info));
-
153 }
-
154
-
155 // Convert 1-based index to 0-based index
-
156 for (int i = 0; i < size(); ++i) {
-
157 m_ipiv.h_view(i) -= 1;
-
158 }
-
159
-
160 // Push on device
-
161 m_q.modify_host();
-
162 m_q.sync_device();
-
163 m_ipiv.modify_host();
-
164 m_ipiv.sync_device();
-
165 }
-
166
-
167 /**
-
168 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
-
169 *
-
170 * The solver method is band gaussian elimination with partial pivoting using the LU-factorized matrix A. The implementation is LAPACK method dgbtrs.
-
171 *
-
172 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
-
173 * @param transpose Choose between the direct or transposed version of the linear problem.
-
174 */
-
175 void solve(MultiRHS const b, bool const transpose) const override
-
176 {
-
177 assert(b.extent(0) == size());
-
178
-
179 std::size_t const kl_proxy = m_kl;
-
180 std::size_t const ku_proxy = m_ku;
-
181 auto q_device = m_q.d_view;
-
182 auto ipiv_device = m_ipiv.d_view;
-
183 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
-
184 if (transpose) {
-
185 Kokkos::parallel_for(
-
186 "gbtrs",
-
187 policy,
-
188 KOKKOS_LAMBDA(const int i) {
-
189 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
-
190 KokkosBatched::SerialGbtrs<
-
191 KokkosBatched::Trans::Transpose,
-
192 KokkosBatched::Algo::Gbtrs::Unblocked>::
-
193 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
-
194 });
-
195 } else {
-
196 Kokkos::parallel_for(
-
197 "gbtrs",
-
198 policy,
-
199 KOKKOS_LAMBDA(const int i) {
-
200 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
-
201 KokkosBatched::SerialGbtrs<
-
202 KokkosBatched::Trans::NoTranspose,
-
203 KokkosBatched::Algo::Gbtrs::Unblocked>::
-
204 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
-
205 });
-
206 }
-
207 }
-
208};
-
209
-
210} // namespace ddc::detail
+
144 size(),
+
145 m_kl,
+
146 m_ku,
+
147 m_q.h_view.data(),
+
148 m_q.h_view.stride(
+
149 0), // m_q.h_view.stride(0) if LAPACK_ROW_MAJOR, m_q.h_view.stride(1) if LAPACK_COL_MAJOR
+
150 m_ipiv.h_view.data());
+
151 if (info != 0) {
+
152 throw std::runtime_error(
+
153 "LAPACKE_dgbtrf failed with error code " + std::to_string(info));
+
154 }
+
155
+
156 // Convert 1-based index to 0-based index
+
157 for (int i = 0; i < size(); ++i) {
+
158 m_ipiv.h_view(i) -= 1;
+
159 }
+
160
+
161 // Push on device
+
162 m_q.modify_host();
+
163 m_q.sync_device();
+
164 m_ipiv.modify_host();
+
165 m_ipiv.sync_device();
+
166 }
+
167
+
168 /**
+
169 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
+
170 *
+
171 * The solver method is band gaussian elimination with partial pivoting using the LU-factorized matrix A. The implementation is LAPACK method dgbtrs.
+
172 *
+
173 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
+
174 * @param transpose Choose between the direct or transposed version of the linear problem.
+
175 */
+
176 void solve(MultiRHS const b, bool const transpose) const override
+
177 {
+
178 assert(b.extent(0) == size());
+
179
+
180 std::size_t const kl_proxy = m_kl;
+
181 std::size_t const ku_proxy = m_ku;
+
182 auto q_device = m_q.d_view;
+
183 auto ipiv_device = m_ipiv.d_view;
+
184 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
+
185 if (transpose) {
+
186 Kokkos::parallel_for(
+
187 "gbtrs",
+
188 policy,
+
189 KOKKOS_LAMBDA(const int i) {
+
190 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
+
191 KokkosBatched::SerialGbtrs<
+
192 KokkosBatched::Trans::Transpose,
+
193 KokkosBatched::Algo::Level3::Unblocked>::
+
194 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
+
195 });
+
196 } else {
+
197 Kokkos::parallel_for(
+
198 "gbtrs",
+
199 policy,
+
200 KOKKOS_LAMBDA(const int i) {
+
201 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
+
202 KokkosBatched::SerialGbtrs<
+
203 KokkosBatched::Trans::NoTranspose,
+
204 KokkosBatched::Algo::Level3::Unblocked>::
+
205 invoke(q_device, sub_b, ipiv_device, kl_proxy, ku_proxy);
+
206 });
+
207 }
+
208 }
+
209};
+
210
+
211} // namespace ddc::detail
ddc
The top-level namespace of DDC.
Definition aligned_allocator.hpp:11
diff --git a/splines__linear__problem__dense_8hpp_source.html b/splines__linear__problem__dense_8hpp_source.html index 2d8cc5df4..604c8c4e5 100644 --- a/splines__linear__problem__dense_8hpp_source.html +++ b/splines__linear__problem__dense_8hpp_source.html @@ -147,139 +147,140 @@
18#include <lapacke.h>
19#endif
20
-
21#include <KokkosBatched_Getrs.hpp>
-
22#include <KokkosBatched_Util.hpp>
-
23
-
24#include "splines_linear_problem.hpp"
-
25
-
26namespace ddc::detail {
-
27
-
28/**
-
29 * @brief A dense linear problem dedicated to the computation of a spline approximation.
-
30 *
-
31 * The storage format is dense row-major. Lapack is used to perform every matrix and linear solver-related operations.
-
32 *
-
33 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
-
34 */
-
35template <class ExecSpace>
-
36class SplinesLinearProblemDense : public SplinesLinearProblem<ExecSpace>
-
37{
-
38public:
-
39 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
-
40 using SplinesLinearProblem<ExecSpace>::size;
-
41
-
42protected:
-
43 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_a;
-
44 Kokkos::DualView<int*, typename ExecSpace::memory_space> m_ipiv;
-
45
-
46public:
-
47 /**
-
48 * @brief SplinesLinearProblemDense constructor.
-
49 *
-
50 * @param mat_size The size of one of the dimensions of the square matrix.
-
51 */
-
52 explicit SplinesLinearProblemDense(std::size_t const mat_size)
-
53 : SplinesLinearProblem<ExecSpace>(mat_size)
-
54 , m_a("a", mat_size, mat_size)
-
55 , m_ipiv("ipiv", mat_size)
-
56 {
-
57 Kokkos::deep_copy(m_a.h_view, 0.);
-
58 }
-
59
-
60 double get_element(std::size_t const i, std::size_t const j) const override
-
61 {
-
62 assert(i < size());
-
63 assert(j < size());
-
64 return m_a.h_view(i, j);
-
65 }
-
66
-
67 void set_element(std::size_t const i, std::size_t const j, double const aij) override
-
68 {
-
69 assert(i < size());
-
70 assert(j < size());
-
71 m_a.h_view(i, j) = aij;
-
72 }
-
73
-
74 /**
-
75 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
-
76 *
-
77 * LU-factorize the matrix A and store the pivots using the LAPACK dgetrf() implementation.
-
78 */
-
79 void setup_solver() override
-
80 {
-
81 int const info = LAPACKE_dgetrf(
-
82 LAPACK_ROW_MAJOR,
-
83 size(),
+
21#include <KokkosBatched_Util.hpp>
+
22
+
23#include "kokkos-kernels-ext/KokkosBatched_Getrs.hpp"
+
24
+
25#include "splines_linear_problem.hpp"
+
26
+
27namespace ddc::detail {
+
28
+
29/**
+
30 * @brief A dense linear problem dedicated to the computation of a spline approximation.
+
31 *
+
32 * The storage format is dense row-major. Lapack is used to perform every matrix and linear solver-related operations.
+
33 *
+
34 * @tparam ExecSpace The Kokkos::ExecutionSpace on which operations related to the matrix are supposed to be performed.
+
35 */
+
36template <class ExecSpace>
+
37class SplinesLinearProblemDense : public SplinesLinearProblem<ExecSpace>
+
38{
+
39public:
+
40 using typename SplinesLinearProblem<ExecSpace>::MultiRHS;
+
41 using SplinesLinearProblem<ExecSpace>::size;
+
42
+
43protected:
+
44 Kokkos::DualView<double**, Kokkos::LayoutRight, typename ExecSpace::memory_space> m_a;
+
45 Kokkos::DualView<int*, typename ExecSpace::memory_space> m_ipiv;
+
46
+
47public:
+
48 /**
+
49 * @brief SplinesLinearProblemDense constructor.
+
50 *
+
51 * @param mat_size The size of one of the dimensions of the square matrix.
+
52 */
+
53 explicit SplinesLinearProblemDense(std::size_t const mat_size)
+
54 : SplinesLinearProblem<ExecSpace>(mat_size)
+
55 , m_a("a", mat_size, mat_size)
+
56 , m_ipiv("ipiv", mat_size)
+
57 {
+
58 Kokkos::deep_copy(m_a.h_view, 0.);
+
59 }
+
60
+
61 double get_element(std::size_t const i, std::size_t const j) const override
+
62 {
+
63 assert(i < size());
+
64 assert(j < size());
+
65 return m_a.h_view(i, j);
+
66 }
+
67
+
68 void set_element(std::size_t const i, std::size_t const j, double const aij) override
+
69 {
+
70 assert(i < size());
+
71 assert(j < size());
+
72 m_a.h_view(i, j) = aij;
+
73 }
+
74
+
75 /**
+
76 * @brief Perform a pre-process operation on the solver. Must be called after filling the matrix.
+
77 *
+
78 * LU-factorize the matrix A and store the pivots using the LAPACK dgetrf() implementation.
+
79 */
+
80 void setup_solver() override
+
81 {
+
82 int const info = LAPACKE_dgetrf(
+
83 LAPACK_ROW_MAJOR,
84 size(),
-
85 m_a.h_view.data(),
-
86 size(),
-
87 m_ipiv.h_view.data());
-
88 if (info != 0) {
-
89 throw std::runtime_error(
-
90 "LAPACKE_dgetrf failed with error code " + std::to_string(info));
-
91 }
-
92
-
93 // Convert 1-based index to 0-based index
-
94 for (int i = 0; i < size(); ++i) {
-
95 m_ipiv.h_view(i) -= 1;
-
96 }
-
97
-
98 // Push on device
-
99 m_a.modify_host();
-
100 m_a.sync_device();
-
101 m_ipiv.modify_host();
-
102 m_ipiv.sync_device();
-
103 }
-
104
-
105 /**
-
106 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
-
107 *
-
108 * The solver method is gaussian elimination with partial pivoting using the LU-factorized matrix A. The implementation is LAPACK method dgetrs.
-
109 *
-
110 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
-
111 * @param transpose Choose between the direct or transposed version of the linear problem.
-
112 */
-
113 void solve(MultiRHS const b, bool const transpose) const override
-
114 {
-
115 assert(b.extent(0) == size());
-
116
-
117 // For order 1 splines, size() can be 0 then we bypass the solver call.
-
118 if (size() == 0) {
-
119 return;
-
120 }
-
121
-
122 auto a_device = m_a.d_view;
-
123 auto ipiv_device = m_ipiv.d_view;
-
124
-
125 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
-
126
-
127 if (transpose) {
-
128 Kokkos::parallel_for(
-
129 "gerts",
-
130 policy,
-
131 KOKKOS_LAMBDA(const int i) {
-
132 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
-
133 KokkosBatched::SerialGetrs<
-
134 KokkosBatched::Trans::Transpose,
-
135 KokkosBatched::Algo::Getrs::Unblocked>::
-
136 invoke(a_device, ipiv_device, sub_b);
-
137 });
-
138 } else {
-
139 Kokkos::parallel_for(
-
140 "gerts",
-
141 policy,
-
142 KOKKOS_LAMBDA(const int i) {
-
143 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
-
144 KokkosBatched::SerialGetrs<
-
145 KokkosBatched::Trans::NoTranspose,
-
146 KokkosBatched::Algo::Getrs::Unblocked>::
-
147 invoke(a_device, ipiv_device, sub_b);
-
148 });
-
149 }
-
150 }
-
151};
-
152
-
153} // namespace ddc::detail
+
85 size(),
+
86 m_a.h_view.data(),
+
87 size(),
+
88 m_ipiv.h_view.data());
+
89 if (info != 0) {
+
90 throw std::runtime_error(
+
91 "LAPACKE_dgetrf failed with error code " + std::to_string(info));
+
92 }
+
93
+
94 // Convert 1-based index to 0-based index
+
95 for (int i = 0; i < size(); ++i) {
+
96 m_ipiv.h_view(i) -= 1;
+
97 }
+
98
+
99 // Push on device
+
100 m_a.modify_host();
+
101 m_a.sync_device();
+
102 m_ipiv.modify_host();
+
103 m_ipiv.sync_device();
+
104 }
+
105
+
106 /**
+
107 * @brief Solve the multiple right-hand sides linear problem Ax=b or its transposed version A^tx=b inplace.
+
108 *
+
109 * The solver method is gaussian elimination with partial pivoting using the LU-factorized matrix A. The implementation is LAPACK method dgetrs.
+
110 *
+
111 * @param[in, out] b A 2D Kokkos::View storing the multiple right-hand sides of the problem and receiving the corresponding solution.
+
112 * @param transpose Choose between the direct or transposed version of the linear problem.
+
113 */
+
114 void solve(MultiRHS const b, bool const transpose) const override
+
115 {
+
116 assert(b.extent(0) == size());
+
117
+
118 // For order 1 splines, size() can be 0 then we bypass the solver call.
+
119 if (size() == 0) {
+
120 return;
+
121 }
+
122
+
123 auto a_device = m_a.d_view;
+
124 auto ipiv_device = m_ipiv.d_view;
+
125
+
126 Kokkos::RangePolicy<ExecSpace> const policy(0, b.extent(1));
+
127
+
128 if (transpose) {
+
129 Kokkos::parallel_for(
+
130 "gerts",
+
131 policy,
+
132 KOKKOS_LAMBDA(const int i) {
+
133 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
+
134 KokkosBatched::SerialGetrs<
+
135 KokkosBatched::Trans::Transpose,
+
136 KokkosBatched::Algo::Level3::Unblocked>::
+
137 invoke(a_device, ipiv_device, sub_b);
+
138 });
+
139 } else {
+
140 Kokkos::parallel_for(
+
141 "gerts",
+
142 policy,
+
143 KOKKOS_LAMBDA(const int i) {
+
144 auto sub_b = Kokkos::subview(b, Kokkos::ALL, i);
+
145 KokkosBatched::SerialGetrs<
+
146 KokkosBatched::Trans::NoTranspose,
+
147 KokkosBatched::Algo::Level3::Unblocked>::
+
148 invoke(a_device, ipiv_device, sub_b);
+
149 });
+
150 }
+
151 }
+
152};
+
153
+
154} // namespace ddc::detail
ddc
The top-level namespace of DDC.
Definition aligned_allocator.hpp:11