diff --git a/include/ddc/kernels/splines/spline_builder.hpp b/include/ddc/kernels/splines/spline_builder.hpp index d5bad9617..97bd41b01 100644 --- a/include/ddc/kernels/splines/spline_builder.hpp +++ b/include/ddc/kernels/splines/spline_builder.hpp @@ -300,17 +300,18 @@ class SplineBuilder /** * @brief Get the whole domain on which spline coefficients are defined, with the dimension of interest being the leading dimension. * - * This is used internally due to solver limitation and because it may be beneficial to computation performance. + * This is used internally due to solver limitation and because it may be beneficial to computation performance. For LAPACK backend and non-periodic boundary condition, we are using SplinesLinearSolver3x3Blocks which requires upper_block_size additional rows for internal operations. * * @return The (transposed) domain for the spline coefficients. */ batched_spline_tr_domain_type batched_spline_tr_domain() const noexcept { - return batched_spline_tr_domain_type( - batched_spline_domain().restrict(ddc::DiscreteDomain( + return batched_spline_tr_domain_type(ddc::replace_dim_of( + batched_spline_domain(), + ddc::DiscreteDomain( ddc::DiscreteElement(0), ddc::DiscreteVector( - ddc::discrete_space().nbasis())))); + matrix->required_number_of_rhs_rows())))); } public: @@ -846,7 +847,7 @@ operator()( // Create a 2D Kokkos::View to manage spline_tr as a matrix Kokkos::View bcoef_section( spline_tr.data_handle(), - ddc::discrete_space().nbasis(), + static_cast(spline_tr.template extent()), batch_domain().size()); // Compute spline coef matrix->solve(bcoef_section); diff --git a/include/ddc/kernels/splines/splines_linear_problem.hpp b/include/ddc/kernels/splines/splines_linear_problem.hpp index 11bb921d4..6434db556 100644 --- a/include/ddc/kernels/splines/splines_linear_problem.hpp +++ b/include/ddc/kernels/splines/splines_linear_problem.hpp @@ -78,6 +78,26 @@ class SplinesLinearProblem { return m_size; } + + /** + * @brief Get the required number of rows of the multi-rhs view passed to solve(). + * + * Implementations may require a number of rows larger than what `size` returns for optimization purposes. + * + * @return The required number of rows of the multi-rhs view. It is guaranteed to be greater or equal to `size`. + */ + std::size_t required_number_of_rhs_rows() const + { + std::size_t const nrows = impl_required_number_of_rhs_rows(); + assert(nrows >= size()); + return nrows; + } + +private: + virtual std::size_t impl_required_number_of_rhs_rows() const + { + return m_size; + } }; /** diff --git a/include/ddc/kernels/splines/splines_linear_problem_3x3_blocks.hpp b/include/ddc/kernels/splines/splines_linear_problem_3x3_blocks.hpp index 1dc93e99b..cdcbcfa5a 100644 --- a/include/ddc/kernels/splines/splines_linear_problem_3x3_blocks.hpp +++ b/include/ddc/kernels/splines/splines_linear_problem_3x3_blocks.hpp @@ -96,9 +96,10 @@ class SplinesLinearProblem3x3Blocks : public SplinesLinearProblem2x2Blocks | b_top | -- Considered as a - * | b_bottom | | b_bottom | -- single bottom block + * | b_top | | - | + * | b_center | -> | b_center | + * | b_bottom | | b_top | -- Considered as a + * | - | | b_bottom | -- single bottom block * * @param b The multiple right-hand sides. */ @@ -106,34 +107,43 @@ class SplinesLinearProblem3x3Blocks : public SplinesLinearProblem2x2Blockssize(); // size of the center block - // prevent Kokkos::deep_copy(b_top_dst, b_top) to be a deep_copy between overlapping allocations - assert(nq >= m_top_size); - MultiRHS const b_top = Kokkos:: subview(b, std::pair {0, m_top_size}, Kokkos::ALL); - MultiRHS const b_center = Kokkos:: + MultiRHS const b_bottom = Kokkos:: subview(b, - std::pair {m_top_size, m_top_size + nq}, + std::pair {m_top_size + nq, size()}, Kokkos::ALL); - MultiRHS const b_center_dst - = Kokkos::subview(b, std::pair {0, nq}, Kokkos::ALL); MultiRHS const b_top_dst = Kokkos:: - subview(b, std::pair {nq, nq + m_top_size}, Kokkos::ALL); + subview(b, + std::pair {m_top_size + nq, 2 * m_top_size + nq}, + Kokkos::ALL); + MultiRHS const b_bottom_dst = Kokkos:: + subview(b, + std::pair< + std::size_t, + std::size_t> {2 * m_top_size + nq, m_top_size + size()}, + Kokkos::ALL); - MultiRHS const buffer = Kokkos::create_mirror(ExecSpace(), b_center); + if (b_bottom.extent(0) > b_top.extent(0)) { + // Need a buffer to prevent overlapping + MultiRHS const buffer = Kokkos::create_mirror(ExecSpace(), b_bottom); - Kokkos::deep_copy(buffer, b_center); + Kokkos::deep_copy(buffer, b_bottom); + Kokkos::deep_copy(b_bottom_dst, buffer); + } else { + Kokkos::deep_copy(b_bottom_dst, b_bottom); + } Kokkos::deep_copy(b_top_dst, b_top); - Kokkos::deep_copy(b_center_dst, buffer); } /** * @brief Perform row interchanges on multiple right-hand sides to restore its 3-blocks structure. * - * | b_center | | b_top | - * | b_top | -> | b_center | - * | b_bottom | | b_bottom | + * | - | | b_top | + * | b_center | -> | b_center | + * | b_top | | b_bottom | + * | b_bottom | | - | * * @param b The multiple right-hand sides. */ @@ -141,26 +151,34 @@ class SplinesLinearProblem3x3Blocks : public SplinesLinearProblem2x2Blockssize(); // size of the center block - // prevent Kokkos::deep_copy(b_top, b_top_src) to be a deep_copy between overlapping allocations - assert(nq >= m_top_size); - - MultiRHS const b_center_src - = Kokkos::subview(b, std::pair {0, nq}, Kokkos::ALL); - MultiRHS const b_top_src = Kokkos:: - subview(b, std::pair {nq, nq + m_top_size}, Kokkos::ALL); - MultiRHS const b_top = Kokkos:: subview(b, std::pair {0, m_top_size}, Kokkos::ALL); - MultiRHS const b_center = Kokkos:: + MultiRHS const b_bottom = Kokkos:: subview(b, - std::pair {m_top_size, m_top_size + nq}, + std::pair {m_top_size + nq, size()}, Kokkos::ALL); - MultiRHS const buffer = Kokkos::create_mirror(ExecSpace(), b_center); + MultiRHS const b_top_src = Kokkos:: + subview(b, + std::pair {m_top_size + nq, 2 * m_top_size + nq}, + Kokkos::ALL); + MultiRHS const b_bottom_src = Kokkos:: + subview(b, + std::pair< + std::size_t, + std::size_t> {2 * m_top_size + nq, m_top_size + size()}, + Kokkos::ALL); - Kokkos::deep_copy(buffer, b_center_src); Kokkos::deep_copy(b_top, b_top_src); - Kokkos::deep_copy(b_center, buffer); + if (b_bottom.extent(0) > b_top.extent(0)) { + // Need a buffer to prevent overlapping + MultiRHS const buffer = Kokkos::create_mirror(ExecSpace(), b_bottom); + + Kokkos::deep_copy(buffer, b_bottom_src); + Kokkos::deep_copy(b_bottom, buffer); + } else { + Kokkos::deep_copy(b_bottom, b_bottom_src); + } } public: @@ -169,17 +187,32 @@ class SplinesLinearProblem3x3Blocks : public SplinesLinearProblem2x2Blocks::solve(b, transpose); + SplinesLinearProblem2x2Blocks:: + solve(Kokkos:: + subview(b, + std::pair< + std::size_t, + std::size_t> {m_top_size, m_top_size + size()}, + Kokkos::ALL), + transpose); interchange_rows_from_2_to_3_blocks_rhs(b); } + +private: + std::size_t impl_required_number_of_rhs_rows() const override + { + return size() + m_top_size; + } }; } // namespace ddc::detail diff --git a/tests/splines/splines_linear_problem.cpp b/tests/splines/splines_linear_problem.cpp index 5b1f139a1..ee79962de 100644 --- a/tests/splines/splines_linear_problem.cpp +++ b/tests/splines/splines_linear_problem.cpp @@ -21,7 +21,6 @@ namespace DDC_HIP_5_7_ANONYMOUS_NAMESPACE_WORKAROUND(MATRIX_CPP) void fill_identity( ddc::detail::SplinesLinearProblem::MultiRHS mat) { - assert(mat.extent(0) == mat.extent(1)); for (std::size_t i(0); i < mat.extent(0); ++i) { for (std::size_t j(0); j < mat.extent(1); ++j) { mat(i, j) = int(i == j); @@ -93,33 +92,45 @@ namespace DDC_HIP_5_7_ANONYMOUS_NAMESPACE_WORKAROUND(MATRIX_CPP) splines_linear_problem.setup_solver(); - Kokkos::DualView inv_ptr("inv_ptr", N * N); + Kokkos::DualView + inv_ptr("inv_ptr", splines_linear_problem.required_number_of_rhs_rows() * N); ddc::detail::SplinesLinearProblem::MultiRHS - inv(inv_ptr.h_view.data(), N, N); + inv(inv_ptr.h_view.data(), splines_linear_problem.required_number_of_rhs_rows(), N); fill_identity(inv); inv_ptr.modify_host(); inv_ptr.sync_device(); splines_linear_problem.solve( - ddc::detail::SplinesLinearProblem< - Kokkos::DefaultExecutionSpace>::MultiRHS(inv_ptr.d_view.data(), N, N)); + ddc::detail::SplinesLinearProblem::MultiRHS( + inv_ptr.d_view.data(), + splines_linear_problem.required_number_of_rhs_rows(), + N)); inv_ptr.modify_device(); inv_ptr.sync_host(); - Kokkos::DualView inv_tr_ptr("inv_tr_ptr", N * N); + Kokkos::DualView + inv_tr_ptr("inv_tr_ptr", splines_linear_problem.required_number_of_rhs_rows() * N); ddc::detail::SplinesLinearProblem::MultiRHS - inv_tr(inv_tr_ptr.h_view.data(), N, N); + inv_tr(inv_tr_ptr.h_view.data(), + splines_linear_problem.required_number_of_rhs_rows(), + N); fill_identity(inv_tr); inv_tr_ptr.modify_host(); inv_tr_ptr.sync_device(); splines_linear_problem - .solve(ddc::detail::SplinesLinearProblem:: - MultiRHS(inv_tr_ptr.d_view.data(), N, N), + .solve(ddc::detail::SplinesLinearProblem::MultiRHS( + inv_tr_ptr.d_view.data(), + splines_linear_problem.required_number_of_rhs_rows(), + N), true); inv_tr_ptr.modify_device(); inv_tr_ptr.sync_host(); - check_inverse(val, inv); - check_inverse_transpose(val, inv_tr); + check_inverse( + val, + Kokkos::subview(inv, std::pair {0, N}, Kokkos::ALL)); + check_inverse_transpose( + val, + Kokkos::subview(inv_tr, std::pair {0, N}, Kokkos::ALL)); } } // namespace ) @@ -255,12 +266,13 @@ TEST(SplinesLinearProblem, 3x3Blocks) { std::size_t const N = 10; std::size_t const k = 10; + std::size_t const top_size = 2; std::unique_ptr> center_block = std::make_unique< ddc::detail::SplinesLinearProblemDense>(N - 5); std::unique_ptr> splines_linear_problem = std::make_unique>(N, 2, std::move(center_block)); + Kokkos::DefaultExecutionSpace>>(N, top_size, std::move(center_block)); // Build a non-symmetric full-rank matrix (without zero) for (std::size_t i(0); i < N; ++i) { @@ -350,10 +362,11 @@ TEST_P(SplinesLinearProblemSizesFixture, OffsetBanded) TEST_P(SplinesLinearProblemSizesFixture, 2x2Blocks) { auto const [N, k] = GetParam(); + std::size_t const bottom_size = 3; std::unique_ptr> splines_linear_problem = ddc::detail::SplinesLinearProblemMaker::make_new_block_matrix_with_band_main_block< - Kokkos::DefaultExecutionSpace>(N, k, k, false, 3); + Kokkos::DefaultExecutionSpace>(N, k, k, false, bottom_size); // Build a non-symmetric full-rank band matrix for (std::size_t i(0); i < N; ++i) { @@ -372,10 +385,12 @@ TEST_P(SplinesLinearProblemSizesFixture, 2x2Blocks) TEST_P(SplinesLinearProblemSizesFixture, 3x3Blocks) { auto const [N, k] = GetParam(); + std::size_t const bottom_size = 3; + std::size_t const top_size = 2; std::unique_ptr> splines_linear_problem = ddc::detail::SplinesLinearProblemMaker::make_new_block_matrix_with_band_main_block< - Kokkos::DefaultExecutionSpace>(N, k, k, false, 3, 2); + Kokkos::DefaultExecutionSpace>(N, k, k, false, bottom_size, top_size); // Build a non-symmetric full-rank band matrix for (std::size_t i(0); i < N; ++i) {