From 52c90eb4a95a1cc2ee6a4dc33a0d5593342c3981 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Mon, 24 Feb 2025 23:18:54 +0000 Subject: [PATCH 01/22] Updates --- cpp/dolfinx/fem/assemble_scalar_impl.h | 86 ++++++++++++++------------ 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index 4a305a88b6..1dc17151c3 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -11,6 +11,7 @@ #include "FunctionSpace.h" #include "utils.h" #include +#include #include #include #include @@ -20,6 +21,8 @@ namespace dolfinx::fem::impl { +namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; + /// Assemble functional over cells template T assemble_cells(mdspan2_t x_dofmap, std::span> x, @@ -27,6 +30,7 @@ T assemble_cells(mdspan2_t x_dofmap, std::span> x, std::span constants, std::span coeffs, int cstride) { + T value(0); if (cells.empty()) return value; @@ -40,8 +44,7 @@ T assemble_cells(mdspan2_t x_dofmap, std::span> x, std::int32_t c = cells[index]; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, c, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -58,13 +61,14 @@ T assemble_cells(mdspan2_t x_dofmap, std::span> x, /// Execute kernel over exterior facets and accumulate result template -T assemble_exterior_facets(mdspan2_t x_dofmap, - std::span> x, - int num_facets_per_cell, - std::span facets, - FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, - std::span perms) +T assemble_exterior_facets( + mdspan2_t x_dofmap, std::span> x, + int num_facets_per_cell, + md::mdspan> + facets, + FEkernel auto fn, std::span constants, + std::span coeffs, int cstride, std::span perms) { T value(0); if (facets.empty()) @@ -74,15 +78,13 @@ T assemble_exterior_facets(mdspan2_t x_dofmap, std::vector> coordinate_dofs(3 * x_dofmap.extent(1)); // Iterate over all facets - assert(facets.size() % 2 == 0); - for (std::size_t index = 0; index < facets.size(); index += 2) + for (std::size_t f = 0; f < facets.extent(0); ++f) { - std::int32_t cell = facets[index]; - std::int32_t local_facet = facets[index + 1]; + std::int32_t cell = facets(f, 0); + std::int32_t local_facet = facets(f, 1); // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cell, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -92,7 +94,7 @@ T assemble_exterior_facets(mdspan2_t x_dofmap, // Permutations std::uint8_t perm = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; - const T* coeff_cell = coeffs.data() + index / 2 * cstride; + const T* coeff_cell = coeffs.data() + f * cstride; fn(&value, coeff_cell, constants.data(), coordinate_dofs.data(), &local_facet, &perm); } @@ -102,14 +104,15 @@ T assemble_exterior_facets(mdspan2_t x_dofmap, /// Assemble functional over interior facets template -T assemble_interior_facets(mdspan2_t x_dofmap, - std::span> x, - int num_facets_per_cell, - std::span facets, - FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, - std::span offsets, - std::span perms) +T assemble_interior_facets( + mdspan2_t x_dofmap, std::span> x, + int num_facets_per_cell, + md::mdspan> + facets, + FEkernel auto fn, std::span constants, + std::span coeffs, int cstride, std::span offsets, + std::span perms) { T value(0); if (facets.empty()) @@ -126,23 +129,19 @@ T assemble_interior_facets(mdspan2_t x_dofmap, assert(offsets.back() == cstride); // Iterate over all facets - assert(facets.size() % 4 == 0); - for (std::size_t index = 0; index < facets.size(); index += 4) + for (std::size_t f = 0; f < facets.extent(0); ++f) { - std::array cells = {facets[index], facets[index + 2]}; - std::array local_facet - = {facets[index + 1], facets[index + 3]}; + std::array cells = {facets(f, 0), facets(f, 2)}; + std::array local_facet = {facets(f, 1), facets(f, 3)}; // Get cell geometry - auto x_dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[0], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); for (std::size_t i = 0; i < x_dofs0.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs0[i]), 3, std::next(cdofs0.begin(), 3 * i)); } - auto x_dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[1], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs1 = md::submdspan(x_dofmap, cells[1], md::full_extent); for (std::size_t i = 0; i < x_dofs1.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs1[i]), 3, @@ -155,7 +154,7 @@ T assemble_interior_facets(mdspan2_t x_dofmap, : std::array{ perms[cells[0] * num_facets_per_cell + local_facet[0]], perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - fn(&value, coeffs.data() + index / 2 * cstride, constants.data(), + fn(&value, coeffs.data() + 2 * f * cstride, constants.data(), coordinate_dofs.data(), local_facet.data(), perm.data()); } @@ -200,10 +199,15 @@ T assemble_scalar( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); + + std::span facets = M.domain(IntegralType::exterior_facet, i, 0); value += impl::assemble_exterior_facets( x_dofmap, x, num_facets_per_cell, - M.domain(IntegralType::exterior_facet, i, 0), fn, constants, coeffs, - cstride, perms); + /// M.domain(IntegralType::exterior_facet, i, 0), + md::mdspan>( + facets.data(), facets.size() / 2, 2), + fn, constants, coeffs, cstride, perms); } for (int i : M.integral_ids(IntegralType::interior_facet)) @@ -213,10 +217,16 @@ T assemble_scalar( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::interior_facet, i}); + + std::span facets = M.domain(IntegralType::interior_facet, i, 0); + value += impl::assemble_interior_facets( x_dofmap, x, num_facets_per_cell, - M.domain(IntegralType::interior_facet, i, 0), fn, constants, coeffs, - cstride, c_offsets, perms); + // M.domain(IntegralType::interior_facet, i, 0), + md::mdspan>( + facets.data(), facets.size() / 4, 4), + fn, constants, coeffs, cstride, c_offsets, perms); } return value; From a57a61b6f59c343e528d13f58072e8b32b07858e Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Mon, 24 Feb 2025 23:31:08 +0000 Subject: [PATCH 02/22] Updates --- cpp/dolfinx/fem/assemble_scalar_impl.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index 1dc17151c3..a183c961d4 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -27,8 +27,8 @@ namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; template T assemble_cells(mdspan2_t x_dofmap, std::span> x, std::span cells, FEkernel auto fn, - std::span constants, std::span coeffs, - int cstride) + std::span constants, + md::mdspan> coeffs) { T value(0); @@ -51,9 +51,8 @@ T assemble_cells(mdspan2_t x_dofmap, std::span> x, std::next(coordinate_dofs.begin(), 3 * i)); } - const T* coeff_cell = coeffs.data() + index * cstride; - fn(&value, coeff_cell, constants.data(), coordinate_dofs.data(), nullptr, - nullptr); + fn(&value, &coeffs(index, 0), constants.data(), coordinate_dofs.data(), + nullptr, nullptr); } return value; @@ -68,7 +67,8 @@ T assemble_exterior_facets( md::extents> facets, FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, std::span perms) + md::mdspan> coeffs, + std::span perms) { T value(0); if (facets.empty()) @@ -94,8 +94,7 @@ T assemble_exterior_facets( // Permutations std::uint8_t perm = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; - const T* coeff_cell = coeffs.data() + f * cstride; - fn(&value, coeff_cell, constants.data(), coordinate_dofs.data(), + fn(&value, &coeffs(f, 0), constants.data(), coordinate_dofs.data(), &local_facet, &perm); } @@ -179,8 +178,9 @@ T assemble_scalar( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); std::span cells = M.domain(IntegralType::cell, i, 0); - value += impl::assemble_cells(x_dofmap, x, cells, fn, constants, coeffs, - cstride); + value += impl::assemble_cells( + x_dofmap, x, cells, fn, constants, + md::mdspan(coeffs.data(), coeffs.size() / cstride, cstride)); } std::span perms; @@ -207,7 +207,8 @@ T assemble_scalar( md::mdspan>( facets.data(), facets.size() / 2, 2), - fn, constants, coeffs, cstride, perms); + fn, constants, + md::mdspan(coeffs.data(), coeffs.size() / cstride, cstride), perms); } for (int i : M.integral_ids(IntegralType::interior_facet)) From de7ba1cd84b66781cb4f2504701a37c54f77aaf4 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 08:43:21 +0000 Subject: [PATCH 03/22] Small updates --- cpp/dolfinx/fem/assemble_scalar_impl.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index a183c961d4..c5acb880cf 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -117,6 +117,8 @@ T assemble_interior_facets( if (facets.empty()) return value; + assert(offsets.back() == cstride); + // Create data structures used in assembly using X = scalar_value_type_t; std::vector coordinate_dofs(2 * x_dofmap.extent(1) * 3); @@ -124,9 +126,6 @@ T assemble_interior_facets( std::span cdofs1(coordinate_dofs.data() + x_dofmap.extent(1) * 3, x_dofmap.extent(1) * 3); - std::vector coeff_array(2 * offsets.back()); - assert(offsets.back() == cstride); - // Iterate over all facets for (std::size_t f = 0; f < facets.extent(0); ++f) { @@ -200,13 +199,12 @@ T assemble_scalar( auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); + using Ext2 = md::extents; std::span facets = M.domain(IntegralType::exterior_facet, i, 0); value += impl::assemble_exterior_facets( x_dofmap, x, num_facets_per_cell, - /// M.domain(IntegralType::exterior_facet, i, 0), - md::mdspan>( - facets.data(), facets.size() / 2, 2), + md::mdspan(facets.data(), facets.size() / 2, + 2), fn, constants, md::mdspan(coeffs.data(), coeffs.size() / cstride, cstride), perms); } From 4c6bd9592872cb0fd73799012af81ae608851edc Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 08:59:29 +0000 Subject: [PATCH 04/22] Simplify --- cpp/dolfinx/fem/assemble_scalar_impl.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index c5acb880cf..8a35b2f8fa 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -110,15 +110,16 @@ T assemble_interior_facets( md::extents> facets, FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, std::span offsets, + // std::span coeffs, int cstride, + md::mdspan> + coeffs, std::span perms) { T value(0); if (facets.empty()) return value; - assert(offsets.back() == cstride); - // Create data structures used in assembly using X = scalar_value_type_t; std::vector coordinate_dofs(2 * x_dofmap.extent(1) * 3); @@ -152,8 +153,10 @@ T assemble_interior_facets( : std::array{ perms[cells[0] * num_facets_per_cell + local_facet[0]], perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - fn(&value, coeffs.data() + 2 * f * cstride, constants.data(), - coordinate_dofs.data(), local_facet.data(), perm.data()); + fn(&value, &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), + local_facet.data(), perm.data()); + // fn(&value, coeffs.data() + 2 * f * cstride, constants.data(), + // coordinate_dofs.data(), local_facet.data(), perm.data()); } return value; @@ -221,11 +224,15 @@ T assemble_scalar( value += impl::assemble_interior_facets( x_dofmap, x, num_facets_per_cell, - // M.domain(IntegralType::interior_facet, i, 0), md::mdspan>( facets.data(), facets.size() / 4, 4), - fn, constants, coeffs, cstride, c_offsets, perms); + fn, constants, + // coeffs, cstride, + md::mdspan>( + coeffs.data(), coeffs.size() / (2 * cstride), 2, cstride), + perms); } return value; From 586f012718e99118edeec8a1ea7c756b4a4a00a7 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 14:14:09 +0000 Subject: [PATCH 05/22] Small change --- cpp/dolfinx/fem/assemble_scalar_impl.h | 43 ++++++++++++------------- python/test/unit/fem/test_expression.py | 2 -- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index 8a35b2f8fa..52def37925 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -67,8 +67,7 @@ T assemble_exterior_facets( md::extents> facets, FEkernel auto fn, std::span constants, - md::mdspan> coeffs, - std::span perms) + std::span coeffs, int cstride, std::span perms) { T value(0); if (facets.empty()) @@ -94,7 +93,8 @@ T assemble_exterior_facets( // Permutations std::uint8_t perm = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; - fn(&value, &coeffs(f, 0), constants.data(), coordinate_dofs.data(), + const T* coeff_cell = coeffs.data() + f * cstride; + fn(&value, coeff_cell, constants.data(), coordinate_dofs.data(), &local_facet, &perm); } @@ -110,16 +110,15 @@ T assemble_interior_facets( md::extents> facets, FEkernel auto fn, std::span constants, - // std::span coeffs, int cstride, - md::mdspan> - coeffs, + std::span coeffs, int cstride, std::span offsets, std::span perms) { T value(0); if (facets.empty()) return value; + assert(offsets.back() == cstride); + // Create data structures used in assembly using X = scalar_value_type_t; std::vector coordinate_dofs(2 * x_dofmap.extent(1) * 3); @@ -153,10 +152,8 @@ T assemble_interior_facets( : std::array{ perms[cells[0] * num_facets_per_cell + local_facet[0]], perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - fn(&value, &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), - local_facet.data(), perm.data()); - // fn(&value, coeffs.data() + 2 * f * cstride, constants.data(), - // coordinate_dofs.data(), local_facet.data(), perm.data()); + fn(&value, coeffs.data() + 2 * f * cstride, constants.data(), + coordinate_dofs.data(), local_facet.data(), perm.data()); } return value; @@ -180,9 +177,15 @@ T assemble_scalar( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); std::span cells = M.domain(IntegralType::cell, i, 0); + if (cstride <= 0) + { + std::cout << "Foo: " << cells.size() << ", " << cstride << ", " + << coeffs.size() << std::endl; + } + // assert(cstride > 0); value += impl::assemble_cells( x_dofmap, x, cells, fn, constants, - md::mdspan(coeffs.data(), coeffs.size() / cstride, cstride)); + md::mdspan(coeffs.data(), cells.size(), cstride)); } std::span perms; @@ -202,14 +205,13 @@ T assemble_scalar( auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); - using Ext2 = md::extents; std::span facets = M.domain(IntegralType::exterior_facet, i, 0); value += impl::assemble_exterior_facets( x_dofmap, x, num_facets_per_cell, - md::mdspan(facets.data(), facets.size() / 2, - 2), - fn, constants, - md::mdspan(coeffs.data(), coeffs.size() / cstride, cstride), perms); + md::mdspan>( + facets.data(), facets.size() / 2, 2), + fn, constants, coeffs, cstride, perms); } for (int i : M.integral_ids(IntegralType::interior_facet)) @@ -227,12 +229,7 @@ T assemble_scalar( md::mdspan>( facets.data(), facets.size() / 4, 4), - fn, constants, - // coeffs, cstride, - md::mdspan>( - coeffs.data(), coeffs.size() / (2 * cstride), 2, cstride), - perms); + fn, constants, coeffs, cstride, c_offsets, perms); } return value; diff --git a/python/test/unit/fem/test_expression.py b/python/test/unit/fem/test_expression.py index 07715eb891..672cd252f2 100644 --- a/python/test/unit/fem/test_expression.py +++ b/python/test/unit/fem/test_expression.py @@ -17,8 +17,6 @@ from dolfinx.fem import Constant, Expression, Function, form, functionspace from dolfinx.mesh import create_unit_square -dolfinx.cpp.log.set_log_level(dolfinx.cpp.log.LogLevel.DEBUG) - @pytest.mark.parametrize( "dtype", From 3bba8ebbfb38f6e1eeae9c3c65ca974bf3fca5c8 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 15:55:14 +0000 Subject: [PATCH 06/22] Updates --- cpp/dolfinx/fem/assemble_scalar_impl.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index 52def37925..78a315c5b9 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -67,7 +67,8 @@ T assemble_exterior_facets( md::extents> facets, FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, std::span perms) + md::mdspan> coeffs, + std::span perms) { T value(0); if (facets.empty()) @@ -93,8 +94,7 @@ T assemble_exterior_facets( // Permutations std::uint8_t perm = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; - const T* coeff_cell = coeffs.data() + f * cstride; - fn(&value, coeff_cell, constants.data(), coordinate_dofs.data(), + fn(&value, &coeffs(f, 0), constants.data(), coordinate_dofs.data(), &local_facet, &perm); } @@ -177,12 +177,7 @@ T assemble_scalar( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); std::span cells = M.domain(IntegralType::cell, i, 0); - if (cstride <= 0) - { - std::cout << "Foo: " << cells.size() << ", " << cstride << ", " - << coeffs.size() << std::endl; - } - // assert(cstride > 0); + assert(cells.size() * cstride == coeffs.size()); value += impl::assemble_cells( x_dofmap, x, cells, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride)); @@ -206,12 +201,14 @@ T assemble_scalar( = coefficients.at({IntegralType::exterior_facet, i}); std::span facets = M.domain(IntegralType::exterior_facet, i, 0); + assert((facets.size() / 2) * cstride == coeffs.size()); value += impl::assemble_exterior_facets( x_dofmap, x, num_facets_per_cell, md::mdspan>( facets.data(), facets.size() / 2, 2), - fn, constants, coeffs, cstride, perms); + fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), + perms); } for (int i : M.integral_ids(IntegralType::interior_facet)) From 08c8c18aade872d053abe2ac7788c5b33c48fc18 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 16:29:03 +0000 Subject: [PATCH 07/22] Fix wrapper bug --- cpp/dolfinx/fem/assemble_scalar_impl.h | 20 ++++++++++--------- cpp/dolfinx/fem/assembler.h | 3 +++ .../wrappers/dolfinx_wrappers/pycoeff.h | 8 ++++---- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index 78a315c5b9..b3698bbd66 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -110,15 +110,15 @@ T assemble_interior_facets( md::extents> facets, FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, std::span offsets, + md::mdspan> + coeffs, std::span perms) { T value(0); if (facets.empty()) return value; - assert(offsets.back() == cstride); - // Create data structures used in assembly using X = scalar_value_type_t; std::vector coordinate_dofs(2 * x_dofmap.extent(1) * 3); @@ -152,8 +152,8 @@ T assemble_interior_facets( : std::array{ perms[cells[0] * num_facets_per_cell + local_facet[0]], perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - fn(&value, coeffs.data() + 2 * f * cstride, constants.data(), - coordinate_dofs.data(), local_facet.data(), perm.data()); + fn(&value, &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), + local_facet.data(), perm.data()); } return value; @@ -213,20 +213,22 @@ T assemble_scalar( for (int i : M.integral_ids(IntegralType::interior_facet)) { - const std::vector c_offsets = M.coefficient_offsets(); auto fn = M.kernel(IntegralType::interior_facet, i, 0); assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::interior_facet, i}); - std::span facets = M.domain(IntegralType::interior_facet, i, 0); - + assert((facets.size() / 4) * 2 * cstride == coeffs.size()); value += impl::assemble_interior_facets( x_dofmap, x, num_facets_per_cell, md::mdspan>( facets.data(), facets.size() / 4, 4), - fn, constants, coeffs, cstride, c_offsets, perms); + fn, constants, + md::mdspan>( + coeffs.data(), facets.size() / 4, 2, cstride), + perms); } return value; diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index 36b108d129..26c1aa8db5 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -191,8 +191,11 @@ template T assemble_scalar(const Form& M) { const std::vector constants = pack_constants(M); + std::cout << "Allocate storage" << std::endl; auto coefficients = allocate_coefficient_storage(M); + std::cout << "Pack coefss" << std::endl; pack_coefficients(M, coefficients); + std::cout << "End Pack coefss" << std::endl; return assemble_scalar(M, std::span(constants), make_coefficients_span(coefficients)); } diff --git a/python/dolfinx/wrappers/dolfinx_wrappers/pycoeff.h b/python/dolfinx/wrappers/dolfinx_wrappers/pycoeff.h index 2a2ed0058b..2c675b4731 100644 --- a/python/dolfinx/wrappers/dolfinx_wrappers/pycoeff.h +++ b/python/dolfinx/wrappers/dolfinx_wrappers/pycoeff.h @@ -25,10 +25,10 @@ py_to_cpp_coeffs( coeffs, std::inserter(c, c.end()), [](auto& e) -> typename decltype(c)::value_type { - return {e.first, - {std::span(static_cast(e.second.data()), - e.second.shape(0)), - e.second.shape(1)}}; + return { + e.first, + {std::span(static_cast(e.second.data()), e.second.size()), + e.second.shape(1)}}; }); return c; } From 07049397f000269e4031246bcecf0c2b5fb001a3 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 18:34:38 +0000 Subject: [PATCH 08/22] Simplifications --- cpp/dolfinx/fem/assemble_scalar_impl.h | 48 ++++++++++++-------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_scalar_impl.h b/cpp/dolfinx/fem/assemble_scalar_impl.h index b3698bbd66..83cb39e07f 100644 --- a/cpp/dolfinx/fem/assemble_scalar_impl.h +++ b/cpp/dolfinx/fem/assemble_scalar_impl.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2020 Garth N. Wells +// Copyright (C) 2019-2025 Garth N. Wells // // This file is part of DOLFINx (https://www.fenicsproject.org) // @@ -62,13 +62,12 @@ T assemble_cells(mdspan2_t x_dofmap, std::span> x, template T assemble_exterior_facets( mdspan2_t x_dofmap, std::span> x, - int num_facets_per_cell, md::mdspan> facets, FEkernel auto fn, std::span constants, md::mdspan> coeffs, - std::span perms) + md::mdspan> perms) { T value(0); if (facets.empty()) @@ -92,8 +91,7 @@ T assemble_exterior_facets( } // Permutations - std::uint8_t perm - = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; + std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_facet); fn(&value, &coeffs(f, 0), constants.data(), coordinate_dofs.data(), &local_facet, &perm); } @@ -105,15 +103,14 @@ T assemble_exterior_facets( template T assemble_interior_facets( mdspan2_t x_dofmap, std::span> x, - int num_facets_per_cell, md::mdspan> + md::extents> facets, FEkernel auto fn, std::span constants, md::mdspan> coeffs, - std::span perms) + md::mdspan> perms) { T value(0); if (facets.empty()) @@ -129,8 +126,8 @@ T assemble_interior_facets( // Iterate over all facets for (std::size_t f = 0; f < facets.extent(0); ++f) { - std::array cells = {facets(f, 0), facets(f, 2)}; - std::array local_facet = {facets(f, 1), facets(f, 3)}; + std::array cells = {facets(f, 0, 0), facets(f, 1, 0)}; + std::array local_facet = {facets(f, 0, 1), facets(f, 1, 1)}; // Get cell geometry auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); @@ -146,12 +143,10 @@ T assemble_interior_facets( std::next(cdofs1.begin(), 3 * i)); } - std::array perm - = perms.empty() - ? std::array{0, 0} - : std::array{ - perms[cells[0] * num_facets_per_cell + local_facet[0]], - perms[cells[1] * num_facets_per_cell + local_facet[1]]}; + std::array perm = perms.empty() + ? std::array{0, 0} + : std::array{perms(cells[0], local_facet[0]), + perms(cells[1], local_facet[1])}; fn(&value, &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), local_facet.data(), perm.data()); } @@ -183,16 +178,19 @@ T assemble_scalar( md::mdspan(coeffs.data(), cells.size(), cstride)); } - std::span perms; + mesh::CellType cell_type = mesh->topology()->cell_type(); + int num_facets_per_cell + = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); + md::mdspan> perms; if (M.needs_facet_permutations()) { mesh->topology_mutable()->create_entity_permutations(); - perms = std::span(mesh->topology()->get_facet_permutations()); + const std::vector& p + = mesh->topology()->get_facet_permutations(); + perms = md::mdspan(p.data(), p.size() / num_facets_per_cell, + num_facets_per_cell); } - mesh::CellType cell_type = mesh->topology()->cell_type(); - int num_facets_per_cell - = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); for (int i : M.integral_ids(IntegralType::exterior_facet)) { auto fn = M.kernel(IntegralType::exterior_facet, i, 0); @@ -203,7 +201,7 @@ T assemble_scalar( std::span facets = M.domain(IntegralType::exterior_facet, i, 0); assert((facets.size() / 2) * cstride == coeffs.size()); value += impl::assemble_exterior_facets( - x_dofmap, x, num_facets_per_cell, + x_dofmap, x, md::mdspan>( facets.data(), facets.size() / 2, 2), @@ -220,10 +218,10 @@ T assemble_scalar( std::span facets = M.domain(IntegralType::interior_facet, i, 0); assert((facets.size() / 4) * 2 * cstride == coeffs.size()); value += impl::assemble_interior_facets( - x_dofmap, x, num_facets_per_cell, + x_dofmap, x, md::mdspan>( - facets.data(), facets.size() / 4, 4), + md::extents>( + facets.data(), facets.size() / 4, 2, 2), fn, constants, md::mdspan>( From 11988ea5641d66d447e305cf376ae836c767178f Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 21:00:56 +0000 Subject: [PATCH 09/22] Updates --- cpp/dolfinx/fem/assemble_vector_impl.h | 68 ++++++++++---------------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 8c513fcf18..ac564a9e24 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -33,10 +33,10 @@ class DirichletBC; } namespace dolfinx::fem::impl { +namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; + /// @cond -using mdspan2_t = MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< - const std::int32_t, - MDSPAN_IMPL_STANDARD_NAMESPACE::dextents>; +using mdspan2_t = md::mdspan>; /// @endcond /// @brief Apply boundary condition lifting for cell integrals. @@ -85,7 +85,7 @@ void _lift_bc_cells( fem::DofTransformKernel auto P0, std::tuple> dofmap1, fem::DofTransformKernel auto P1T, std::span constants, - std::span coeffs, int cstride, + md::mdspan> coeffs, std::span cell_info0, std::span cell_info1, std::span bc_values1, std::span bc_markers1, std::span x0, T alpha) @@ -112,8 +112,7 @@ void _lift_bc_cells( std::int32_t c1 = cells1[index]; // Get dof maps for cell - auto dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - dmap1, c1, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto dofs1 = md::submdspan(dmap1, c1, md::full_extent); // Check if bc is applied to cell bool has_bc = false; @@ -149,8 +148,7 @@ void _lift_bc_cells( continue; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, c, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -158,17 +156,15 @@ void _lift_bc_cells( } // Size data structure for assembly - auto dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - dmap0, c0, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto dofs0 = md::submdspan(dmap0, c0, md::full_extent); const int num_rows = bs0 * dofs0.size(); const int num_cols = bs1 * dofs1.size(); - const T* coeff_array = coeffs.data() + index * cstride; Ae.resize(num_rows * num_cols); std::ranges::fill(Ae, 0); - kernel(Ae.data(), coeff_array, constants.data(), coordinate_dofs.data(), - nullptr, nullptr); + kernel(Ae.data(), &coeffs(index, 0), constants.data(), + coordinate_dofs.data(), nullptr, nullptr); P0(Ae, cell_info0, c0, num_cols); P1T(Ae, cell_info1, c1, num_rows); @@ -303,8 +299,7 @@ void _lift_bc_exterior_facets( std::int32_t local_facet = facets[index + 1]; // Get dof maps for cell - auto dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - dmap1, cell1, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto dofs1 = md::submdspan(dmap1, cell1, md::full_extent); // Check if bc is applied to cell bool has_bc = false; @@ -324,8 +319,7 @@ void _lift_bc_exterior_facets( continue; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cell, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -333,8 +327,7 @@ void _lift_bc_exterior_facets( } // Size data structure for assembly - auto dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - dmap0, cell0, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto dofs0 = md::submdspan(dmap0, cell0, md::full_extent); const int num_rows = bs0 * dofs0.size(); const int num_cols = bs1 * dofs1.size(); @@ -462,15 +455,13 @@ void _lift_bc_interior_facets( = {facets[index + 1], facets[index + 3]}; // Get cell geometry - auto x_dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[0], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); for (std::size_t i = 0; i < x_dofs0.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs0[i]), 3, std::next(cdofs0.begin(), 3 * i)); } - auto x_dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[1], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs1 = md::submdspan(x_dofmap, cells[1], md::full_extent); for (std::size_t i = 0; i < x_dofs1.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs1[i]), 3, @@ -663,8 +654,7 @@ void assemble_cells( std::int32_t c0 = cells0[index]; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, c, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, c, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -678,8 +668,7 @@ void assemble_cells( P0(_be, cell_info0, c0, 1); // Scatter cell vector to 'global' vector array - auto dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - dmap, c0, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto dofs = md::submdspan(dmap, c0, md::full_extent); if constexpr (_bs > 0) { for (std::size_t i = 0; i < dofs.size(); ++i) @@ -754,8 +743,7 @@ void assemble_exterior_facets( std::int32_t cell0 = facets0[index]; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cell, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -774,8 +762,7 @@ void assemble_exterior_facets( P0(_be, cell_info0, cell0, 1); // Add element vector to global vector - auto dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - dmap, cell0, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto dofs = md::submdspan(dmap, cell0, md::full_extent); if constexpr (_bs > 0) { for (std::size_t i = 0; i < dofs.size(); ++i) @@ -854,15 +841,13 @@ void assemble_interior_facets( facets[index + 3]}; // Get cell geometry - auto x_dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[0], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); for (std::size_t i = 0; i < x_dofs0.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs0[i]), 3, std::next(cdofs0.begin(), 3 * i)); } - auto x_dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[1], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs1 = md::submdspan(x_dofmap, cells[1], md::full_extent); for (std::size_t i = 0; i < x_dofs1.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs1[i]), 3, @@ -984,30 +969,31 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, { auto kernel = a.kernel(IntegralType::cell, i, 0); assert(kernel); - auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); + auto& [_coeffs, cstride] = coefficients.at({IntegralType::cell, i}); std::span cells = a.domain(IntegralType::cell, i, 0); std::span cells0 = a.domain_arg(IntegralType::cell, 0, i, 0); std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, 0); + assert(_coeffs.size() == cells.size() * cstride); + auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { _lift_bc_cells( b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cstride, cell_info0, + {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else if (bs0 == 3 and bs1 == 3) { _lift_bc_cells( b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cstride, cell_info0, + {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else { _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cstride, - cell_info0, cell_info1, bc_values1, bc_markers1, x0, - alpha); + {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } } From a26c033aac2f5d3656054fbed842ca324658822a Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 22:10:58 +0000 Subject: [PATCH 10/22] Updates --- cpp/dolfinx/fem/assemble_vector_impl.h | 75 +++++++++++++++++--------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index ac564a9e24..6ac0e99155 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -65,7 +65,6 @@ using mdspan2_t = md::mdspan>; /// @param[in] constants Constants data. /// @param[in] coeffs The coefficient data array with shape /// `(cells.size(), cstride)` flattened into row-major format. -/// @param[in] cstride The coefficient stride. /// @param[in] cell_info0 The cell permutation information for the test /// function mesh. /// @param[in] cell_info1 The cell permutation information for the trial @@ -263,10 +262,19 @@ template void _lift_bc_exterior_facets( std::span b, mdspan2_t x_dofmap, std::span> x, int num_facets_per_cell, - FEkernel auto kernel, std::span facets, - std::tuple> dofmap0, + FEkernel auto kernel, + md::mdspan> + facets, + std::tuple>> + dofmap0, fem::DofTransformKernel auto P0, - std::tuple> dofmap1, + std::tuple>> + dofmap1, fem::DofTransformKernel auto P1T, std::span constants, std::span coeffs, int cstride, std::span cell_info0, @@ -286,17 +294,17 @@ void _lift_bc_exterior_facets( assert(facets.size() % 2 == 0); assert(facets0.size() == facets.size()); assert(facets1.size() == facets.size()); - for (std::size_t index = 0; index < facets.size(); index += 2) + for (std::size_t index = 0; index < facets.extent(0); ++index) { // Cell in integration domain mesh - std::int32_t cell = facets[index]; + std::int32_t cell = facets(index, 0); // Cell in test function mesh - std::int32_t cell0 = facets0[index]; + std::int32_t cell0 = facets0(index, 0); // Cell in trial function mesh - std::int32_t cell1 = facets1[index]; + std::int32_t cell1 = facets1(index, 0); // Local facet index - std::int32_t local_facet = facets[index + 1]; + std::int32_t local_facet = facets(index, 1); // Get dof maps for cell auto dofs1 = md::submdspan(dmap1, cell1, md::full_extent); @@ -336,7 +344,7 @@ void _lift_bc_exterior_facets( std::uint8_t perm = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; - const T* coeff_array = coeffs.data() + index / 2 * cstride; + const T* coeff_array = coeffs.data() + index * cstride; Ae.resize(num_rows * num_cols); std::ranges::fill(Ae, 0); kernel(Ae.data(), coeff_array, constants.data(), coordinate_dofs.data(), @@ -409,7 +417,10 @@ template void _lift_bc_interior_facets( std::span b, mdspan2_t x_dofmap, std::span> x, int num_facets_per_cell, - FEkernel auto kernel, std::span facets, + FEkernel auto kernel, + md::mdspan> + facets, std::tuple> dofmap0, fem::DofTransformKernel auto P0, std::tuple> dofmap1, @@ -442,17 +453,17 @@ void _lift_bc_interior_facets( const int num_dofs1 = dmap1.extent(1); assert(facets0.size() == facets.size()); assert(facets1.size() == facets.size()); - for (std::size_t index = 0; index < facets.size(); index += 4) + for (std::size_t index = 0; index < facets.extent(0); ++index) { // Cells in integration domain, test function domain and trial // function domain meshes - std::array cells{facets[index], facets[index + 2]}; - std::array cells0{facets0[index], facets0[index + 2]}; - std::array cells1{facets1[index], facets1[index + 2]}; + std::array cells{facets(index, 0, 0), facets(index, 1, 0)}; + std::array cells0{facets0[4 * index], facets0[4 * index + 2]}; + std::array cells1{facets1[4 * index], facets1[4 * index + 2]}; // Local facet indices std::array local_facet - = {facets[index + 1], facets[index + 3]}; + = {facets(index, 0, 1), facets(index, 1, 1)}; // Get cell geometry auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); @@ -1013,13 +1024,21 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, assert(kernel); auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); - _lift_bc_exterior_facets( - b, x_dofmap, x, num_facets_per_cell, kernel, - a.domain(IntegralType::exterior_facet, i, 0), - {dofmap0, bs0, a.domain_arg(IntegralType::exterior_facet, 0, i, 0)}, P0, - {dofmap1, bs1, a.domain_arg(IntegralType::exterior_facet, 1, i, 0)}, - P1T, constants, coeffs, cstride, cell_info0, cell_info1, bc_values1, - bc_markers1, x0, alpha, perms); + + using mdspanx2_t + = md::mdspan>; + std::span f = a.domain(IntegralType::exterior_facet, i, 0); + mdspanx2_t facets(f.data(), f.size() / 2, 2); + std::span f0 = a.domain_arg(IntegralType::exterior_facet, 0, i, 0); + mdspanx2_t facets0(f0.data(), f0.size() / 2, 2); + std::span f1 = a.domain_arg(IntegralType::exterior_facet, 1, i, 0); + mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); + _lift_bc_exterior_facets(b, x_dofmap, x, num_facets_per_cell, kernel, + facets, {dofmap0, bs0, facets0}, P0, + {dofmap1, bs1, facets0}, P1T, constants, coeffs, + cstride, cell_info0, cell_info1, bc_values1, + bc_markers1, x0, alpha, perms); } for (int i : a.integral_ids(IntegralType::interior_facet)) @@ -1028,9 +1047,15 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, assert(kernel); auto& [coeffs, cstride] = coefficients.at({IntegralType::interior_facet, i}); + + using mdspanx22_t + = md::mdspan>; + std::span f = a.domain(IntegralType::interior_facet, i, 0); + mdspanx22_t facets(f.data(), f.size() / 4, 2, 2); + _lift_bc_interior_facets( - b, x_dofmap, x, num_facets_per_cell, kernel, - a.domain(IntegralType::interior_facet, i, 0), + b, x_dofmap, x, num_facets_per_cell, kernel, facets, {dofmap0, bs0, a.domain_arg(IntegralType::interior_facet, 0, i, 0)}, P0, {dofmap1, bs1, a.domain_arg(IntegralType::interior_facet, 1, i, 0)}, P1T, constants, coeffs, cstride, cell_info0, cell_info1, perms, From 017145f817bb0055b33cbe6de3daa6f101e2a739 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Tue, 25 Feb 2025 22:17:45 +0000 Subject: [PATCH 11/22] Updates --- cpp/dolfinx/fem/assemble_vector_impl.h | 30 ++++++++++++++++---------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 6ac0e99155..49f7220fd6 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -421,9 +421,15 @@ void _lift_bc_interior_facets( md::mdspan> facets, - std::tuple> dofmap0, + std::tuple>> + dofmap0, fem::DofTransformKernel auto P0, - std::tuple> dofmap1, + std::tuple>> + dofmap1, fem::DofTransformKernel auto P1T, std::span constants, std::span coeffs, int cstride, std::span cell_info0, @@ -447,7 +453,6 @@ void _lift_bc_interior_facets( // Temporaries for joint dofmaps std::vector dmapjoint0, dmapjoint1; - assert(facets.size() % 4 == 0); const int num_dofs0 = dmap0.extent(1); const int num_dofs1 = dmap1.extent(1); @@ -458,8 +463,8 @@ void _lift_bc_interior_facets( // Cells in integration domain, test function domain and trial // function domain meshes std::array cells{facets(index, 0, 0), facets(index, 1, 0)}; - std::array cells0{facets0[4 * index], facets0[4 * index + 2]}; - std::array cells1{facets1[4 * index], facets1[4 * index + 2]}; + std::array cells0{facets0(index, 0, 0), facets0(index, 1, 0)}; + std::array cells1{facets1(index, 0, 0), facets1(index, 1, 0)}; // Local facet indices std::array local_facet @@ -1053,13 +1058,16 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, md::extents>; std::span f = a.domain(IntegralType::interior_facet, i, 0); mdspanx22_t facets(f.data(), f.size() / 4, 2, 2); + std::span f0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); + mdspanx22_t facets0(f0.data(), f0.size() / 4, 2, 2); + std::span f1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); + mdspanx22_t facets1(f1.data(), f1.size() / 4, 2, 2); - _lift_bc_interior_facets( - b, x_dofmap, x, num_facets_per_cell, kernel, facets, - {dofmap0, bs0, a.domain_arg(IntegralType::interior_facet, 0, i, 0)}, P0, - {dofmap1, bs1, a.domain_arg(IntegralType::interior_facet, 1, i, 0)}, - P1T, constants, coeffs, cstride, cell_info0, cell_info1, perms, - bc_values1, bc_markers1, x0, alpha); + _lift_bc_interior_facets(b, x_dofmap, x, num_facets_per_cell, kernel, + facets, {dofmap0, bs0, facets0}, P0, + {dofmap1, bs1, facets1}, P1T, constants, coeffs, + cstride, cell_info0, cell_info1, perms, bc_values1, + bc_markers1, x0, alpha); } } From 79a2b407267ada6578eb347896d788bd02fb437e Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 07:31:32 +0000 Subject: [PATCH 12/22] Small fix --- cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 49f7220fd6..c6462e1e66 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1041,7 +1041,7 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); _lift_bc_exterior_facets(b, x_dofmap, x, num_facets_per_cell, kernel, facets, {dofmap0, bs0, facets0}, P0, - {dofmap1, bs1, facets0}, P1T, constants, coeffs, + {dofmap1, bs1, facets1}, P1T, constants, coeffs, cstride, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha, perms); } From d49c0483e293f7e038f75289ea7f4e923ce8d9da Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 08:26:03 +0000 Subject: [PATCH 13/22] Updates --- cpp/dolfinx/fem/assemble_vector_impl.h | 204 ++++++++++++++----------- 1 file changed, 118 insertions(+), 86 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index c6462e1e66..f038d96f4a 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -296,11 +296,10 @@ void _lift_bc_exterior_facets( assert(facets1.size() == facets.size()); for (std::size_t index = 0; index < facets.extent(0); ++index) { - // Cell in integration domain mesh + // Cell in integration domain, test function and trial function + // meshes std::int32_t cell = facets(index, 0); - // Cell in test function mesh std::int32_t cell0 = facets0(index, 0); - // Cell in trial function mesh std::int32_t cell1 = facets1(index, 0); // Local facet index @@ -636,11 +635,10 @@ void _lift_bc_interior_facets( /// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. /// @param kernel Kernel function to execute over each cell. /// @param constants The constant data -/// @param coeffs The coefficient data array of shape (cells.size(), cstride), -/// flattened into row-major format. -/// @param cstride The coefficient stride -/// @param cell_info0 The cell permutation information for the test function -/// mesh +/// @param coeffs The coefficient data array of shape (cells.size(), +/// cstride). +/// @param cell_info0 The cell permutation information for the test +/// function mesh. template void assemble_cells( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, @@ -648,7 +646,7 @@ void assemble_cells( std::span cells, std::tuple> dofmap, FEkernel auto kernel, std::span constants, - std::span coeffs, int cstride, + md::mdspan> coeffs, std::span cell_info0) { if (cells.empty()) @@ -679,7 +677,7 @@ void assemble_cells( // Tabulate vector for cell std::ranges::fill(be, 0); - kernel(be.data(), coeffs.data() + index * cstride, constants.data(), + kernel(be.data(), &coeffs(index, 0), constants.data(), coordinate_dofs.data(), nullptr, nullptr); P0(_be, cell_info0, c0, 1); @@ -711,7 +709,6 @@ void assemble_cells( /// @param[in,out] b The vector to accumulate into. /// @param[in] x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] num_facets_per_cell Number of cell facets /// @param[in] facets Facets (in the integration domain mesh) to execute /// the kernel over. /// @param[in] dofmap Test function (row) degree-of-freedom data holding @@ -719,8 +716,7 @@ void assemble_cells( /// @param[in] fn Kernel function to execute over each cell. /// @param[in] constants The constant data. /// @param[in] coeffs The coefficient data array of shape -/// `(cells.size(), cstride)`, flattened into row-major format. -/// @param[in] cstride The coefficient stride. +/// `(cells.size(), coeffs_per_cell)`. /// @param[in] cell_info0 The cell permutation information for the test /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet @@ -728,13 +724,18 @@ void assemble_cells( template void assemble_exterior_facets( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, - std::span> x, int num_facets_per_cell, - std::span facets, - std::tuple> dofmap, + std::span> x, + md::mdspan> + facets, + std::tuple>> + dofmap, FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, + md::mdspan> coeffs, std::span cell_info0, - std::span perms) + md::mdspan> perms) { if (facets.empty()) return; @@ -742,21 +743,19 @@ void assemble_exterior_facets( const auto [dmap, bs, facets0] = dofmap; assert(_bs < 0 or _bs == bs); - // FIXME: Add proper interface for num_dofs // Create data structures used in assembly const int num_dofs = dmap.extent(1); std::vector> coordinate_dofs(3 * x_dofmap.extent(1)); std::vector be(bs * num_dofs); std::span _be(be); - assert(facets.size() % 2 == 0); assert(facets0.size() == facets.size()); - for (std::size_t index = 0; index < facets.size(); index += 2) + for (std::size_t f = 0; f < facets.extent(0); ++f) { // Cell in the integration domain, local facet index relative to the // integration domain cell, and cell in the test function mesh - std::int32_t cell = facets[index]; - std::int32_t local_facet = facets[index + 1]; - std::int32_t cell0 = facets0[index]; + std::int32_t cell = facets(f, 0); + std::int32_t local_facet = facets(f, 1); + std::int32_t cell0 = facets0(f, 0); // Get cell coordinates/geometry auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); @@ -767,13 +766,12 @@ void assemble_exterior_facets( } // Permutations - std::uint8_t perm - = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; + std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_facet); // Tabulate element vector std::ranges::fill(be, 0); - fn(be.data(), coeffs.data() + index / 2 * cstride, constants.data(), - coordinate_dofs.data(), &local_facet, &perm); + fn(be.data(), &coeffs(f, 0), constants.data(), coordinate_dofs.data(), + &local_facet, &perm); P0(_be, cell_info0, cell0, 1); @@ -805,7 +803,6 @@ void assemble_exterior_facets( /// @param[in,out] b The vector to accumulate into. /// @param[in] x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] num_facets_per_cell Number of facets of a cell /// @param[in] facets Facets (in the integration domain mesh) to execute /// the kernel over. /// @param[in] dofmap Test function (row) degree-of-freedom data holding @@ -822,13 +819,20 @@ void assemble_exterior_facets( template void assemble_interior_facets( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, - std::span> x, int num_facets_per_cell, - std::span facets, - std::tuple> dofmap, + std::span> x, + md::mdspan> + facets, + std::tuple>> + dofmap, FEkernel auto fn, std::span constants, - std::span coeffs, int cstride, + md::mdspan> + coeffs, std::span cell_info0, - std::span perms) + md::mdspan> perms) { if (facets.empty()) return; @@ -844,17 +848,15 @@ void assemble_interior_facets( x_dofmap.extent(1) * 3); std::vector be; - assert(facets.size() % 4 == 0); assert(facets0.size() == facets.size()); - for (std::size_t index = 0; index < facets.size(); index += 4) + for (std::size_t f = 0; f < facets.extent(0); ++f) { // Cells in integration domain and test function domain meshes - std::array cells{facets[index], facets[index + 2]}; - std::array cells0{facets0[index], facets0[index + 2]}; + std::array cells{facets(f, 0, 0), facets(f, 1, 0)}; + std::array cells0{facets0(f, 0, 0), facets0(f, 1, 0)}; // Local facet indices - std::array local_facet{facets[index + 1], - facets[index + 3]}; + std::array local_facet{facets(f, 0, 1), facets(f, 1, 1)}; // Get cell geometry auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); @@ -877,14 +879,12 @@ void assemble_interior_facets( // Tabulate element vector be.resize(bs * (dmap0.size() + dmap1.size())); std::ranges::fill(be, 0); - std::array perm - = perms.empty() - ? std::array{0, 0} - : std::array{ - perms[cells[0] * num_facets_per_cell + local_facet[0]], - perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - fn(be.data(), coeffs.data() + index / 2 * cstride, constants.data(), - coordinate_dofs.data(), local_facet.data(), perm.data()); + std::array perm = perms.empty() + ? std::array{0, 0} + : std::array{perms(cells[0], local_facet[0]), + perms(cells[1], local_facet[1])}; + fn(be.data(), &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), + local_facet.data(), perm.data()); std::span _be(be); std::span sub_be = _be.subspan(bs * dmap0.size(), bs * dmap1.size()); @@ -1208,109 +1208,141 @@ void assemble_vector( { auto fn = L.kernel(IntegralType::cell, i, cell_type_idx); assert(fn); - auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); std::span cells = L.domain(IntegralType::cell, i, cell_type_idx); std::span cells0 = L.domain_arg(IntegralType::cell, 0, i, cell_type_idx); + auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); + assert(cells.size() * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_cells(P0, b, x_dofmap, x, cells, - {dofs, bs, cells0}, fn, constants, coeffs, - cstride, cell_info0); + impl::assemble_cells( + P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else if (bs == 3) { - impl::assemble_cells(P0, b, x_dofmap, x, cells, - {dofs, bs, cells0}, fn, constants, coeffs, - cstride, cell_info0); + impl::assemble_cells( + P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else { - impl::assemble_cells(P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, - constants, coeffs, cstride, cell_info0); + impl::assemble_cells( + P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } } - std::span perms; + md::mdspan> perms; if (L.needs_facet_permutations()) { + mesh::CellType cell_type = mesh->topology()->cell_types()[cell_type_idx]; + int num_facets_per_cell + = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); mesh->topology_mutable()->create_entity_permutations(); - perms = std::span(mesh->topology()->get_facet_permutations()); + const std::vector& p + = mesh->topology()->get_facet_permutations(); + perms = md::mdspan(p.data(), p.size() / num_facets_per_cell, + num_facets_per_cell); } - mesh::CellType cell_type = mesh->topology()->cell_types()[cell_type_idx]; - int num_facets_per_cell - = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); + using mdspanx2_t + = md::mdspan>; + for (int i : L.integral_ids(IntegralType::exterior_facet)) { auto fn = L.kernel(IntegralType::exterior_facet, i, 0); assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); - std::span facets + std::span f = L.domain(IntegralType::exterior_facet, i, 0); - std::span facets1 = L.domain_arg(IntegralType::exterior_facet, 0, i, 0); + mdspanx2_t facets(f.data(), f.size() / 2, 2); + std::span f1 = L.domain_arg(IntegralType::exterior_facet, 0, i, 0); + mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); + assert((facets.size() / 2) * cstride == coeffs.size()); if (bs == 1) { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, num_facets_per_cell, facets, - {dofs, bs, facets1}, fn, constants, coeffs, cstride, cell_info0, + P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else if (bs == 3) { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, num_facets_per_cell, facets, - {dofs, bs, facets1}, fn, constants, coeffs, cstride, cell_info0, + P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else { - impl::assemble_exterior_facets(P0, b, x_dofmap, x, num_facets_per_cell, - facets, {dofs, bs, facets1}, fn, - constants, coeffs, cstride, cell_info0, - perms); + impl::assemble_exterior_facets( + P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, + perms); } } for (int i : L.integral_ids(IntegralType::interior_facet)) { + using mdspanx22_t + = md::mdspan>; + using mdspanx2x_t + = md::mdspan>; + auto fn = L.kernel(IntegralType::interior_facet, i, 0); assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::interior_facet, i}); std::span facets = L.domain(IntegralType::interior_facet, i, 0); std::span facets1 = L.domain_arg(IntegralType::interior_facet, 0, i, 0); + assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { impl::assemble_interior_facets( - P0, b, x_dofmap, x, num_facets_per_cell, facets, - {*dofmap, bs, facets1}, fn, constants, coeffs, cstride, cell_info0, - perms); + P0, b, x_dofmap, x, + mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), + {*dofmap, bs, + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + fn, constants, + mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), + cell_info0, perms); } else if (bs == 3) { impl::assemble_interior_facets( - P0, b, x_dofmap, x, num_facets_per_cell, facets, - {*dofmap, bs, facets1}, fn, constants, coeffs, cstride, cell_info0, - perms); + P0, b, x_dofmap, x, + mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), + {*dofmap, bs, + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + fn, constants, + mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), + cell_info0, perms); } else { - impl::assemble_interior_facets(P0, b, x_dofmap, x, num_facets_per_cell, - facets, {*dofmap, bs, facets1}, fn, - constants, coeffs, cstride, cell_info0, - perms); + impl::assemble_interior_facets( + P0, b, x_dofmap, x, + mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), + {*dofmap, bs, + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + fn, constants, + mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), + cell_info0, perms); } } } } -/// @brief Assemble linear form into a vector +/// @brief Assemble linear form into a vector. /// @param[in,out] b The vector to be assembled. It will not be zeroed /// before assembly. -/// @param[in] L The linear forms to assemble into b -/// @param[in] constants Packed constants that appear in `L` -/// @param[in] coefficients Packed coefficients that appear in `L` +/// @param[in] L The linear forms to assemble into b. +/// @param[in] constants Packed constants that appear in `L`. +/// @param[in] coefficients Packed coefficients that appear in `L.` template void assemble_vector( std::span b, const Form& L, std::span constants, From 5cd2d724f784fefc36eba50d0a95fa6db10b8b32 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 13:44:47 +0000 Subject: [PATCH 14/22] Demo update --- cpp/demo/custom_kernel/main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index 2cb5f8d96e..e5ef9428fd 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -165,9 +165,9 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, { la::Vector b(dofmap.index_map, 1); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells( - [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), g.x(), - cells, {dofmap.map(), 1, cells}, kernel, {}, {}, 0, {}); + fem::impl::assemble_cells([](auto, auto, auto, auto) {}, + b.mutable_array(), g.dofmap(), g.x(), cells, + {dofmap.map(), 1, cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } From 5d293286fe3ad6f8c2f479ef151b6b9e79d76731 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 15:02:56 +0000 Subject: [PATCH 15/22] Doc fix --- cpp/dolfinx/fem/assemble_vector_impl.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index f038d96f4a..b9da505599 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -809,9 +809,8 @@ void assemble_exterior_facets( /// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. /// @param[in] fn Kernel function to execute over each cell. /// @param[in] constants The constant data -/// @param[in] coeffs The coefficient data array of shape (cells.size(), -/// cstride), flattened into row-major format. -/// @param[in] cstride The coefficient stride +/// @param[in] coeffs Coefficient data array, withshape (cells.size(), +/// cstride). /// @param[in] cell_info0 The cell permutation information for the test /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet From dd0105a68dcf35be1438a76b099de449e5b31a04 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 16:06:40 +0000 Subject: [PATCH 16/22] Simplifications --- cpp/dolfinx/fem/assemble_vector_impl.h | 98 +++++++++++++------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index b9da505599..640814b6d0 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -229,7 +229,6 @@ void _lift_bc_cells( /// @param[in,out] b The vector to modify /// @param[in] x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] num_facets_per_cell Number of cell facets /// @param[in] kernel Kernel function to execute over each cell. /// @param[in] facets Facet indices (in the integration domain mesh) to /// execute the kernel over. @@ -245,7 +244,6 @@ void _lift_bc_cells( /// @param[in] constants The constant data. /// @param[in] coeffs The coefficient data array of shape (cells.size(), /// cstride), flattened into row-major format. -/// @param[in] cstride The coefficient stride. /// @param[in] cell_info0 The cell permutation information for the test /// function mesh. /// @param[in] cell_info1 The cell permutation information for the trial @@ -261,8 +259,7 @@ void _lift_bc_cells( template void _lift_bc_exterior_facets( std::span b, mdspan2_t x_dofmap, - std::span> x, int num_facets_per_cell, - FEkernel auto kernel, + std::span> x, FEkernel auto kernel, md::mdspan> facets, @@ -276,11 +273,11 @@ void _lift_bc_exterior_facets( md::extents>> dofmap1, fem::DofTransformKernel auto P1T, std::span constants, - std::span coeffs, int cstride, + md::mdspan> coeffs, std::span cell_info0, std::span cell_info1, std::span bc_values1, std::span bc_markers1, std::span x0, T alpha, - std::span perms) + md::mdspan> perms) { if (facets.empty()) return; @@ -291,7 +288,6 @@ void _lift_bc_exterior_facets( // Data structures used in bc application std::vector> coordinate_dofs(3 * x_dofmap.extent(1)); std::vector Ae, be; - assert(facets.size() % 2 == 0); assert(facets0.size() == facets.size()); assert(facets1.size() == facets.size()); for (std::size_t index = 0; index < facets.extent(0); ++index) @@ -340,14 +336,12 @@ void _lift_bc_exterior_facets( const int num_cols = bs1 * dofs1.size(); // Permutations - std::uint8_t perm - = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; + std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_facet); - const T* coeff_array = coeffs.data() + index * cstride; Ae.resize(num_rows * num_cols); std::ranges::fill(Ae, 0); - kernel(Ae.data(), coeff_array, constants.data(), coordinate_dofs.data(), - &local_facet, &perm); + kernel(Ae.data(), &coeffs(index, 0), constants.data(), + coordinate_dofs.data(), &local_facet, &perm); P0(Ae, cell_info0, cell0, num_cols); P1T(Ae, cell_info1, cell1, num_rows); @@ -382,7 +376,6 @@ void _lift_bc_exterior_facets( /// @param[in,out] b The vector to modify /// @param[in] x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] num_facets_per_cell Number of facets of a cell. /// @param[in] kernel Kernel function to execute over each cell. /// @param[in] facets Facet indices (in the integration domain mesh) to /// execute the kernel over. @@ -415,8 +408,7 @@ void _lift_bc_exterior_facets( template void _lift_bc_interior_facets( std::span b, mdspan2_t x_dofmap, - std::span> x, int num_facets_per_cell, - FEkernel auto kernel, + std::span> x, FEkernel auto kernel, md::mdspan> facets, @@ -430,11 +422,14 @@ void _lift_bc_interior_facets( md::extents>> dofmap1, fem::DofTransformKernel auto P1T, std::span constants, - std::span coeffs, int cstride, + md::mdspan> + coeffs, std::span cell_info0, std::span cell_info1, - std::span perms, std::span bc_values1, - std::span bc_markers1, std::span x0, T alpha) + md::mdspan> perms, + std::span bc_values1, std::span bc_markers1, + std::span x0, T alpha) { if (facets.empty()) return; @@ -457,17 +452,16 @@ void _lift_bc_interior_facets( const int num_dofs1 = dmap1.extent(1); assert(facets0.size() == facets.size()); assert(facets1.size() == facets.size()); - for (std::size_t index = 0; index < facets.extent(0); ++index) + for (std::size_t f = 0; f < facets.extent(0); ++f) { // Cells in integration domain, test function domain and trial // function domain meshes - std::array cells{facets(index, 0, 0), facets(index, 1, 0)}; - std::array cells0{facets0(index, 0, 0), facets0(index, 1, 0)}; - std::array cells1{facets1(index, 0, 0), facets1(index, 1, 0)}; + std::array cells{facets(f, 0, 0), facets(f, 1, 0)}; + std::array cells0{facets0(f, 0, 0), facets0(f, 1, 0)}; + std::array cells1{facets1(f, 0, 0), facets1(f, 1, 0)}; // Local facet indices - std::array local_facet - = {facets(index, 0, 1), facets(index, 1, 1)}; + std::array local_facet = {facets(f, 0, 1), facets(f, 1, 1)}; // Get cell geometry auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); @@ -540,13 +534,11 @@ void _lift_bc_interior_facets( // Tabulate tensor Ae.resize(num_rows * num_cols); std::ranges::fill(Ae, 0); - std::array perm - = perms.empty() - ? std::array{0, 0} - : std::array{ - perms[cells[0] * num_facets_per_cell + local_facet[0]], - perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(), + std::array perm = perms.empty() + ? std::array{0, 0} + : std::array{perms(cells[0], local_facet[0]), + perms(cells[1], local_facet[1])}; + kernel(Ae.data(), &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), local_facet.data(), perm.data()); std::span _Ae(Ae); @@ -872,8 +864,8 @@ void assemble_interior_facets( } // Get dofmaps for cells - std::span dmap0 = dmap.cell_dofs(cells0[0]); - std::span dmap1 = dmap.cell_dofs(cells0[1]); + std::span dmap0 = dmap.cell_dofs(cells0[0]); + std::span dmap1 = dmap.cell_dofs(cells0[1]); // Tabulate element vector be.resize(bs * (dmap0.size() + dmap1.size())); @@ -1012,16 +1004,19 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, } } - std::span perms; + md::mdspan> perms; if (a.needs_facet_permutations()) { + mesh::CellType cell_type = mesh->topology()->cell_type(); + int num_facets_per_cell + = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); mesh->topology_mutable()->create_entity_permutations(); - perms = std::span(mesh->topology()->get_facet_permutations()); + const std::vector& p + = mesh->topology()->get_facet_permutations(); + perms = md::mdspan(p.data(), p.size() / num_facets_per_cell, + num_facets_per_cell); } - mesh::CellType cell_type = mesh->topology()->cell_type(); - int num_facets_per_cell - = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); for (int i : a.integral_ids(IntegralType::exterior_facet)) { auto kernel = a.kernel(IntegralType::exterior_facet, i, 0); @@ -1038,11 +1033,12 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, mdspanx2_t facets0(f0.data(), f0.size() / 2, 2); std::span f1 = a.domain_arg(IntegralType::exterior_facet, 1, i, 0); mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); - _lift_bc_exterior_facets(b, x_dofmap, x, num_facets_per_cell, kernel, - facets, {dofmap0, bs0, facets0}, P0, - {dofmap1, bs1, facets1}, P1T, constants, coeffs, - cstride, cell_info0, cell_info1, bc_values1, - bc_markers1, x0, alpha, perms); + assert(coeffs.size() == facets.extent(0) * cstride); + _lift_bc_exterior_facets( + b, x_dofmap, x, kernel, facets, {dofmap0, bs0, facets0}, P0, + {dofmap1, bs1, facets1}, P1T, constants, + md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha, perms); } for (int i : a.integral_ids(IntegralType::interior_facet)) @@ -1055,18 +1051,20 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, using mdspanx22_t = md::mdspan>; + using mdspanx2x_t + = md::mdspan>; std::span f = a.domain(IntegralType::interior_facet, i, 0); mdspanx22_t facets(f.data(), f.size() / 4, 2, 2); std::span f0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); mdspanx22_t facets0(f0.data(), f0.size() / 4, 2, 2); std::span f1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); mdspanx22_t facets1(f1.data(), f1.size() / 4, 2, 2); - - _lift_bc_interior_facets(b, x_dofmap, x, num_facets_per_cell, kernel, - facets, {dofmap0, bs0, facets0}, P0, - {dofmap1, bs1, facets1}, P1T, constants, coeffs, - cstride, cell_info0, cell_info1, perms, bc_values1, - bc_markers1, x0, alpha); + _lift_bc_interior_facets( + b, x_dofmap, x, kernel, facets, {dofmap0, bs0, facets0}, P0, + {dofmap1, bs1, facets1}, P1T, constants, + mdspanx2x_t(coeffs.data(), facets.extent(0), 2, cstride), cell_info0, + cell_info1, perms, bc_values1, bc_markers1, x0, alpha); } } @@ -1264,7 +1262,7 @@ void assemble_vector( { impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, - md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, + md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) From ebe5acf5bbc9805a699ead909971f2a5cfcc138d Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 17:34:55 +0000 Subject: [PATCH 17/22] Updates --- cpp/dolfinx/fem/assemble_matrix_impl.h | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 0dca52c928..16c82f5f9d 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -24,6 +24,8 @@ namespace dolfinx::fem::impl { +namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; + /// @brief Typedef using mdspan2_t = MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const std::int32_t, @@ -195,7 +197,9 @@ template void assemble_exterior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, std::span> x, int num_facets_per_cell, - std::span facets, + md::mdspan> + facets, std::tuple> dofmap0, fem::DofTransformKernel auto P0, std::tuple> dofmap1, @@ -220,22 +224,20 @@ void assemble_exterior_facets( const int ndim1 = bs1 * num_dofs1; std::vector Ae(ndim0 * ndim1); std::span _Ae(Ae); - assert(facets.size() % 2 == 0); assert(facets0.size() == facets.size()); assert(facets1.size() == facets.size()); - for (std::size_t index = 0; index < facets.size(); index += 2) + for (std::size_t f = 0; f < facets.extent(0); ++f) { // Cell in the integration domain, local facet index relative to the // integration domain cell, and cells in the test and trial function // meshes - std::int32_t cell = facets[index]; - std::int32_t local_facet = facets[index + 1]; - std::int32_t cell0 = facets0[index]; - std::int32_t cell1 = facets1[index]; + std::int32_t cell = facets(f, 0); + std::int32_t local_facet = facets(f, 1); + std::int32_t cell0 = facets0[2 * f]; + std::int32_t cell1 = facets1[2 * f]; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cell, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -248,7 +250,7 @@ void assemble_exterior_facets( // Tabulate tensor std::ranges::fill(Ae, 0); - kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(), + kernel(Ae.data(), coeffs.data() + f * cstride, constants.data(), coordinate_dofs.data(), &local_facet, &perm); P0(_Ae, cell_info0, cell0, ndim1); @@ -589,13 +591,20 @@ void assemble_matrix( "topology aren't supported yet"); } + using mdspanx2_t + = md::mdspan>; + auto fn = a.kernel(IntegralType::exterior_facet, i, 0); assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); + std::span facets + = a.domain(IntegralType::exterior_facet, i, 0); + assert((facets.size() / 2) * cstride == coeffs.size()); impl::assemble_exterior_facets( mat_set, x_dofmap, x, num_facets_per_cell, - a.domain(IntegralType::exterior_facet, i, 0), + mdspanx2_t(facets.data(), facets.size() / 2, 2), {dofs0, bs0, a.domain_arg(IntegralType::exterior_facet, 0, i, 0)}, P0, {dofs1, bs1, a.domain_arg(IntegralType::exterior_facet, 1, i, 0)}, P1T, bc0, bc1, fn, coeffs, cstride, constants, cell_info0, cell_info1, From 4cdbc3eb6bcbefec6d5895fb58dd06f457e86392 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 21:06:44 +0000 Subject: [PATCH 18/22] Update --- cpp/dolfinx/fem/assemble_matrix_impl.h | 98 +++++++++++++------------- cpp/dolfinx/fem/assemble_vector_impl.h | 3 +- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 16c82f5f9d..9602fcfb3a 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -27,9 +27,7 @@ namespace dolfinx::fem::impl namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; /// @brief Typedef -using mdspan2_t = MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< - const std::int32_t, - MDSPAN_IMPL_STANDARD_NAMESPACE::dextents>; +using mdspan2_t = md::mdspan>; /// @brief Execute kernel over cells and accumulate result in matrix. /// @tparam T Matrix/form scalar type. @@ -51,9 +49,7 @@ using mdspan2_t = MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< /// @param bc0 Marker for rows with Dirichlet boundary conditions applied /// @param bc1 Marker for columns with Dirichlet boundary conditions applied /// @param kernel Kernel function to execute over each cell. -/// @param coeffs The coefficient data array of shape (cells.size(), cstride), -/// flattened into row-major format. -/// @param cstride The coefficient stride +/// @param coeffs The coefficient data array of shape (cells.size(), cstride). /// @param constants The constant data /// @param cell_info0 The cell permutation information for the test function /// mesh @@ -69,8 +65,8 @@ void assemble_cells( std::tuple> dofmap1, fem::DofTransformKernel auto P1T, std::span bc0, std::span bc1, FEkernel auto kernel, - std::span coeffs, int cstride, std::span constants, - std::span cell_info0, + md::mdspan> coeffs, + std::span constants, std::span cell_info0, std::span cell_info1) { if (cells.empty()) @@ -91,17 +87,16 @@ void assemble_cells( // Iterate over active cells assert(cells0.size() == cells.size()); assert(cells1.size() == cells.size()); - for (std::size_t index = 0; index < cells.size(); ++index) + for (std::size_t c = 0; c < cells.size(); ++c) { // Cell index in integration domain mesh (c), test function mesh // (c0) and trial function mesh (c1) - std::int32_t c = cells[index]; - std::int32_t c0 = cells0[index]; - std::int32_t c1 = cells1[index]; + std::int32_t cell = cells[c]; + std::int32_t cell0 = cells0[c]; + std::int32_t cell1 = cells1[c]; // Get cell coordinates/geometry - auto x_dofs = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, c, MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); for (std::size_t i = 0; i < x_dofs.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs[i]), 3, @@ -110,16 +105,16 @@ void assemble_cells( // Tabulate tensor std::ranges::fill(Ae, 0); - kernel(Ae.data(), coeffs.data() + index * cstride, constants.data(), - coordinate_dofs.data(), nullptr, nullptr); + kernel(Ae.data(), &coeffs(c, 0), constants.data(), coordinate_dofs.data(), + nullptr, nullptr); // Compute A = P_0 \tilde{A} P_1^T (dof transformation) - P0(_Ae, cell_info0, c0, ndim1); // B = P0 \tilde{A} - P1T(_Ae, cell_info1, c1, ndim0); // A = B P1_T + P0(_Ae, cell_info0, cell0, ndim1); // B = P0 \tilde{A} + P1T(_Ae, cell_info1, cell1, ndim0); // A = B P1_T // Zero rows/columns for essential bcs - std::span dofs0(dmap0.data_handle() + c0 * num_dofs0, num_dofs0); - std::span dofs1(dmap1.data_handle() + c1 * num_dofs1, num_dofs1); + std::span dofs0(dmap0.data_handle() + cell0 * num_dofs0, num_dofs0); + std::span dofs1(dmap1.data_handle() + cell1 * num_dofs1, num_dofs1); if (!bc0.empty()) { @@ -200,13 +195,19 @@ void assemble_exterior_facets( md::mdspan> facets, - std::tuple> dofmap0, + std::tuple>> + dofmap0, fem::DofTransformKernel auto P0, - std::tuple> dofmap1, + std::tuple>> + dofmap1, fem::DofTransformKernel auto P1T, std::span bc0, std::span bc1, FEkernel auto kernel, - std::span coeffs, int cstride, std::span constants, - std::span cell_info0, + md::mdspan> coeffs, + std::span constants, std::span cell_info0, std::span cell_info1, std::span perms) { @@ -233,8 +234,8 @@ void assemble_exterior_facets( // meshes std::int32_t cell = facets(f, 0); std::int32_t local_facet = facets(f, 1); - std::int32_t cell0 = facets0[2 * f]; - std::int32_t cell1 = facets1[2 * f]; + std::int32_t cell0 = facets0(f, 0); + std::int32_t cell1 = facets1(f, 0); // Get cell coordinates/geometry auto x_dofs = md::submdspan(x_dofmap, cell, md::full_extent); @@ -250,8 +251,8 @@ void assemble_exterior_facets( // Tabulate tensor std::ranges::fill(Ae, 0); - kernel(Ae.data(), coeffs.data() + f * cstride, constants.data(), - coordinate_dofs.data(), &local_facet, &perm); + kernel(Ae.data(), &coeffs(f, 0), constants.data(), coordinate_dofs.data(), + &local_facet, &perm); P0(_Ae, cell_info0, cell0, ndim1); P1T(_Ae, cell_info1, cell1, ndim0); @@ -381,15 +382,13 @@ void assemble_interior_facets( std::array local_facet{facets[index + 1], facets[index + 3]}; // Get cell geometry - auto x_dofs0 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[0], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); for (std::size_t i = 0; i < x_dofs0.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs0[i]), 3, std::next(cdofs0.begin(), 3 * i)); } - auto x_dofs1 = MDSPAN_IMPL_STANDARD_NAMESPACE::submdspan( - x_dofmap, cells[1], MDSPAN_IMPL_STANDARD_NAMESPACE::full_extent); + auto x_dofs1 = md::submdspan(x_dofmap, cells[1], md::full_extent); for (std::size_t i = 0; i < x_dofs1.size(); ++i) { std::copy_n(std::next(x.begin(), 3 * x_dofs1[i]), 3, @@ -562,15 +561,15 @@ void assemble_matrix( { auto fn = a.kernel(IntegralType::cell, i, cell_type_idx); assert(fn); + std::span cells = a.domain(IntegralType::cell, i, cell_type_idx); + std::span cells0 = a.domain_arg(IntegralType::cell, 0, i, cell_type_idx); + std::span cells1 = a.domain_arg(IntegralType::cell, 1, i, cell_type_idx); auto& [coeffs, cstride] = coefficients.at({IntegralType::cell, i}); - - impl::assemble_cells( - mat_set, x_dofmap, x, a.domain(IntegralType::cell, i, cell_type_idx), - {dofs0, bs0, a.domain_arg(IntegralType::cell, 0, i, cell_type_idx)}, - P0, - {dofs1, bs1, a.domain_arg(IntegralType::cell, 1, i, cell_type_idx)}, - P1T, bc0, bc1, fn, coeffs, cstride, constants, cell_info0, - cell_info1); + assert(cells.size() * cstride == coeffs.size()); + impl::assemble_cells(mat_set, x_dofmap, x, cells, {dofs0, bs0, cells0}, + P0, {dofs1, bs1, cells1}, P1T, bc0, bc1, fn, + md::mdspan(coeffs.data(), cells.size(), cstride), + constants, cell_info0, cell_info1); } std::span perms; @@ -599,16 +598,19 @@ void assemble_matrix( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); - std::span facets - = a.domain(IntegralType::exterior_facet, i, 0); + + std::span f = a.domain(IntegralType::exterior_facet, i, 0); + mdspanx2_t facets(f.data(), f.size() / 2, 2); + std::span f0 = a.domain_arg(IntegralType::exterior_facet, 0, i, 0); + mdspanx2_t facets0(f0.data(), f0.size() / 2, 2); + std::span f1 = a.domain_arg(IntegralType::exterior_facet, 1, i, 0); + mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); assert((facets.size() / 2) * cstride == coeffs.size()); impl::assemble_exterior_facets( - mat_set, x_dofmap, x, num_facets_per_cell, - mdspanx2_t(facets.data(), facets.size() / 2, 2), - {dofs0, bs0, a.domain_arg(IntegralType::exterior_facet, 0, i, 0)}, P0, - {dofs1, bs1, a.domain_arg(IntegralType::exterior_facet, 1, i, 0)}, - P1T, bc0, bc1, fn, coeffs, cstride, constants, cell_info0, cell_info1, - perms); + mat_set, x_dofmap, x, num_facets_per_cell, facets, + {dofs0, bs0, facets0}, P0, {dofs1, bs1, facets1}, P1T, bc0, bc1, fn, + md::mdspan(coeffs.data(), facets.extent(0), cstride), constants, + cell_info0, cell_info1, perms); } for (int i : a.integral_ids(IntegralType::interior_facet)) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 640814b6d0..3bd5d440c5 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1252,8 +1252,7 @@ void assemble_vector( assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::exterior_facet, i}); - std::span f - = L.domain(IntegralType::exterior_facet, i, 0); + std::span f = L.domain(IntegralType::exterior_facet, i, 0); mdspanx2_t facets(f.data(), f.size() / 2, 2); std::span f1 = L.domain_arg(IntegralType::exterior_facet, 0, i, 0); mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); From ca18b1556be6a4ecc9c2c0cd66bdc88b66c329c3 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 22:01:34 +0000 Subject: [PATCH 19/22] Tidy up --- cpp/dolfinx/fem/assemble_matrix_impl.h | 135 ++++++++++++++----------- 1 file changed, 77 insertions(+), 58 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index 9602fcfb3a..c5daee183b 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -35,8 +35,9 @@ using mdspan2_t = md::mdspan>; /// matrix. /// @param x_dofmap Dofmap for the mesh geometry. /// @param x Mesh geometry (coordinates). -/// @param cells Cell indices (in the integration domain mesh) to execute -/// the kernel over. These are the indices into the geometry dofmap. +/// @param cells Cell indices (in the integration domain mesh) to +/// execute the kernel over. These are the indices into the geometry +/// dofmap. /// @param dofmap0 Test function (row) degree-of-freedom data holding /// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. /// @param P0 Function that applies transformation P_0 A in-place to @@ -46,15 +47,18 @@ using mdspan2_t = md::mdspan>; /// indices. /// @param P1T Function that applies transformation A P_1^T in-place to /// transform trial degrees-of-freedom. -/// @param bc0 Marker for rows with Dirichlet boundary conditions applied -/// @param bc1 Marker for columns with Dirichlet boundary conditions applied +/// @param bc0 Marker for rows with Dirichlet boundary conditions +/// applied. +/// @param bc1 Marker for columns with Dirichlet boundary conditions +/// applied. /// @param kernel Kernel function to execute over each cell. -/// @param coeffs The coefficient data array of shape (cells.size(), cstride). -/// @param constants The constant data -/// @param cell_info0 The cell permutation information for the test function -/// mesh -/// @param cell_info1 The cell permutation information for the trial function -/// mesh +/// @param coeffs Coefficient data array of shape (cells.size(), +/// cstride). +/// @param constants Constant data. +/// @param cell_info0 Cell permutation information for the test +/// function mesh. +/// @param cell_info1 Cell permutation information for the trial +/// function mesh. template void assemble_cells( la::MatSet auto mat_set, mdspan2_t x_dofmap, @@ -160,7 +164,6 @@ void assemble_cells( /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] num_facets_per_cell Number of cell facets /// @param[in] facets Facet indices (in the integration domain mesh) to /// execute the kernel over. /// @param[in] dofmap0 Test function (row) degree-of-freedom data @@ -179,8 +182,7 @@ void assemble_cells( /// applied. /// @param[in] kernel Kernel function to execute over each cell. /// @param[in] coeffs Coefficient data array of shape `(cells.size(), -/// cstride)`, flattened into row-major format. -/// @param[in] cstride Coefficient stride. +/// cstride)`. /// @param[in] constants Constant data. /// @param[in] cell_info0 Cell permutation information for the test /// function mesh. @@ -191,7 +193,7 @@ void assemble_cells( template void assemble_exterior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, - std::span> x, int num_facets_per_cell, + std::span> x, md::mdspan> facets, @@ -209,7 +211,7 @@ void assemble_exterior_facets( md::mdspan> coeffs, std::span constants, std::span cell_info0, std::span cell_info1, - std::span perms) + md::mdspan> perms) { if (facets.empty()) return; @@ -246,8 +248,7 @@ void assemble_exterior_facets( } // Permutations - std::uint8_t perm - = perms.empty() ? 0 : perms[cell * num_facets_per_cell + local_facet]; + std::uint8_t perm = perms.empty() ? 0 : perms(cell, local_facet); // Tabulate tensor std::ranges::fill(Ae, 0); @@ -303,7 +304,6 @@ void assemble_exterior_facets( /// matrix. /// @param[in] x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] num_facets_per_cell Number of facets of a cell /// @param[in] facets Facet indices (in the integration domain mesh) to /// execute the kernel over. /// @param[in] dofmap0 Test function (row) degree-of-freedom data @@ -320,12 +320,9 @@ void assemble_exterior_facets( /// applied. /// @param[in] bc1 Marker for columns with Dirichlet boundary conditions /// applied. -/// @param[in] coeffs The coefficient data array of shape (cells.size(), -/// cstride), /// @param[in] kernel Kernel function to execute over each cell. -/// flattened into row-major format. -/// @param[in] cstride Coefficient stride. -/// @param[in] offsets Coefficient offsets. +/// @param[in] coeffs The coefficient data array of shape (cells.size(), +/// cstride). /// @param[in] constants Constant data. /// @param[in] cell_info0 Cell permutation information for the test /// function mesh. @@ -336,17 +333,27 @@ void assemble_exterior_facets( template void assemble_interior_facets( la::MatSet auto mat_set, mdspan2_t x_dofmap, - std::span> x, int num_facets_per_cell, - std::span facets, - std::tuple> dofmap0, + std::span> x, + md::mdspan> + facets, + std::tuple>> + dofmap0, fem::DofTransformKernel auto P0, - std::tuple> dofmap1, + std::tuple>> + dofmap1, fem::DofTransformKernel auto P1T, std::span bc0, std::span bc1, FEkernel auto kernel, - std::span coeffs, int cstride, std::span offsets, + md::mdspan> + coeffs, std::span constants, std::span cell_info0, std::span cell_info1, - std::span perms) + md::mdspan> perms) { if (facets.empty()) return; @@ -362,24 +369,21 @@ void assemble_interior_facets( x_dofmap.extent(1) * 3); std::vector Ae, be; - std::vector coeff_array(2 * offsets.back()); - assert(offsets.back() == cstride); // Temporaries for joint dofmaps std::vector dmapjoint0, dmapjoint1; - assert(facets.size() % 4 == 0); assert(facets0.size() == facets.size()); assert(facets1.size() == facets.size()); - for (std::size_t index = 0; index < facets.size(); index += 4) + for (std::size_t f = 0; f < facets.extent(0); ++f) { // Cells in integration domain, test function domain and trial // function domain - std::array cells{facets[index], facets[index + 2]}; - std::array cells0{facets0[index], facets0[index + 2]}; - std::array cells1{facets1[index], facets1[index + 2]}; + std::array cells{facets(f, 0, 0), facets(f, 1, 0)}; + std::array cells0{facets0(f, 0, 0), facets0(f, 1, 0)}; + std::array cells1{facets1(f, 0, 0), facets1(f, 1, 0)}; // Local facets indices - std::array local_facet{facets[index + 1], facets[index + 3]}; + std::array local_facet{facets(f, 0, 1), facets(f, 1, 1)}; // Get cell geometry auto x_dofs0 = md::submdspan(x_dofmap, cells[0], md::full_extent); @@ -417,13 +421,11 @@ void assemble_interior_facets( Ae.resize(num_rows * num_cols); std::ranges::fill(Ae, 0); - std::array perm - = perms.empty() - ? std::array{0, 0} - : std::array{ - perms[cells[0] * num_facets_per_cell + local_facet[0]], - perms[cells[1] * num_facets_per_cell + local_facet[1]]}; - kernel(Ae.data(), coeffs.data() + index / 2 * cstride, constants.data(), + std::array perm = perms.empty() + ? std::array{0, 0} + : std::array{perms(cells[0], local_facet[0]), + perms(cells[1], local_facet[1])}; + kernel(Ae.data(), &coeffs(f, 0, 0), constants.data(), coordinate_dofs.data(), local_facet.data(), perm.data()); // Local element layout is a 2x2 block matrix with structure @@ -572,16 +574,19 @@ void assemble_matrix( constants, cell_info0, cell_info1); } - std::span perms; + md::mdspan> perms; if (a.needs_facet_permutations()) { + mesh::CellType cell_type = mesh->topology()->cell_types()[cell_type_idx]; + int num_facets_per_cell + = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); mesh->topology_mutable()->create_entity_permutations(); - perms = std::span(mesh->topology()->get_facet_permutations()); + const std::vector& p + = mesh->topology()->get_facet_permutations(); + perms = md::mdspan(p.data(), p.size() / num_facets_per_cell, + num_facets_per_cell); } - mesh::CellType cell_type = mesh->topology()->cell_types()[cell_type_idx]; - int num_facets_per_cell - = mesh::cell_num_entities(cell_type, mesh->topology()->dim() - 1); for (int i : a.integral_ids(IntegralType::exterior_facet)) { if (num_cell_types > 1) @@ -607,8 +612,8 @@ void assemble_matrix( mdspanx2_t facets1(f1.data(), f1.size() / 2, 2); assert((facets.size() / 2) * cstride == coeffs.size()); impl::assemble_exterior_facets( - mat_set, x_dofmap, x, num_facets_per_cell, facets, - {dofs0, bs0, facets0}, P0, {dofs1, bs1, facets1}, P1T, bc0, bc1, fn, + mat_set, x_dofmap, x, facets, {dofs0, bs0, facets0}, P0, + {dofs1, bs1, facets1}, P1T, bc0, bc1, fn, md::mdspan(coeffs.data(), facets.extent(0), cstride), constants, cell_info0, cell_info1, perms); } @@ -621,19 +626,33 @@ void assemble_matrix( "topology aren't supported yet"); } - const std::vector c_offsets = a.coefficient_offsets(); + using mdspanx22_t + = md::mdspan>; + using mdspanx2x_t + = md::mdspan>; + auto fn = a.kernel(IntegralType::interior_facet, i, 0); assert(fn); auto& [coeffs, cstride] = coefficients.at({IntegralType::interior_facet, i}); + + std::span facets = a.domain(IntegralType::interior_facet, i, 0); + std::span facets0 = a.domain_arg(IntegralType::interior_facet, 0, i, 0); + std::span facets1 = a.domain_arg(IntegralType::interior_facet, 1, i, 0); + assert((facets.size() / 4) * 2 * cstride == coeffs.size()); impl::assemble_interior_facets( - mat_set, x_dofmap, x, num_facets_per_cell, - a.domain(IntegralType::interior_facet, i, 0), - {*dofmap0, bs0, a.domain_arg(IntegralType::interior_facet, 0, i, 0)}, + mat_set, x_dofmap, x, + mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), + {*dofmap0, bs0, + mdspanx22_t(facets0.data(), facets0.size() / 4, 2, 2)}, P0, - {*dofmap1, bs1, a.domain_arg(IntegralType::interior_facet, 1, i, 0)}, - P1T, bc0, bc1, fn, coeffs, cstride, c_offsets, constants, cell_info0, - cell_info1, perms); + {*dofmap1, bs1, + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + P1T, bc0, bc1, fn, + mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), constants, + cell_info0, cell_info1, perms); } } } From be6d13c7e7a26c2dd8e51e99848b1c4bb2a563ad Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 22:05:45 +0000 Subject: [PATCH 20/22] Update demo --- cpp/demo/custom_kernel/main.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index e5ef9428fd..de622b0a3e 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -140,10 +140,9 @@ double assemble_matrix1(const mesh::Geometry& g, const fem::DofMap& dofmap, la::MatrixCSR A(sp); auto ident = [](auto, auto, auto, auto) {}; // DOF permutation not required common::Timer timer("Assembler1 lambda (matrix)"); - fem::impl::assemble_cells(A.mat_add_values(), g.dofmap(), g.x(), cells, - {dofmap.map(), 1, cells}, ident, - {dofmap.map(), 1, cells}, ident, {}, {}, kernel, - std::span(), 0, {}, {}, {}); + fem::impl::assemble_cells( + A.mat_add_values(), g.dofmap(), g.x(), cells, {dofmap.map(), 1, cells}, + ident, {dofmap.map(), 1, cells}, ident, {}, {}, kernel, {}, {}, {}, {}); A.scatter_rev(); return A.squared_norm(); } From 1ff7836889656d95788e1f4ff1005f83e3705d78 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Wed, 26 Feb 2025 22:44:49 +0000 Subject: [PATCH 21/22] Remove debug output --- cpp/dolfinx/fem/assembler.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/dolfinx/fem/assembler.h b/cpp/dolfinx/fem/assembler.h index 26c1aa8db5..36b108d129 100644 --- a/cpp/dolfinx/fem/assembler.h +++ b/cpp/dolfinx/fem/assembler.h @@ -191,11 +191,8 @@ template T assemble_scalar(const Form& M) { const std::vector constants = pack_constants(M); - std::cout << "Allocate storage" << std::endl; auto coefficients = allocate_coefficient_storage(M); - std::cout << "Pack coefss" << std::endl; pack_coefficients(M, coefficients); - std::cout << "End Pack coefss" << std::endl; return assemble_scalar(M, std::span(constants), make_coefficients_span(coefficients)); } From 4279cf53d257d7b40d984cc3173e5a52210d2908 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Thu, 27 Feb 2025 09:14:35 +0000 Subject: [PATCH 22/22] Improve docs --- cpp/dolfinx/fem/assemble_matrix_impl.h | 54 ++++-- cpp/dolfinx/fem/assemble_vector_impl.h | 220 +++++++++++++------------ 2 files changed, 156 insertions(+), 118 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_matrix_impl.h b/cpp/dolfinx/fem/assemble_matrix_impl.h index c5daee183b..df5b73aeed 100644 --- a/cpp/dolfinx/fem/assemble_matrix_impl.h +++ b/cpp/dolfinx/fem/assemble_matrix_impl.h @@ -29,31 +29,34 @@ namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; /// @brief Typedef using mdspan2_t = md::mdspan>; -/// @brief Execute kernel over cells and accumulate result in matrix. +/// @brief Execute kernel over cells and accumulate result in a matrix. +/// /// @tparam T Matrix/form scalar type. /// @param mat_set Function that accumulates computed entries into a /// matrix. -/// @param x_dofmap Dofmap for the mesh geometry. -/// @param x Mesh geometry (coordinates). -/// @param cells Cell indices (in the integration domain mesh) to -/// execute the kernel over. These are the indices into the geometry -/// dofmap. -/// @param dofmap0 Test function (row) degree-of-freedom data holding -/// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. -/// @param P0 Function that applies transformation P_0 A in-place to -/// transform test degrees-of-freedom. -/// @param dofmap1 Trial function (column) degree-of-freedom data +/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. +/// @param[in] x Mesh geometry (coordinates). +/// @param[in] cells Cell indices to execute the kernel over. These are +/// the indices into the geometry dofmap `x_dofmap`. +/// @param[in] dofmap0 Test function (row) degree-of-freedom data +/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell +/// indices. +/// @param[in] P0 Function that applies transformation `P_0 A` in-place +/// to the computed tensor `A` to transform its test degrees-of-freedom. +/// @param[in] dofmap1 Trial function (column) degree-of-freedom data /// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell /// indices. -/// @param P1T Function that applies transformation A P_1^T in-place to -/// transform trial degrees-of-freedom. +/// @param[in] P1T Function that applies transformation `A P_1^T` +/// in-place to to the computed tensor `A` to transform trial +/// degrees-of-freedom. /// @param bc0 Marker for rows with Dirichlet boundary conditions /// applied. /// @param bc1 Marker for columns with Dirichlet boundary conditions /// applied. /// @param kernel Kernel function to execute over each cell. -/// @param coeffs Coefficient data array of shape (cells.size(), -/// cstride). +/// @param[in] coeffs Coefficient data in the kernel. It has shape +/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th +/// coefficient for cell `i`. /// @param constants Constant data. /// @param cell_info0 Cell permutation information for the test /// function mesh. @@ -159,6 +162,7 @@ void assemble_cells( /// @brief Execute kernel over exterior facets and accumulate result in /// a matrix. +/// /// @tparam T Matrix/form scalar type. /// @param[in] mat_set Function that accumulates computed entries into a /// matrix. @@ -299,6 +303,7 @@ void assemble_exterior_facets( /// @brief Execute kernel over interior facets and accumulate result in /// a matrix. +/// /// @tparam T Matrix/form scalar type. /// @param mat_set Function that accumulates computed entries into a /// matrix. @@ -493,6 +498,25 @@ void assemble_interior_facets( /// local indices. Rows (bc0) and columns (bc1) with Dirichlet /// conditions are zeroed. Markers (bc0 and bc1) can be empty if no bcs /// are applied. Matrix is not finalised. + +/// @brief Assemble (accumulate) into a matrix. +/// +/// Rows (bc0) and columns (bc1) with Dirichlet conditions are zeroed. +/// Markers (bc0 and bc1) can be empty if no Dirichlet conditions are +/// applied. +/// +/// @tparam T Scalar type. +/// @tparam U Geometry type. +/// @param[in] mat_set Function that accumulates computed entries into a +/// matrix. +/// @param[in] a Bilinear form to assemble. +/// @param[in] x Mesh geometry (coordinates). +/// @param[in] constants Constants that appear in `a`. +/// @param[in] coefficients Coefficients that appear in `a`. +/// @param bc0 Marker for rows with Dirichlet boundary conditions +/// applied. +/// @param bc1 Marker for columns with Dirichlet boundary conditions +/// applied. template void assemble_matrix( la::MatSet auto mat_set, const Form& a, diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 3bd5d440c5..4ffacef0c2 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -40,6 +40,7 @@ using mdspan2_t = md::mdspan>; /// @endcond /// @brief Apply boundary condition lifting for cell integrals. +/// /// @tparam T The scalar type. /// @tparam _bs0 The block size of the form test function dof map. If /// less than zero the block size is determined at runtime. If `_bs0` is @@ -50,26 +51,28 @@ using mdspan2_t = md::mdspan>; /// @param x_dofmap Dofmap for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). /// @param[in] kernel Kernel function to execute over each cell. -/// @param[in] cells Cell indices (in the integration domain mesh) to -/// execute the kernel over. These are the indices into the geometry -/// dofmap. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data holding -/// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. -/// @param[in] P0 Function that applies transformation P_0 A in-place to -/// transform test degrees-of-freedom. +/// @param[in] cells Cell indices to execute the kernel over. These are +/// the indices into the geometry dofmap `x_dofmap`. +/// @param[in] dofmap0 Test function (row) degree-of-freedom data +/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell +/// indices. +/// @param[in] P0 Function that applies transformation `P_0 A` in-place +/// to the computed tensor `A` to transform its test degrees-of-freedom. /// @param[in] dofmap1 Trial function (column) degree-of-freedom data /// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell /// indices. -/// @param[in] P1T Function that applies transformation A P_1^T in-place -/// to transform trial degrees-of-freedom. -/// @param[in] constants Constants data. -/// @param[in] coeffs The coefficient data array with shape -/// `(cells.size(), cstride)` flattened into row-major format. -/// @param[in] cell_info0 The cell permutation information for the test +/// @param[in] P1T Function that applies transformation `A P_1^T` +/// in-place to to the computed tensor `A` to transform trial +/// degrees-of-freedom. +/// @param[in] constants Constant data in the kernel. +/// @param[in] coeffs Coefficient data in the kernel. It has shape +/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th +/// coefficient for cell `i`. +/// @param[in] cell_info0 Cell permutation information for the test /// function mesh. -/// @param[in] cell_info1 The cell permutation information for the trial +/// @param[in] cell_info1 Cell permutation information for the trial /// function mesh. -/// @param[in] bc_values1 The value for entries with an applied boundary +/// @param[in] bc_values1 Value for entries with an applied boundary /// condition. /// @param[in] bc_markers1 Marker to identify which DOFs have boundary /// conditions applied. @@ -224,39 +227,43 @@ void _lift_bc_cells( } /// @brief Apply lifting for exterior facet integrals. -/// @tparam T The scalar type -/// @tparam _bs FIXME This is unused -/// @param[in,out] b The vector to modify -/// @param[in] x_dofmap Dofmap for the mesh geometry. +/// +/// @tparam T Scalar type. +/// @param[in,out] b Vector to modify. +/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each cell. -/// @param[in] facets Facet indices (in the integration domain mesh) to -/// execute the kernel over. -/// @param[in] dofmap0 Test function (row) degree-of-freedom data holding -/// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. -/// @param[in] P0 Function that applies transformation P_0 A in-place to -/// transform test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P1T Function that applies transformation A P_1^T in-place -/// to transform trial degrees-of-freedom. -/// @param[in] constants The constant data. -/// @param[in] coeffs The coefficient data array of shape (cells.size(), -/// cstride), flattened into row-major format. -/// @param[in] cell_info0 The cell permutation information for the test +/// @param[in] kernel Kernel function to execute over each facet. +/// @param[in] facets Facets to execute the kernel over, where for the +/// `i`th facet `facets(i, 0)` is the attached cell and `facets(i, 1)` +/// is the local index of the facet relative to the cell. +/// @param[in] dofmap0 Test function (row) degree-of-freedom data +/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap +/// indices. See `facets` documentation for the dofmap indices layout. +/// @param[in] P0 Function that applies the transformation `P_0 A` +/// in-place to `A` to transform the test degrees-of-freedom. +/// @param[in] dofmap1 Trial function (column) degree-of-freedom data. +/// See `dofmap0` for a description. +/// @param[in] P1T Function that applies the transformation `A P_1^T` +/// in-place to `A` to transform the trial degrees-of-freedom. +/// @param[in] constants Constant coefficient data in the kernel. +/// @param[in] coeffs Coefficient data in the kernel. It has shape +/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th +/// coefficient for cell `i`. +/// @param[in] cell_info0 Cell permutation information for the test /// function mesh. -/// @param[in] cell_info1 The cell permutation information for the trial +/// @param[in] cell_info1 Cell permutation information for the trial /// function mesh. -/// @param[in] bc_values1 The value for entries with an applied boundary +/// @param[in] bc_values1 Values for entries with an applied boundary /// condition. /// @param[in] bc_markers1 Marker to identify which DOFs have boundary /// conditions applied. /// @param[in] x0 The vector used in the lifting. -/// @param[in] alpha The scaling to apply. -/// @param[in] perms Facet permutation integer. Empty if facet -/// permutations are not required. -template +/// @param[in] alpha Scaling to apply. +/// @param[in] perms Facet permutation data, where `(cell_idx, +/// local_facet_idx)` is the permutation value for the facet attached to +/// the cell `cell_idx` with local index `local_facet_idx` relative to +/// the cell. Empty if facet permutations are not required. +template void _lift_bc_exterior_facets( std::span b, mdspan2_t x_dofmap, std::span> x, FEkernel auto kernel, @@ -371,41 +378,46 @@ void _lift_bc_exterior_facets( } /// @brief Apply lifting for interior facet integrals. +/// /// @tparam T Scalar type. -/// @tparam _bs FIXME This is unused. -/// @param[in,out] b The vector to modify -/// @param[in] x_dofmap Dofmap for the mesh geometry. +/// @param[in,out] b Vector to modify +/// @param[in] x_dofmap Degree-of-freedom map for the mesh geometry. /// @param[in] x Mesh geometry (coordinates). -/// @param[in] kernel Kernel function to execute over each cell. -/// @param[in] facets Facet indices (in the integration domain mesh) to -/// execute the kernel over. +/// @param[in] kernel Kernel function to execute over each facet. +/// @param[in] facets Facets to execute the kernel over, where for the +/// `i`th facet `facets(i, 0, 0)` is the first attached cell and +/// `facets(i, 0, 1)` is the local index of the facet relative to the +/// first cell, and `facets(i, 1, 0)` is the second first attached cell +/// and `facets(i, 1, 1)` is the local index of the facet relative to +/// the second cell. /// @param[in] dofmap0 Test function (row) degree-of-freedom data /// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P0 Function that applies transformation P_0 A in-place to -/// transform test degrees-of-freedom. -/// @param[in] dofmap1 Trial function (column) degree-of-freedom data -/// holding the (0) dofmap, (1) dofmap block size and (2) dofmap cell -/// indices. -/// @param[in] P1T Function that applies transformation A P_1^T in-place -/// to transform trial degrees-of-freedom. -/// @param[in] constants The constant data. -/// @param[in] coeffs The coefficient data array of shape (cells.size(), -/// cstride), flattened into row-major format. -/// @param[in] cstride The coefficient stride. -/// @param[in] cell_info0 The cell permutation information for the test +/// indices. See `facets` documentation for the dofmap indices layout. +/// @param[in] P0 Function that applies the transformation `P_0 A` +/// in-place to `A` to transform the test degrees-of-freedom. +/// @param[in] dofmap1 Trial function (column) degree-of-freedom data. +/// See `dofmap0` for a description. +/// @param[in] P1T Function that applies the transformation `A P_1^T` +/// in-place to `A` to transform the trial degrees-of-freedom. +/// @param[in] constants Constant coefficient data in the kernel. +/// @param[in] coeffs Coefficient data in the kernel. It has shape +/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th +/// coefficient for cell `i`. +/// @param[in] cell_info0 Cell permutation information for the test /// function mesh. -/// @param[in] cell_info1 The cell permutation information for the trial +/// @param[in] cell_info1 Cell permutation information for the trial /// function mesh. -/// @param[in] perms Facet permutation integer. Empty if facet -/// permutations are not required. -/// @param[in] bc_values1 The value for entries with an applied boundary +/// @param[in] bc_values1 Values for entries with an applied boundary /// condition. /// @param[in] bc_markers1 Marker to identify which DOFs have boundary /// conditions applied. -/// @param[in] x0 The vector used in the lifting. -/// @param[in] alpha The scaling to apply -template +/// @param[in] x0 Vector used in the lifting. +/// @param[in] alpha Scaling to apply +/// @param[in] perms Facet permutation data, where `(cell_idx, +/// local_facet_idx)` is the permutation value for the facet attached to +/// the cell `cell_idx` with local index `local_facet_idx` relative to +/// the cell. Empty if facet permutations are not required. +template void _lift_bc_interior_facets( std::span b, mdspan2_t x_dofmap, std::span> x, FEkernel auto kernel, @@ -426,10 +438,9 @@ void _lift_bc_interior_facets( md::dynamic_extent>> coeffs, std::span cell_info0, - std::span cell_info1, - md::mdspan> perms, - std::span bc_values1, std::span bc_markers1, - std::span x0, T alpha) + std::span cell_info1, std::span bc_values1, + std::span bc_markers1, std::span x0, T alpha, + md::mdspan> perms) { if (facets.empty()) return; @@ -610,26 +621,28 @@ void _lift_bc_interior_facets( } } -/// @brief Execute kernel over cells and accumulate result in vector -/// @tparam T The scalar type -/// @tparam _bs The block size of the form test function dof map. If -/// less than zero the block size is determined at runtime. If `_bs` is +/// @brief Execute kernel over cells and accumulate result in vector. +/// +/// @tparam T Scalar type +/// @tparam _bs Block size of the form test function dof map. If less +/// than zero the block size is determined at runtime. If `_bs` is /// positive the block size is used as a compile-time constant, which /// has performance benefits. -/// @param P0 Function that applies transformation P0.b in-place to -/// transform test degrees-of-freedom. -/// @param b The vector to accumulate into -/// @param x_dofmap Dofmap for the mesh geometry. -/// @param x Mesh geometry (coordinates). -/// @param cells Cell indices (in the integration domain mesh) to execute -/// the kernel over. These are the indices into the geometry dofmap. -/// @param dofmap Test function (row) degree-of-freedom data holding +/// @param[in] P0 Function that applies transformation `P0.b` in-place +/// to `b` to transform test degrees-of-freedom. +/// @param[in,out] b Aray to accumulate into. +/// @param[in] x_dofmap Dofmap for the mesh geometry. +/// @param[in] x Mesh geometry (coordinates). +/// @param[in] cells Cell indices to execute the kernel over. These are +/// the indices into the geometry dofmap. +/// @param[in] dofmap Test function (row) degree-of-freedom data holding /// the (0) dofmap, (1) dofmap block size and (2) dofmap cell indices. -/// @param kernel Kernel function to execute over each cell. -/// @param constants The constant data -/// @param coeffs The coefficient data array of shape (cells.size(), -/// cstride). -/// @param cell_info0 The cell permutation information for the test +/// @param[in] kernel Kernel function to execute over each cell. +/// @param[in] constants Constant coefficient data in the kernel. +/// @param[in] coeffs Coefficient data in the kernel. It has shape +/// `(cells.size(), num_cell_coeffs)`. `coeffs(i, j)` is the `j`th +/// coefficient for cell `i`. +/// @param[in] cell_info0 Cell permutation information for the test /// function mesh. template void assemble_cells( @@ -691,12 +704,13 @@ void assemble_cells( } /// @brief Execute kernel over cells and accumulate result in vector. -/// @tparam T The scalar type +/// +/// @tparam T Scalar type. /// @tparam _bs The block size of the form test function dof map. If /// less than zero the block size is determined at runtime. If `_bs` is /// positive the block size is used as a compile-time constant, which /// has performance benefits. -/// @param P0 Function that applies transformation P0.b in-place to +/// @param P0 Function that applies transformation `P0.b` in-place to /// transform test degrees-of-freedom. /// @param[in,out] b The vector to accumulate into. /// @param[in] x_dofmap Dofmap for the mesh geometry. @@ -1064,7 +1078,7 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, b, x_dofmap, x, kernel, facets, {dofmap0, bs0, facets0}, P0, {dofmap1, bs1, facets1}, P1T, constants, mdspanx2x_t(coeffs.data(), facets.extent(0), 2, cstride), cell_info0, - cell_info1, perms, bc_values1, bc_markers1, x0, alpha); + cell_info1, bc_values1, bc_markers1, x0, alpha, perms); } } @@ -1078,16 +1092,16 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, /// built), but the trial space may differ. If x0 is not supplied, then /// it is treated as zero. /// -/// @param[in,out] b The vector to be modified -/// @param[in] a The bilinear forms, where a[j] is the form that -/// generates A_j -/// @param[in] constants Constants that appear in `a` -/// @param[in] coeffs Coefficients that appear in `a` +/// @param[in,out] b Array to be modified. +/// @param[in] a Bilinear forms, where `a[j]` is the form that generates +/// `A_j`. +/// @param[in] constants Constants that appear in `a`. +/// @param[in] coeffs Coefficients that appear in `a`. /// @param[in] bcs1 List of boundary conditions for each block, i.e. -/// bcs1[2] are the boundary conditions applied to the columns of a[2] / -/// x0[2] block -/// @param[in] x0 The vectors used in the lifting -/// @param[in] alpha Scaling to apply +/// `bcs1[2]` are the boundary conditions applied to the columns of +/// `a[2]`/ `x0[2]` block. +/// @param[in] x0 Arrays used in the lifting. +/// @param[in] alpha Scaling to apply. template void apply_lifting( std::span b, @@ -1154,7 +1168,7 @@ void apply_lifting( } /// @brief Assemble linear form into a vector. -/// @param[in,out] b The vector to be assembled. It will not be zeroed +/// @param[in,out] b Array to be accumulated into. It will not be zeroed /// before assembly. /// @param[in] L Linear forms to assemble into b. /// @param[in] x Mesh coordinates. @@ -1334,9 +1348,9 @@ void assemble_vector( } /// @brief Assemble linear form into a vector. -/// @param[in,out] b The vector to be assembled. It will not be zeroed +/// @param[in,out] b Array to accumulate into. It will not be zeroed /// before assembly. -/// @param[in] L The linear forms to assemble into b. +/// @param[in] L Linear forms to assemble into b. /// @param[in] constants Packed constants that appear in `L`. /// @param[in] coefficients Packed coefficients that appear in `L.` template