Exawind · ddement · Feb 26, 2025 · Feb 7, 2025 · Feb 7, 2025 · Feb 10, 2025
diff --git a/src/elements/beams/create_beams.hpp b/src/elements/beams/create_beams.hpp
@@ -51,24 +51,38 @@ inline Beams CreateBeams(const BeamsInput& beams_input, const std::vector<Node>&
         }
 
         // Populate views for this element
-        PopulateElementViews(
-            beams_input.elements[i],  // Element inputs
-            nodes,
-            Kokkos::subview(host_node_x0, i, Kokkos::make_pair(size_t{0U}, num_nodes), Kokkos::ALL),
-            Kokkos::subview(host_qp_weight, i, Kokkos::make_pair(size_t{0U}, num_qps)),
+        PopulateNodeX0(
+            beams_input.elements[i], nodes,
+            Kokkos::subview(host_node_x0, i, Kokkos::make_pair(0UL, num_nodes), Kokkos::ALL)
+        );
+        PopulateQPWeight(
+            beams_input.elements[i],
+            Kokkos::subview(host_qp_weight, i, Kokkos::make_pair(0UL, num_qps))
+        );
+        PopulateShapeFunctionValues(
+            beams_input.elements[i], nodes,
             Kokkos::subview(
-                host_qp_Mstar, i, Kokkos::make_pair(size_t{0U}, num_qps), Kokkos::ALL, Kokkos::ALL
-            ),
+                host_shape_interp, i, Kokkos::make_pair(0UL, num_nodes),
+                Kokkos::make_pair(0UL, num_qps)
+            )
+        );
+        PopulateShapeFunctionDerivatives(
+            beams_input.elements[i], nodes,
             Kokkos::subview(
-                host_qp_Cstar, i, Kokkos::make_pair(size_t{0U}, num_qps), Kokkos::ALL, Kokkos::ALL
-            ),
+                host_shape_deriv, i, Kokkos::make_pair(0UL, num_nodes),
+                Kokkos::make_pair(0UL, num_qps)
+            )
+        );
+        PopulateQPMstar(
+            beams_input.elements[i],
             Kokkos::subview(
-                host_shape_interp, i, Kokkos::make_pair(size_t{0U}, num_nodes),
-                Kokkos::make_pair(size_t{0U}, num_qps)
-            ),
+                host_qp_Mstar, i, Kokkos::make_pair(0UL, num_qps), Kokkos::ALL, Kokkos::ALL
+            )
+        );
+        PopulateQPCstar(
+            beams_input.elements[i],
             Kokkos::subview(
-                host_shape_deriv, i, Kokkos::make_pair(size_t{0U}, num_nodes),
-                Kokkos::make_pair(size_t{0U}, num_qps)
+                host_qp_Cstar, i, Kokkos::make_pair(0UL, num_qps), Kokkos::ALL, Kokkos::ALL
             )
         );
     }

diff --git a/src/elements/beams/populate_element_views.hpp b/src/elements/beams/populate_element_views.hpp
@@ -7,48 +7,35 @@
 
 namespace openturbine {
 
-/**
- * @brief Populates Views with beam element data for numerical integration
- *
- * @param elem The beam element containing node, quadrature, and section data
- * @param node_x0 View for initial nodal positions and orientations (size = num_nodes x num_dofs)
- * @param qp_weight View for quadrature point weights (size = num_qps)
- * @param qp_Mstar View for interpolated mass matrices at quadrature points (size = num_qps x 6 x 6)
- * @param qp_Cstar View for interpolated stiffness matrices at quadrature points
- *                (size = num_qps x 6 x 6)
- * @param shape_interp View for shape function interpolation weights (size = num_nodes x num_qps)
- * @param shape_deriv View for shape function derivative weights (size = num_nodes x num_qps)
- *
- * This function transforms element data into a format suitable for numerical integration:
- * - Maps node and section positions from [0,1] to [-1,1]
- * - Computes shape function weights for interpolation and derivatives using Lagrange polynomials
- * - Interpolates section properties (mass and stiffness) at quadrature points using
- *   linear interpolation
- */
-template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-inline void PopulateElementViews(
-    const BeamElement& elem, const std::vector<Node>& nodes, T1 node_x0, T2 qp_weight, T3 qp_Mstar,
-    T4 qp_Cstar, T5 shape_interp, T6 shape_deriv
+inline void PopulateNodeX0(
+    const BeamElement& elem, const std::vector<Node>& nodes,
+    const Kokkos::View<double* [7], Kokkos::LayoutStride, Kokkos::HostSpace>& node_x0
 ) {
-    // Map node positions from [0,1] to [-1,1]
-    std::vector<double> node_xi(elem.node_ids.size());
-    for (size_t i = 0; i < elem.node_ids.size(); ++i) {
-        node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
-    }
-
-    // Populate node initial position and orientation
     for (size_t j = 0; j < elem.node_ids.size(); ++j) {
         for (size_t k = 0U; k < kLieGroupComponents; ++k) {
             node_x0(j, k) = nodes[elem.node_ids[j]].x[k];
         }
     }
+}
 
-    // Populate quadrature weights
+inline void PopulateQPWeight(
+    const BeamElement& elem,
+    const Kokkos::View<double*, Kokkos::LayoutStride, Kokkos::HostSpace>& qp_weight
+) {
     for (size_t j = 0; j < elem.quadrature.size(); ++j) {
         qp_weight(j) = elem.quadrature[j][1];
     }
+}
+
+inline void PopulateShapeFunctionValues(
+    const BeamElement& elem, const std::vector<Node>& nodes,
+    const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_interp
+) {
+    std::vector<double> node_xi(elem.node_ids.size());
+    for (size_t i = 0; i < elem.node_ids.size(); ++i) {
+        node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
+    }
 
-    // Populate shape interpolation and derivative weights (Lagrange polynomial)
     std::vector<double> weights;
     for (size_t j = 0; j < elem.quadrature.size(); ++j) {
         auto qp_xi = elem.quadrature[j][0];
@@ -57,35 +44,82 @@ inline void PopulateElementViews(
         for (size_t k = 0; k < node_xi.size(); ++k) {
             shape_interp(k, j) = weights[k];
         }
+    }
+}
+
+inline void PopulateShapeFunctionDerivatives(
+    const BeamElement& elem, const std::vector<Node>& nodes,
+    const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_deriv
+) {
+    std::vector<double> node_xi(elem.node_ids.size());
+    for (size_t i = 0; i < elem.node_ids.size(); ++i) {
+        node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
+    }
+
+    std::vector<double> weights;
+    for (size_t j = 0; j < elem.quadrature.size(); ++j) {
+        auto qp_xi = elem.quadrature[j][0];
 
         LagrangePolynomialDerivWeights(qp_xi, node_xi, weights);
         for (size_t k = 0; k < node_xi.size(); ++k) {
             shape_deriv(k, j) = weights[k];
         }
     }
+}
 
-    // Map section positions from [0,1] to [-1,1]
+inline void PopulateQPMstar(
+    const BeamElement& elem,
+    const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Mstar
+) {
+    std::vector<double> weights(elem.sections.size());
     std::vector<double> section_xi(elem.sections.size());
     for (size_t i = 0; i < elem.sections.size(); ++i) {
         section_xi[i] = 2 * elem.sections[i].position - 1;
     }
 
-    // Populate section mass and stiffness matrices at quadrature points by
-    // linearly interpolating between section values
-    Kokkos::deep_copy(qp_Mstar, 0.);
-    Kokkos::deep_copy(qp_Cstar, 0.);
     for (size_t i = 0; i < elem.quadrature.size(); ++i) {
         auto qp_xi = elem.quadrature[i][0];
         LinearInterpWeights(qp_xi, section_xi, weights);
+        for (size_t m = 0; m < 6; ++m) {
+            for (size_t n = 0; n < 6; ++n) {
+                qp_Mstar(i, m, n) = 0.;
+            }
+        }
         for (size_t j = 0; j < section_xi.size(); ++j) {
             for (size_t m = 0; m < 6; ++m) {
                 for (size_t n = 0; n < 6; ++n) {
                     qp_Mstar(i, m, n) += elem.sections[j].M_star[m][n] * weights[j];
-                    qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
                 }
             }
         }
     }
 }
 
+inline void PopulateQPCstar(
+    const BeamElement& elem,
+    const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Cstar
+) {
+    std::vector<double> weights(elem.sections.size());
+    std::vector<double> section_xi(elem.sections.size());
+    for (size_t i = 0; i < elem.sections.size(); ++i) {
+        section_xi[i] = 2 * elem.sections[i].position - 1;
+    }
+
+    for (size_t i = 0; i < elem.quadrature.size(); ++i) {
+        auto qp_xi = elem.quadrature[i][0];
+        LinearInterpWeights(qp_xi, section_xi, weights);
+        for (size_t m = 0; m < 6; ++m) {
+            for (size_t n = 0; n < 6; ++n) {
+                qp_Cstar(i, m, n) = 0.;
+            }
+        }
+        for (size_t j = 0; j < section_xi.size(); ++j) {
+            for (size_t m = 0; m < 6; ++m) {
+                for (size_t n = 0; n < 6; ++n) {
+                    qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
+                }
+            }
+        }
+    }
+}
 }  // namespace openturbine
diff --git a/src/solver/contribute_beams_to_sparse_matrix.hpp b/src/solver/contribute_beams_to_sparse_matrix.hpp
@@ -24,14 +24,17 @@ struct ContributeBeamsToSparseMatrix {
         constexpr auto is_sorted = true;
         constexpr auto force_atomic =
             !std::is_same_v<Kokkos::TeamPolicy<>::member_type::execution_space, Kokkos::Serial>;
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, num_nodes), [&](size_t node_1) {
-            const auto num_dofs = count_active_dofs(element_freedom_signature(i, node_1));
-            auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
-            auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
-            auto row_data = RowDataType(row_data_data.data(), num_dofs);
-            auto col_idx = ColIdxType(col_idx_data.data(), num_dofs);
+        Kokkos::parallel_for(
+            Kokkos::TeamThreadRange(member, num_nodes * num_nodes),
+            [&](size_t node_12) {
+                const auto node_1 = node_12 % num_nodes;
+                const auto node_2 = node_12 / num_nodes;
+                const auto num_dofs = count_active_dofs(element_freedom_signature(i, node_1));
+                auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
+                auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
+                auto row_data = RowDataType(row_data_data.data(), num_dofs);
+                auto col_idx = ColIdxType(col_idx_data.data(), num_dofs);
 
-            for (auto node_2 = 0U; node_2 < num_nodes; ++node_2) {
                 for (auto component_2 = 0U; component_2 < num_dofs; ++component_2) {
                     col_idx(component_2) =
                         static_cast<int>(element_freedom_table(i, node_2, component_2));
@@ -47,7 +50,7 @@ struct ContributeBeamsToSparseMatrix {
                     );
                 }
             }
-        });
+        );
     }
 };
 

diff --git a/src/solver/create_sparse_dense_solver.hpp b/src/solver/create_sparse_dense_solver.hpp
@@ -12,12 +12,7 @@ CreateSparseDenseSolver(
     Teuchos::RCP<GlobalCrsMatrixType>& A, Teuchos::RCP<GlobalMultiVectorType>& x_mv,
     Teuchos::RCP<GlobalMultiVectorType>& b
 ) {
-    using ExecutionSpace = typename GlobalCrsMatrixType::execution_space;
-
-    const auto solver_name = (std::is_same_v<ExecutionSpace, Kokkos::DefaultHostExecutionSpace>)
-                                 ? std::string{"klu2"}
-                                 : std::string{"basker"};
-
+    const auto solver_name = std::string{"klu2"};
     auto amesos_solver =
         Amesos2::create<GlobalCrsMatrixType, GlobalMultiVectorType>(solver_name, A, x_mv, b);
     amesos_solver->symbolicFactorization();

diff --git a/src/step/assemble_constraints_matrix.hpp b/src/step/assemble_constraints_matrix.hpp
@@ -44,7 +44,10 @@ inline void AssembleConstraintsMatrix(Solver& solver, Constraints& constraints)
             "CopySparseValuesToTranspose", constraint_transpose_policy,
             CopySparseValuesToTranspose<Solver::CrsMatrixType>{solver.B, tmp_row_map, solver.B_t}
         );
-        KokkosSparse::sort_crs_matrix(solver.B_t);
+        {
+            auto sort_region = Kokkos::Profiling::ScopedRegion("Sort");
+            KokkosSparse::sort_crs_matrix(solver.B_t);
+        }
     }
 
     {