Skip to content

Commit

Permalink
Merge pull request #350 from Exawind/small_problem_gpu_optimization
Browse files Browse the repository at this point in the history
Made a few optimizations for running OpenTurbine on GPU.
  • Loading branch information
ddement authored Feb 26, 2025
2 parents edd5491 + 031d7e2 commit a68ae74
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 67 deletions.
42 changes: 28 additions & 14 deletions src/elements/beams/create_beams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,38 @@ inline Beams CreateBeams(const BeamsInput& beams_input, const std::vector<Node>&
}

// Populate views for this element
PopulateElementViews(
beams_input.elements[i], // Element inputs
nodes,
Kokkos::subview(host_node_x0, i, Kokkos::make_pair(size_t{0U}, num_nodes), Kokkos::ALL),
Kokkos::subview(host_qp_weight, i, Kokkos::make_pair(size_t{0U}, num_qps)),
PopulateNodeX0(
beams_input.elements[i], nodes,
Kokkos::subview(host_node_x0, i, Kokkos::make_pair(0UL, num_nodes), Kokkos::ALL)
);
PopulateQPWeight(
beams_input.elements[i],
Kokkos::subview(host_qp_weight, i, Kokkos::make_pair(0UL, num_qps))
);
PopulateShapeFunctionValues(
beams_input.elements[i], nodes,
Kokkos::subview(
host_qp_Mstar, i, Kokkos::make_pair(size_t{0U}, num_qps), Kokkos::ALL, Kokkos::ALL
),
host_shape_interp, i, Kokkos::make_pair(0UL, num_nodes),
Kokkos::make_pair(0UL, num_qps)
)
);
PopulateShapeFunctionDerivatives(
beams_input.elements[i], nodes,
Kokkos::subview(
host_qp_Cstar, i, Kokkos::make_pair(size_t{0U}, num_qps), Kokkos::ALL, Kokkos::ALL
),
host_shape_deriv, i, Kokkos::make_pair(0UL, num_nodes),
Kokkos::make_pair(0UL, num_qps)
)
);
PopulateQPMstar(
beams_input.elements[i],
Kokkos::subview(
host_shape_interp, i, Kokkos::make_pair(size_t{0U}, num_nodes),
Kokkos::make_pair(size_t{0U}, num_qps)
),
host_qp_Mstar, i, Kokkos::make_pair(0UL, num_qps), Kokkos::ALL, Kokkos::ALL
)
);
PopulateQPCstar(
beams_input.elements[i],
Kokkos::subview(
host_shape_deriv, i, Kokkos::make_pair(size_t{0U}, num_nodes),
Kokkos::make_pair(size_t{0U}, num_qps)
host_qp_Cstar, i, Kokkos::make_pair(0UL, num_qps), Kokkos::ALL, Kokkos::ALL
)
);
}
Expand Down
124 changes: 86 additions & 38 deletions src/elements/beams/populate_element_views.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,48 +7,46 @@

namespace openturbine {

/**
* @brief Populates Views with beam element data for numerical integration
*
* @param elem The beam element containing node, quadrature, and section data
* @param node_x0 View for initial nodal positions and orientations (size = num_nodes x num_dofs)
* @param qp_weight View for quadrature point weights (size = num_qps)
* @param qp_Mstar View for interpolated mass matrices at quadrature points (size = num_qps x 6 x 6)
* @param qp_Cstar View for interpolated stiffness matrices at quadrature points
* (size = num_qps x 6 x 6)
* @param shape_interp View for shape function interpolation weights (size = num_nodes x num_qps)
* @param shape_deriv View for shape function derivative weights (size = num_nodes x num_qps)
*
* This function transforms element data into a format suitable for numerical integration:
* - Maps node and section positions from [0,1] to [-1,1]
* - Computes shape function weights for interpolation and derivatives using Lagrange polynomials
* - Interpolates section properties (mass and stiffness) at quadrature points using
* linear interpolation
*/
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
inline void PopulateElementViews(
const BeamElement& elem, const std::vector<Node>& nodes, T1 node_x0, T2 qp_weight, T3 qp_Mstar,
T4 qp_Cstar, T5 shape_interp, T6 shape_deriv
/// @brief Populate the node initial position and orientation
inline void PopulateNodeX0(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double* [7], Kokkos::LayoutStride, Kokkos::HostSpace>& node_x0
) {
// Map node positions from [0,1] to [-1,1]
std::vector<double> node_xi(elem.node_ids.size());
for (size_t i = 0; i < elem.node_ids.size(); ++i) {
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
}

// Populate node initial position and orientation
for (size_t j = 0; j < elem.node_ids.size(); ++j) {
for (size_t k = 0U; k < kLieGroupComponents; ++k) {
for (size_t k = 0U; k < 7U; ++k) {
node_x0(j, k) = nodes[elem.node_ids[j]].x[k];
}
}
}

// Populate quadrature weights
/// @brief Populate the integration weights at each quadrature point
inline void PopulateQPWeight(
const BeamElement& elem,
const Kokkos::View<double*, Kokkos::LayoutStride, Kokkos::HostSpace>& qp_weight
) {
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
qp_weight(j) = elem.quadrature[j][1];
}
}

/// @brief Map node positions from [0,1] to [-1,1]
inline std::vector<double> MapNodePositions(
const BeamElement& elem, const std::vector<Node>& nodes
) {
std::vector<double> node_xi(elem.node_ids.size());
for (size_t i = 0; i < elem.node_ids.size(); ++i) {
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
}
return node_xi;
}

/// @brief Populate shape function values at each quadrature point
inline void PopulateShapeFunctionValues(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_interp
) {
const auto node_xi = MapNodePositions(elem, nodes);

// Populate shape interpolation and derivative weights (Lagrange polynomial)
std::vector<double> weights;
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
auto qp_xi = elem.quadrature[j][0];
Expand All @@ -57,35 +55,85 @@ inline void PopulateElementViews(
for (size_t k = 0; k < node_xi.size(); ++k) {
shape_interp(k, j) = weights[k];
}
}
}

/// @brief Populate shape function derivatives at each quadrature point
inline void PopulateShapeFunctionDerivatives(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_deriv
) {
const auto node_xi = MapNodePositions(elem, nodes);

std::vector<double> weights;
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
auto qp_xi = elem.quadrature[j][0];

LagrangePolynomialDerivWeights(qp_xi, node_xi, weights);
for (size_t k = 0; k < node_xi.size(); ++k) {
shape_deriv(k, j) = weights[k];
}
}
}

// Map section positions from [0,1] to [-1,1]
/// @brief Map section positions from [0,1] to [-1,1]
inline std::vector<double> MapSectionPositions(const BeamElement& elem) {
std::vector<double> section_xi(elem.sections.size());
for (size_t i = 0; i < elem.sections.size(); ++i) {
section_xi[i] = 2 * elem.sections[i].position - 1;
}
return section_xi;
}

/// @brief Populate mass matrix values at each quadrature point
inline void PopulateQPMstar(
const BeamElement& elem,
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Mstar
) {
const auto section_xi = MapSectionPositions(elem);
std::vector<double> weights(elem.sections.size());

// Populate section mass and stiffness matrices at quadrature points by
// linearly interpolating between section values
Kokkos::deep_copy(qp_Mstar, 0.);
Kokkos::deep_copy(qp_Cstar, 0.);
for (size_t i = 0; i < elem.quadrature.size(); ++i) {
auto qp_xi = elem.quadrature[i][0];
LinearInterpWeights(qp_xi, section_xi, weights);
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Mstar(i, m, n) = 0.;
}
}
for (size_t j = 0; j < section_xi.size(); ++j) {
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Mstar(i, m, n) += elem.sections[j].M_star[m][n] * weights[j];
qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
}
}
}
}
}

/// @brief Populate stiffness matrix values at each quadrature point
inline void PopulateQPCstar(
const BeamElement& elem,
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Cstar
) {
const auto section_xi = MapSectionPositions(elem);
std::vector<double> weights(elem.sections.size());

for (size_t i = 0; i < elem.quadrature.size(); ++i) {
auto qp_xi = elem.quadrature[i][0];
LinearInterpWeights(qp_xi, section_xi, weights);
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Cstar(i, m, n) = 0.;
}
}
for (size_t j = 0; j < section_xi.size(); ++j) {
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
}
}
}
}
}
} // namespace openturbine
19 changes: 11 additions & 8 deletions src/solver/contribute_beams_to_sparse_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,17 @@ struct ContributeBeamsToSparseMatrix {
constexpr auto is_sorted = true;
constexpr auto force_atomic =
!std::is_same_v<Kokkos::TeamPolicy<>::member_type::execution_space, Kokkos::Serial>;
Kokkos::parallel_for(Kokkos::TeamThreadRange(member, num_nodes), [&](size_t node_1) {
const auto num_dofs = count_active_dofs(element_freedom_signature(i, node_1));
auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
auto row_data = RowDataType(row_data_data.data(), num_dofs);
auto col_idx = ColIdxType(col_idx_data.data(), num_dofs);
Kokkos::parallel_for(
Kokkos::TeamThreadRange(member, num_nodes * num_nodes),
[&](size_t node_12) {
const auto node_1 = node_12 % num_nodes;
const auto node_2 = node_12 / num_nodes;
const auto num_dofs = count_active_dofs(element_freedom_signature(i, node_1));
auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
auto row_data = RowDataType(row_data_data.data(), num_dofs);
auto col_idx = ColIdxType(col_idx_data.data(), num_dofs);

for (auto node_2 = 0U; node_2 < num_nodes; ++node_2) {
for (auto component_2 = 0U; component_2 < num_dofs; ++component_2) {
col_idx(component_2) =
static_cast<int>(element_freedom_table(i, node_2, component_2));
Expand All @@ -47,7 +50,7 @@ struct ContributeBeamsToSparseMatrix {
);
}
}
});
);
}
};

Expand Down
7 changes: 1 addition & 6 deletions src/solver/create_sparse_dense_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@ CreateSparseDenseSolver(
Teuchos::RCP<GlobalCrsMatrixType>& A, Teuchos::RCP<GlobalMultiVectorType>& x_mv,
Teuchos::RCP<GlobalMultiVectorType>& b
) {
using ExecutionSpace = typename GlobalCrsMatrixType::execution_space;

const auto solver_name = (std::is_same_v<ExecutionSpace, Kokkos::DefaultHostExecutionSpace>)
? std::string{"klu2"}
: std::string{"basker"};

const auto solver_name = std::string{"klu2"};
auto amesos_solver =
Amesos2::create<GlobalCrsMatrixType, GlobalMultiVectorType>(solver_name, A, x_mv, b);
amesos_solver->symbolicFactorization();
Expand Down
5 changes: 4 additions & 1 deletion src/step/assemble_constraints_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ inline void AssembleConstraintsMatrix(Solver& solver, Constraints& constraints)
"CopySparseValuesToTranspose", constraint_transpose_policy,
CopySparseValuesToTranspose<Solver::CrsMatrixType>{solver.B, tmp_row_map, solver.B_t}
);
KokkosSparse::sort_crs_matrix(solver.B_t);
{
auto sort_region = Kokkos::Profiling::ScopedRegion("Sort");
KokkosSparse::sort_crs_matrix(solver.B_t);
}
}

{
Expand Down

0 comments on commit a68ae74

Please sign in to comment.