Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made a few optimizations for running OpenTurbine on GPU. #350

Merged
merged 4 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 28 additions & 14 deletions src/elements/beams/create_beams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,38 @@ inline Beams CreateBeams(const BeamsInput& beams_input, const std::vector<Node>&
}

// Populate views for this element
PopulateElementViews(
beams_input.elements[i], // Element inputs
nodes,
Kokkos::subview(host_node_x0, i, Kokkos::make_pair(size_t{0U}, num_nodes), Kokkos::ALL),
Kokkos::subview(host_qp_weight, i, Kokkos::make_pair(size_t{0U}, num_qps)),
PopulateNodeX0(
beams_input.elements[i], nodes,
Kokkos::subview(host_node_x0, i, Kokkos::make_pair(0UL, num_nodes), Kokkos::ALL)
);
PopulateQPWeight(
beams_input.elements[i],
Kokkos::subview(host_qp_weight, i, Kokkos::make_pair(0UL, num_qps))
);
PopulateShapeFunctionValues(
beams_input.elements[i], nodes,
Kokkos::subview(
host_qp_Mstar, i, Kokkos::make_pair(size_t{0U}, num_qps), Kokkos::ALL, Kokkos::ALL
),
host_shape_interp, i, Kokkos::make_pair(0UL, num_nodes),
Kokkos::make_pair(0UL, num_qps)
)
);
PopulateShapeFunctionDerivatives(
beams_input.elements[i], nodes,
Kokkos::subview(
host_qp_Cstar, i, Kokkos::make_pair(size_t{0U}, num_qps), Kokkos::ALL, Kokkos::ALL
),
host_shape_deriv, i, Kokkos::make_pair(0UL, num_nodes),
Kokkos::make_pair(0UL, num_qps)
)
);
PopulateQPMstar(
beams_input.elements[i],
Kokkos::subview(
host_shape_interp, i, Kokkos::make_pair(size_t{0U}, num_nodes),
Kokkos::make_pair(size_t{0U}, num_qps)
),
host_qp_Mstar, i, Kokkos::make_pair(0UL, num_qps), Kokkos::ALL, Kokkos::ALL
)
);
PopulateQPCstar(
beams_input.elements[i],
Kokkos::subview(
host_shape_deriv, i, Kokkos::make_pair(size_t{0U}, num_nodes),
Kokkos::make_pair(size_t{0U}, num_qps)
host_qp_Cstar, i, Kokkos::make_pair(0UL, num_qps), Kokkos::ALL, Kokkos::ALL
)
);
}
Expand Down
108 changes: 71 additions & 37 deletions src/elements/beams/populate_element_views.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,48 +7,35 @@

namespace openturbine {

/**
* @brief Populates Views with beam element data for numerical integration
*
* @param elem The beam element containing node, quadrature, and section data
* @param node_x0 View for initial nodal positions and orientations (size = num_nodes x num_dofs)
* @param qp_weight View for quadrature point weights (size = num_qps)
* @param qp_Mstar View for interpolated mass matrices at quadrature points (size = num_qps x 6 x 6)
* @param qp_Cstar View for interpolated stiffness matrices at quadrature points
* (size = num_qps x 6 x 6)
* @param shape_interp View for shape function interpolation weights (size = num_nodes x num_qps)
* @param shape_deriv View for shape function derivative weights (size = num_nodes x num_qps)
*
* This function transforms element data into a format suitable for numerical integration:
* - Maps node and section positions from [0,1] to [-1,1]
* - Computes shape function weights for interpolation and derivatives using Lagrange polynomials
* - Interpolates section properties (mass and stiffness) at quadrature points using
* linear interpolation
*/
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
inline void PopulateElementViews(
const BeamElement& elem, const std::vector<Node>& nodes, T1 node_x0, T2 qp_weight, T3 qp_Mstar,
T4 qp_Cstar, T5 shape_interp, T6 shape_deriv
inline void PopulateNodeX0(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double* [7], Kokkos::LayoutStride, Kokkos::HostSpace>& node_x0
) {
// Map node positions from [0,1] to [-1,1]
std::vector<double> node_xi(elem.node_ids.size());
for (size_t i = 0; i < elem.node_ids.size(); ++i) {
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
}

// Populate node initial position and orientation
for (size_t j = 0; j < elem.node_ids.size(); ++j) {
for (size_t k = 0U; k < kLieGroupComponents; ++k) {
node_x0(j, k) = nodes[elem.node_ids[j]].x[k];
}
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I propose the following readability improvement:

/// @brief Populate the node initial position and orientation
inline void PopulateNodeX0(
    const BeamElement& elem, const std::vector<Node>& nodes,
    const Kokkos::View<double* [7], Kokkos::LayoutStride, Kokkos::HostSpace>& node_x0
) {
    for (size_t j = 0; j < elem.node_ids.size(); ++j) {
        for (size_t k = 0U; k < 7; ++k) {
            node_x0(j, k) = nodes[elem.node_ids[j]].x[k];
        }
    }
}


// Populate quadrature weights
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's keep the comment like so

/// @brief Populate the quadrature weights

inline void PopulateQPWeight(
const BeamElement& elem,
const Kokkos::View<double*, Kokkos::LayoutStride, Kokkos::HostSpace>& qp_weight
) {
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
qp_weight(j) = elem.quadrature[j][1];
}
}

inline void PopulateShapeFunctionValues(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_interp
) {
std::vector<double> node_xi(elem.node_ids.size());
for (size_t i = 0; i < elem.node_ids.size(); ++i) {
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
}

// Populate shape interpolation and derivative weights (Lagrange polynomial)
std::vector<double> weights;
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
auto qp_xi = elem.quadrature[j][0];
Expand All @@ -57,35 +44,82 @@ inline void PopulateElementViews(
for (size_t k = 0; k < node_xi.size(); ++k) {
shape_interp(k, j) = weights[k];
}
}
}

inline void PopulateShapeFunctionDerivatives(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_deriv
) {
std::vector<double> node_xi(elem.node_ids.size());
for (size_t i = 0; i < elem.node_ids.size(); ++i) {
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
}

std::vector<double> weights;
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
auto qp_xi = elem.quadrature[j][0];

LagrangePolynomialDerivWeights(qp_xi, node_xi, weights);
for (size_t k = 0; k < node_xi.size(); ++k) {
shape_deriv(k, j) = weights[k];
}
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could remove the duplicate code here (Lines 30-68) by doing the following. There might be more optimization possible by combining the shape + shape deriv functions into one however I suppose you wanted to keep them separate.

/// @brief Map node positions from [0,1] to [-1,1]
inline std::vector<double> MapNodePositions(
    const BeamElement& elem, const std::vector<Node>& nodes
) {
    std::vector<double> node_xi(elem.node_ids.size());
    for (size_t i = 0; i < elem.node_ids.size(); ++i) {
        node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
    }
    return node_xi;
}

/// @brief Populate the shape function values
inline void PopulateShapeFunctionValues(
    const BeamElement& elem, const std::vector<Node>& nodes,
    const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_interp
) {
    const auto node_xi = MapNodePositions(elem, nodes);

    // Populate shape interpolation values (Lagrange polynomials)
    std::vector<double> weights;
    for (size_t j = 0; j < elem.quadrature.size(); ++j) {
        auto qp_xi = elem.quadrature[j][0];
        LagrangePolynomialInterpWeights(qp_xi, node_xi, weights);
        for (size_t k = 0; k < node_xi.size(); ++k) {
            shape_interp(k, j) = weights[k];
        }
    }
}

/// @brief Populate the shape function derivatives
inline void PopulateShapeFunctionDerivatives(
    const BeamElement& elem, const std::vector<Node>& nodes,
    const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_deriv
) {
    const auto node_xi = MapNodePositions(elem, nodes);

    // Populate shape function derivative values (Lagrange polynomials)
    std::vector<double> weights;
    for (size_t j = 0; j < elem.quadrature.size(); ++j) {
        auto qp_xi = elem.quadrature[j][0];
        LagrangePolynomialDerivWeights(qp_xi, node_xi, weights);
        for (size_t k = 0; k < node_xi.size(); ++k) {
            shape_deriv(k, j) = weights[k];
        }
    }
}


// Map section positions from [0,1] to [-1,1]
inline void PopulateQPMstar(
const BeamElement& elem,
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Mstar
) {
std::vector<double> weights(elem.sections.size());
std::vector<double> section_xi(elem.sections.size());
for (size_t i = 0; i < elem.sections.size(); ++i) {
section_xi[i] = 2 * elem.sections[i].position - 1;
}

// Populate section mass and stiffness matrices at quadrature points by
// linearly interpolating between section values
Kokkos::deep_copy(qp_Mstar, 0.);
Kokkos::deep_copy(qp_Cstar, 0.);
for (size_t i = 0; i < elem.quadrature.size(); ++i) {
auto qp_xi = elem.quadrature[i][0];
LinearInterpWeights(qp_xi, section_xi, weights);
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Mstar(i, m, n) = 0.;
}
}
for (size_t j = 0; j < section_xi.size(); ++j) {
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Mstar(i, m, n) += elem.sections[j].M_star[m][n] * weights[j];
qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
}
}
}
}
}

inline void PopulateQPCstar(
const BeamElement& elem,
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Cstar
) {
std::vector<double> weights(elem.sections.size());
std::vector<double> section_xi(elem.sections.size());
for (size_t i = 0; i < elem.sections.size(); ++i) {
section_xi[i] = 2 * elem.sections[i].position - 1;
}

for (size_t i = 0; i < elem.quadrature.size(); ++i) {
auto qp_xi = elem.quadrature[i][0];
LinearInterpWeights(qp_xi, section_xi, weights);
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Cstar(i, m, n) = 0.;
}
}
for (size_t j = 0; j < section_xi.size(); ++j) {
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
}
}
}
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On a similar vein to my previous comment, we could modularize as follows to remove redundant code in M* and C* population. And I wonder if we could write a lambda-like function to consolidate the sectional matrix creation logic since they look awfully similar -- I leave that to your judgement.

/// @brief Map section positions from [0,1] to [-1,1]
inline std::vector<double> MapSectionPositions(const BeamElement& elem) {
    std::vector<double> section_xi(elem.sections.size());
    for (size_t i = 0; i < elem.sections.size(); ++i) {
        section_xi[i] = 2 * elem.sections[i].position - 1;
    }
    return section_xi;
}

/// @brief Populate section mass matrix Mstar
inline void PopulateQPMstar(
    const BeamElement& elem,
    const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Mstar
) {
    const auto section_xi = MapSectionPositions(elem);
    std::vector<double> weights(elem.sections.size());
    for (size_t i = 0; i < elem.quadrature.size(); ++i) {
        auto qp_xi = elem.quadrature[i][0];
        LinearInterpWeights(qp_xi, section_xi, weights);
        for (size_t m = 0; m < 6; ++m) {
            for (size_t n = 0; n < 6; ++n) {
                qp_Mstar(i, m, n) = 0.;
            }
        }
        for (size_t j = 0; j < section_xi.size(); ++j) {
            for (size_t m = 0; m < 6; ++m) {
                for (size_t n = 0; n < 6; ++n) {
                    qp_Mstar(i, m, n) += elem.sections[j].M_star[m][n] * weights[j];
                }
            }
        }
    }
}

/// @brief Populate section stiffness matrix Cstar
inline void PopulateQPCstar(
    const BeamElement& elem,
    const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Cstar
) {
    const auto section_xi = MapSectionPositions(elem);
    std::vector<double> weights(elem.sections.size());
    for (size_t i = 0; i < elem.quadrature.size(); ++i) {
        auto qp_xi = elem.quadrature[i][0];
        LinearInterpWeights(qp_xi, section_xi, weights);
        for (size_t m = 0; m < 6; ++m) {
            for (size_t n = 0; n < 6; ++n) {
                qp_Cstar(i, m, n) = 0.;
            }
        }
        for (size_t j = 0; j < section_xi.size(); ++j) {
            for (size_t m = 0; m < 6; ++m) {
                for (size_t n = 0; n < 6; ++n) {
                    qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
                }
            }
        }
    }
}

} // namespace openturbine
19 changes: 11 additions & 8 deletions src/solver/contribute_beams_to_sparse_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,17 @@ struct ContributeBeamsToSparseMatrix {
constexpr auto is_sorted = true;
constexpr auto force_atomic =
!std::is_same_v<Kokkos::TeamPolicy<>::member_type::execution_space, Kokkos::Serial>;
Kokkos::parallel_for(Kokkos::TeamThreadRange(member, num_nodes), [&](size_t node_1) {
const auto num_dofs = count_active_dofs(element_freedom_signature(i, node_1));
auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
auto row_data = RowDataType(row_data_data.data(), num_dofs);
auto col_idx = ColIdxType(col_idx_data.data(), num_dofs);
Kokkos::parallel_for(
Kokkos::TeamThreadRange(member, num_nodes * num_nodes),
[&](size_t node_12) {
const auto node_1 = node_12 % num_nodes;
const auto node_2 = node_12 / num_nodes;
const auto num_dofs = count_active_dofs(element_freedom_signature(i, node_1));
auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
auto row_data = RowDataType(row_data_data.data(), num_dofs);
auto col_idx = ColIdxType(col_idx_data.data(), num_dofs);

for (auto node_2 = 0U; node_2 < num_nodes; ++node_2) {
for (auto component_2 = 0U; component_2 < num_dofs; ++component_2) {
col_idx(component_2) =
static_cast<int>(element_freedom_table(i, node_2, component_2));
Expand All @@ -47,7 +50,7 @@ struct ContributeBeamsToSparseMatrix {
);
}
}
});
);
}
};

Expand Down
7 changes: 1 addition & 6 deletions src/solver/create_sparse_dense_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@ CreateSparseDenseSolver(
Teuchos::RCP<GlobalCrsMatrixType>& A, Teuchos::RCP<GlobalMultiVectorType>& x_mv,
Teuchos::RCP<GlobalMultiVectorType>& b
) {
using ExecutionSpace = typename GlobalCrsMatrixType::execution_space;

const auto solver_name = (std::is_same_v<ExecutionSpace, Kokkos::DefaultHostExecutionSpace>)
? std::string{"klu2"}
: std::string{"basker"};

const auto solver_name = std::string{"klu2"};
auto amesos_solver =
Amesos2::create<GlobalCrsMatrixType, GlobalMultiVectorType>(solver_name, A, x_mv, b);
amesos_solver->symbolicFactorization();
Expand Down
5 changes: 4 additions & 1 deletion src/step/assemble_constraints_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ inline void AssembleConstraintsMatrix(Solver& solver, Constraints& constraints)
"CopySparseValuesToTranspose", constraint_transpose_policy,
CopySparseValuesToTranspose<Solver::CrsMatrixType>{solver.B, tmp_row_map, solver.B_t}
);
KokkosSparse::sort_crs_matrix(solver.B_t);
{
auto sort_region = Kokkos::Profiling::ScopedRegion("Sort");
KokkosSparse::sort_crs_matrix(solver.B_t);
}
}

{
Expand Down
Loading