-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Made a few optimizations for running OpenTurbine on GPU. #350
Conversation
ddement
commented
Feb 7, 2025
- Removed deep copies from system setup code, saving a lot of time for large problems
- Changed to using KLU2 even on GPU, which improved the 3000 blade case from 32s to 16s. Also improved 3 blade case solve time by >50%.
- Merged loops to increase parallelism in system matrix contribution.
1. Removed deep copies from system setup code, saving a lot of time for large problems 2. Changed to using KLU2 even on GPU, which improved the 3000 blade case from 32s to 16s. Also improved 3 blade case solve time by >50%. 3. Merged loops to increase parallelism in system matrix contribution.
247b75c
to
c24a77f
Compare
inline void PopulateNodeX0( | ||
const BeamElement& elem, const std::vector<Node>& nodes, | ||
const Kokkos::View<double* [7], Kokkos::LayoutStride, Kokkos::HostSpace>& node_x0 | ||
) { | ||
// Map node positions from [0,1] to [-1,1] | ||
std::vector<double> node_xi(elem.node_ids.size()); | ||
for (size_t i = 0; i < elem.node_ids.size(); ++i) { | ||
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1; | ||
} | ||
|
||
// Populate node initial position and orientation | ||
for (size_t j = 0; j < elem.node_ids.size(); ++j) { | ||
for (size_t k = 0U; k < kLieGroupComponents; ++k) { | ||
node_x0(j, k) = nodes[elem.node_ids[j]].x[k]; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I propose the following readability improvement:
/// @brief Populate the node initial position and orientation
inline void PopulateNodeX0(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double* [7], Kokkos::LayoutStride, Kokkos::HostSpace>& node_x0
) {
for (size_t j = 0; j < elem.node_ids.size(); ++j) {
for (size_t k = 0U; k < 7; ++k) {
node_x0(j, k) = nodes[elem.node_ids[j]].x[k];
}
}
}
|
||
// Populate quadrature weights |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's keep the comment like so
/// @brief Populate the quadrature weights
|
||
LagrangePolynomialDerivWeights(qp_xi, node_xi, weights); | ||
for (size_t k = 0; k < node_xi.size(); ++k) { | ||
shape_deriv(k, j) = weights[k]; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could remove the duplicate code here (Lines 30-68) by doing the following. There might be more optimization possible by combining the shape + shape deriv functions into one however I suppose you wanted to keep them separate.
/// @brief Map node positions from [0,1] to [-1,1]
inline std::vector<double> MapNodePositions(
const BeamElement& elem, const std::vector<Node>& nodes
) {
std::vector<double> node_xi(elem.node_ids.size());
for (size_t i = 0; i < elem.node_ids.size(); ++i) {
node_xi[i] = 2 * nodes[elem.node_ids[i]].s - 1;
}
return node_xi;
}
/// @brief Populate the shape function values
inline void PopulateShapeFunctionValues(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_interp
) {
const auto node_xi = MapNodePositions(elem, nodes);
// Populate shape interpolation values (Lagrange polynomials)
std::vector<double> weights;
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
auto qp_xi = elem.quadrature[j][0];
LagrangePolynomialInterpWeights(qp_xi, node_xi, weights);
for (size_t k = 0; k < node_xi.size(); ++k) {
shape_interp(k, j) = weights[k];
}
}
}
/// @brief Populate the shape function derivatives
inline void PopulateShapeFunctionDerivatives(
const BeamElement& elem, const std::vector<Node>& nodes,
const Kokkos::View<double**, Kokkos::LayoutStride, Kokkos::HostSpace>& shape_deriv
) {
const auto node_xi = MapNodePositions(elem, nodes);
// Populate shape function derivative values (Lagrange polynomials)
std::vector<double> weights;
for (size_t j = 0; j < elem.quadrature.size(); ++j) {
auto qp_xi = elem.quadrature[j][0];
LagrangePolynomialDerivWeights(qp_xi, node_xi, weights);
for (size_t k = 0; k < node_xi.size(); ++k) {
shape_deriv(k, j) = weights[k];
}
}
}
inline void PopulateQPCstar( | ||
const BeamElement& elem, | ||
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Cstar | ||
) { | ||
std::vector<double> weights(elem.sections.size()); | ||
std::vector<double> section_xi(elem.sections.size()); | ||
for (size_t i = 0; i < elem.sections.size(); ++i) { | ||
section_xi[i] = 2 * elem.sections[i].position - 1; | ||
} | ||
|
||
for (size_t i = 0; i < elem.quadrature.size(); ++i) { | ||
auto qp_xi = elem.quadrature[i][0]; | ||
LinearInterpWeights(qp_xi, section_xi, weights); | ||
for (size_t m = 0; m < 6; ++m) { | ||
for (size_t n = 0; n < 6; ++n) { | ||
qp_Cstar(i, m, n) = 0.; | ||
} | ||
} | ||
for (size_t j = 0; j < section_xi.size(); ++j) { | ||
for (size_t m = 0; m < 6; ++m) { | ||
for (size_t n = 0; n < 6; ++n) { | ||
qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j]; | ||
} | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On a similar vein to my previous comment, we could modularize as follows to remove redundant code in M* and C* population. And I wonder if we could write a lambda-like function to consolidate the sectional matrix creation logic since they look awfully similar -- I leave that to your judgement.
/// @brief Map section positions from [0,1] to [-1,1]
inline std::vector<double> MapSectionPositions(const BeamElement& elem) {
std::vector<double> section_xi(elem.sections.size());
for (size_t i = 0; i < elem.sections.size(); ++i) {
section_xi[i] = 2 * elem.sections[i].position - 1;
}
return section_xi;
}
/// @brief Populate section mass matrix Mstar
inline void PopulateQPMstar(
const BeamElement& elem,
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Mstar
) {
const auto section_xi = MapSectionPositions(elem);
std::vector<double> weights(elem.sections.size());
for (size_t i = 0; i < elem.quadrature.size(); ++i) {
auto qp_xi = elem.quadrature[i][0];
LinearInterpWeights(qp_xi, section_xi, weights);
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Mstar(i, m, n) = 0.;
}
}
for (size_t j = 0; j < section_xi.size(); ++j) {
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Mstar(i, m, n) += elem.sections[j].M_star[m][n] * weights[j];
}
}
}
}
}
/// @brief Populate section stiffness matrix Cstar
inline void PopulateQPCstar(
const BeamElement& elem,
const Kokkos::View<double* [6][6], Kokkos::LayoutStride, Kokkos::HostSpace>& qp_Cstar
) {
const auto section_xi = MapSectionPositions(elem);
std::vector<double> weights(elem.sections.size());
for (size_t i = 0; i < elem.quadrature.size(); ++i) {
auto qp_xi = elem.quadrature[i][0];
LinearInterpWeights(qp_xi, section_xi, weights);
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Cstar(i, m, n) = 0.;
}
}
for (size_t j = 0; j < section_xi.size(); ++j) {
for (size_t m = 0; m < 6; ++m) {
for (size_t n = 0; n < 6; ++n) {
qp_Cstar(i, m, n) += elem.sections[j].C_star[m][n] * weights[j];
}
}
}
}
}