Skip to content

Commit

Permalink
Moved Tangent application from sparse matrix assembly to contribution…
Browse files Browse the repository at this point in the history
… calculation

This change improves performance significantly and decreases memory usage
  • Loading branch information
ddement committed Feb 21, 2025
1 parent 337d507 commit 06f98d1
Show file tree
Hide file tree
Showing 18 changed files with 177 additions and 304 deletions.
8 changes: 2 additions & 6 deletions src/elements/beams/beams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ struct Beams {
Kokkos::View<double** [6]> qp_Fe; // External force

Kokkos::View<double** [6]> residual_vector_terms;
Kokkos::View<double*** [6][6]> stiffness_matrix_terms;
Kokkos::View<double*** [6][6]> inertia_matrix_terms;
Kokkos::View<double*** [6][6]> system_matrix_terms;

// Shape Function data
Kokkos::View<double***> shape_interp; // Shape function values
Expand Down Expand Up @@ -107,10 +106,7 @@ struct Beams {
qp_E("qp_E", num_elems, max_elem_qps),
qp_Fe("qp_Fe", num_elems, max_elem_qps),
residual_vector_terms("residual_vector_terms", num_elems, max_elem_nodes),
stiffness_matrix_terms(
"stiffness_matrix_terms", num_elems, max_elem_nodes, max_elem_nodes
),
inertia_matrix_terms("inertia_matrix_terms", num_elems, max_elem_nodes, max_elem_nodes),
system_matrix_terms("system_matrix_terms", num_elems, max_elem_nodes, max_elem_nodes),
// Shape Function data
shape_interp("shape_interp", num_elems, max_elem_nodes, max_elem_qps),
shape_deriv("deriv_interp", num_elems, max_elem_nodes, max_elem_qps) {
Expand Down
6 changes: 2 additions & 4 deletions src/elements/masses/masses.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ struct Masses {
Kokkos::View<double* [6][6]> qp_Mstar; //< Mass matrix in material frame

Kokkos::View<double* [6]> residual_vector_terms;
Kokkos::View<double* [6][6]> stiffness_matrix_terms;
Kokkos::View<double* [6][6]> inertia_matrix_terms;
Kokkos::View<double* [6][6]> system_matrix_terms;

explicit Masses(const size_t n_mass_elems)
: num_elems(n_mass_elems),
Expand All @@ -41,8 +40,7 @@ struct Masses {
node_x0("node_x0", num_elems),
qp_Mstar("qp_Mstar", num_elems),
residual_vector_terms("residual_vector_terms", num_elems),
stiffness_matrix_terms("stiffness_matrix_terms", num_elems),
inertia_matrix_terms("inertia_matrix_terms", num_elems) {
system_matrix_terms("system_matrix_terms", num_elems) {
Kokkos::deep_copy(num_nodes_per_element, 1); // Always 1 node per element
Kokkos::deep_copy(element_freedom_signature, FreedomSignature::AllComponents);
}
Expand Down
2 changes: 1 addition & 1 deletion src/solver/contribute_masses_to_sparse_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct ContributeMassesToSparseMatrix {
KOKKOS_FUNCTION
void operator()(size_t i) const {
constexpr auto is_sorted = true;
constexpr auto force_atomic = false;
constexpr auto force_atomic = !std::is_same_v<Kokkos::DefaultExecutionSpace, Kokkos::Serial>;
const auto num_dofs = count_active_dofs(element_freedom_signature(i));
auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 6>{};
auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 6>{};
Expand Down
22 changes: 5 additions & 17 deletions src/solver/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,13 @@ struct Solver {
size_t num_system_dofs; //< Number of system degrees of freedom
size_t num_dofs; //< Number of degrees of freedom

KernelHandle system_spgemm_handle;
KernelHandle constraints_spgemm_handle;
KernelHandle system_spadd_handle;
KernelHandle spc_spadd_handle;
KernelHandle full_system_spadd_handle;

CrsMatrixType K; //< Stiffness matrix
CrsMatrixType T; //< Tangent operator
CrsMatrixType B; //< Constraints matrix
CrsMatrixType B_t; //< Transpose of constraints matrix
CrsMatrixType static_system_matrix; //< Static system matrix
CrsMatrixType system_matrix; //< System matrix
CrsMatrixType constraints_matrix; //< Constraints matrix
CrsMatrixType system_matrix_full; //< System matrix with constraints
Expand Down Expand Up @@ -115,10 +111,6 @@ struct Solver {
: num_system_nodes(node_IDs.extent(0)),
num_system_dofs(ComputeNumSystemDofs(node_freedom_allocation_table)),
num_dofs(num_system_dofs + num_constraint_dofs),
K(CreateKMatrix<CrsMatrixType>(
num_system_dofs, node_freedom_allocation_table, node_freedom_map_table,
num_nodes_per_element, node_state_indices
)),
T(CreateTMatrix<CrsMatrixType>(
num_system_dofs, node_freedom_allocation_table, node_freedom_map_table
)),
Expand All @@ -132,8 +124,10 @@ struct Solver {
constraint_base_node_freedom_table, constraint_target_node_freedom_table,
constraint_row_range, constraint_base_node_col_range, constraint_target_node_col_range
)),
static_system_matrix(CreateMatrixSpgemm(K, T, system_spgemm_handle)),
system_matrix(CreateMatrixSpadd(K, static_system_matrix, system_spadd_handle)),
system_matrix(CreateKMatrix<CrsMatrixType>(
num_system_dofs, node_freedom_allocation_table, node_freedom_map_table,
num_nodes_per_element, node_state_indices
)),
constraints_matrix(CreateMatrixSpgemm(B, T, constraints_spgemm_handle)),
system_matrix_full(CreateSystemMatrixFull(num_system_dofs, num_dofs, system_matrix)),
constraints_matrix_full(
Expand All @@ -160,12 +154,10 @@ struct Solver {
: num_system_nodes(other.num_system_nodes),
num_system_dofs(other.num_system_dofs),
num_dofs(other.num_dofs),
K("K", other.K),
T("T", other.T),
B("B", other.B),
B_t("B_t", other.B_t),
static_system_matrix(CreateMatrixSpgemm(K, T, system_spgemm_handle)),
system_matrix(CreateMatrixSpadd(K, static_system_matrix, system_spadd_handle)),
system_matrix("system_matrix", other.system_matrix),
constraints_matrix(CreateMatrixSpgemm(B, T, constraints_spgemm_handle)),
system_matrix_full(CreateSystemMatrixFull(num_system_dofs, num_dofs, system_matrix)),
constraints_matrix_full(
Expand Down Expand Up @@ -201,17 +193,13 @@ struct Solver {
std::swap(num_system_dofs, tmp.num_system_dofs);
std::swap(num_dofs, tmp.num_dofs);

std::swap(system_spgemm_handle, tmp.system_spgemm_handle);
std::swap(constraints_spgemm_handle, tmp.constraints_spgemm_handle);
std::swap(system_spadd_handle, tmp.system_spadd_handle);
std::swap(spc_spadd_handle, tmp.spc_spadd_handle);
std::swap(full_system_spadd_handle, tmp.full_system_spadd_handle);

std::swap(K, tmp.K);
std::swap(T, tmp.T);
std::swap(B, tmp.B);
std::swap(B_t, tmp.B_t);
std::swap(static_system_matrix, tmp.static_system_matrix);
std::swap(system_matrix, tmp.system_matrix);
std::swap(constraints_matrix, tmp.constraints_matrix);
std::swap(system_matrix_full, tmp.system_matrix_full);
Expand Down
50 changes: 5 additions & 45 deletions src/step/assemble_system_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,69 +21,29 @@ inline void AssembleSystemMatrix(Solver& solver, Elements& elements) {
auto springs_sparse_matrix_policy =
Kokkos::RangePolicy<>(0, static_cast<int>(elements.springs.num_elems));

// Assemble stiffness matrix by contributing beam, mass, and spring stiffness matrices to the
// global sparse matrix
Kokkos::deep_copy(solver.K.values, 0.);
Kokkos::deep_copy(solver.system_matrix.values, 0.);
Kokkos::parallel_for(
"ContributeBeamsToSparseMatrix", beams_sparse_matrix_policy,
ContributeBeamsToSparseMatrix<Solver::CrsMatrixType>{
elements.beams.num_nodes_per_element, elements.beams.element_freedom_signature,
elements.beams.element_freedom_table, elements.beams.stiffness_matrix_terms, solver.K
elements.beams.element_freedom_table, elements.beams.system_matrix_terms,
solver.system_matrix
}
);
Kokkos::fence();
Kokkos::parallel_for(
"ContributeMassesToSparseMatrix", masses_sparse_matrix_policy,
ContributeMassesToSparseMatrix<Solver::CrsMatrixType>{
elements.masses.element_freedom_signature, elements.masses.element_freedom_table,
elements.masses.stiffness_matrix_terms, solver.K
elements.masses.system_matrix_terms, solver.system_matrix
}
);
Kokkos::fence();
Kokkos::parallel_for(
"ContributeSpringsToSparseMatrix", springs_sparse_matrix_policy,
ContributeSpringsToSparseMatrix<Solver::CrsMatrixType>{
elements.springs.element_freedom_signature, elements.springs.element_freedom_table,
elements.springs.stiffness_matrix_terms, solver.K
}
);

Kokkos::fence();
{
auto static_region = Kokkos::Profiling::ScopedRegion("Assemble Static System Matrix");
KokkosSparse::spgemm_numeric(
solver.system_spgemm_handle, solver.K, false, solver.T, false,
solver.static_system_matrix
);
}

// Assemble inertia matrix by contributing beam and mass inertia matrices to the global
// sparse matrix
Kokkos::deep_copy(solver.K.values, 0.);
Kokkos::parallel_for(
"ContributeBeamsToSparseMatrix", beams_sparse_matrix_policy,
ContributeBeamsToSparseMatrix<Solver::CrsMatrixType>{
elements.beams.num_nodes_per_element, elements.beams.element_freedom_signature,
elements.beams.element_freedom_table, elements.beams.inertia_matrix_terms, solver.K
elements.springs.stiffness_matrix_terms, solver.system_matrix
}
);
Kokkos::fence();
Kokkos::parallel_for(
"ContributeMassesToSparseMatrix", masses_sparse_matrix_policy,
ContributeMassesToSparseMatrix<Solver::CrsMatrixType>{
elements.masses.element_freedom_signature, elements.masses.element_freedom_table,
elements.masses.inertia_matrix_terms, solver.K
}
);

Kokkos::fence();
{
auto system_region = Kokkos::Profiling::ScopedRegion("Assemble System Matrix");
KokkosSparse::spadd_numeric(
&solver.system_spadd_handle, 1., solver.K, 1., solver.static_system_matrix,
solver.system_matrix
);
}
}

} // namespace openturbine
4 changes: 2 additions & 2 deletions src/step/step.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ inline bool Step(
return false;
}

UpdateSystemVariables(parameters, elements, state);

UpdateTangentOperator(parameters, state);

UpdateSystemVariables(parameters, elements, state);

AssembleTangentOperator(solver, state);

AssembleSystemResidual(solver, elements, state);
Expand Down
21 changes: 12 additions & 9 deletions src/step/update_system_variables_beams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,18 @@ namespace openturbine {
inline void UpdateSystemVariablesBeams(
StepParameters& parameters, const Beams& beams, State& state
) {
const auto num_nodes = beams.max_elem_nodes;
const auto num_qps = beams.max_elem_qps;

auto range_policy = Kokkos::TeamPolicy<>(static_cast<int>(beams.num_elems), Kokkos::AUTO());
const auto shape_size =
Kokkos::View<double**>::shmem_size(beams.max_elem_nodes, beams.max_elem_qps);
const auto weight_size = Kokkos::View<double*>::shmem_size(beams.max_elem_qps);
const auto node_variable_size = Kokkos::View<double* [7]>::shmem_size(beams.max_elem_nodes);
const auto qp_variable_size = Kokkos::View<double* [6]>::shmem_size(beams.max_elem_qps);
const auto qp_matrix_size = Kokkos::View<double* [6][6]>::shmem_size(beams.max_elem_qps);
const auto shape_size = Kokkos::View<double**>::shmem_size(num_nodes, num_qps);
const auto weight_size = Kokkos::View<double*>::shmem_size(num_qps);
const auto node_variable_size = Kokkos::View<double* [7]>::shmem_size(num_nodes);
const auto qp_variable_size = Kokkos::View<double* [6]>::shmem_size(num_qps);
const auto qp_matrix_size = Kokkos::View<double* [6][6]>::shmem_size(num_qps);
const auto system_matrix_size = Kokkos::View<double** [6][6]>::shmem_size(num_nodes, num_nodes);
auto smem = 2 * shape_size + 2 * weight_size + 4 * node_variable_size + 5 * qp_variable_size +
7 * qp_matrix_size;
7 * qp_matrix_size + 2 * system_matrix_size;
range_policy.set_scratch_size(1, Kokkos::PerTeam(smem));

Kokkos::parallel_for(
Expand All @@ -31,6 +34,7 @@ inline void UpdateSystemVariablesBeams(
state.q,
state.v,
state.vd,
state.tangent,
beams.node_state_indices,
beams.num_nodes_per_element,
beams.num_qps_per_element,
Expand All @@ -47,8 +51,7 @@ inline void UpdateSystemVariablesBeams(
beams.qp_Cstar,
beams.qp_Fe,
beams.residual_vector_terms,
beams.stiffness_matrix_terms,
beams.inertia_matrix_terms
beams.system_matrix_terms
}
);
}
Expand Down
4 changes: 2 additions & 2 deletions src/step/update_system_variables_masses.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ inline void UpdateSystemVariablesMasses(
Kokkos::parallel_for(
"masses::CalculateQuadraturePointValues", masses.num_elems,
masses::CalculateQuadraturePointValues{
parameters.beta_prime, parameters.gamma_prime, state.q, state.v, state.vd,
parameters.beta_prime, parameters.gamma_prime, state.q, state.v, state.vd, state.tangent,
masses.state_indices, masses.gravity, masses.qp_Mstar, masses.node_x0,
masses.residual_vector_terms, masses.stiffness_matrix_terms, masses.inertia_matrix_terms
masses.residual_vector_terms, masses.system_matrix_terms
}
);
}
Expand Down
1 change: 1 addition & 0 deletions src/system/beams/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ install(FILES
calculate_quadrature_point_values.hpp
calculate_stiffness_quadrature_point_values.hpp
calculate_strain.hpp
calculate_system_matrix.hpp
calculate_temporary_variables.hpp
integrate_inertia_matrix.hpp
integrate_residual_vector.hpp
Expand Down
42 changes: 30 additions & 12 deletions src/system/beams/calculate_quadrature_point_values.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "calculate_inertial_quadrature_point_values.hpp"
#include "calculate_stiffness_quadrature_point_values.hpp"
#include "calculate_system_matrix.hpp"
#include "integrate_inertia_matrix.hpp"
#include "integrate_residual_vector.hpp"
#include "integrate_stiffness_matrix.hpp"
Expand All @@ -16,6 +17,7 @@ struct CalculateQuadraturePointValues {
Kokkos::View<double* [7]>::const_type Q;
Kokkos::View<double* [6]>::const_type V;
Kokkos::View<double* [6]>::const_type A;
Kokkos::View<double* [6][6]>::const_type tangent;
Kokkos::View<size_t**>::const_type node_state_indices;
Kokkos::View<size_t*>::const_type num_nodes_per_element;
Kokkos::View<size_t*>::const_type num_qps_per_element;
Expand All @@ -32,8 +34,7 @@ struct CalculateQuadraturePointValues {
Kokkos::View<double** [6][6]>::const_type qp_Cstar_;
Kokkos::View<double** [6]> qp_FE_;
Kokkos::View<double** [6]> residual_vector_terms_;
Kokkos::View<double*** [6][6]> stiffness_matrix_terms_;
Kokkos::View<double*** [6][6]> inertia_matrix_terms_;
Kokkos::View<double*** [6][6]> system_matrix_terms_;

KOKKOS_FUNCTION
void operator()(Kokkos::TeamPolicy<>::member_type member) const {
Expand All @@ -47,7 +48,8 @@ struct CalculateQuadraturePointValues {

const auto qp_range = Kokkos::TeamThreadRange(member, num_qps);
const auto node_range = Kokkos::TeamThreadRange(member, num_nodes);
const auto node_squared_range = Kokkos::TeamThreadRange(member, num_nodes * simd_nodes);
const auto node_squared_range = Kokkos::TeamThreadRange(member, num_nodes * num_nodes);
const auto node_squared_simd_range = Kokkos::TeamThreadRange(member, num_nodes * simd_nodes);

const auto shape_interp =
Kokkos::View<double**, Kokkos::LayoutLeft>(member.team_scratch(1), num_nodes, num_qps);
Expand Down Expand Up @@ -75,6 +77,10 @@ struct CalculateQuadraturePointValues {
const auto qp_Muu = Kokkos::View<double* [6][6]>(member.team_scratch(1), num_qps);
const auto qp_Guu = Kokkos::View<double* [6][6]>(member.team_scratch(1), num_qps);

const auto stiffness_matrix_terms =
Kokkos::View<double** [6][6]>(member.team_scratch(1), num_nodes, num_nodes);
const auto inertia_matrix_terms =
Kokkos::View<double** [6][6]>(member.team_scratch(1), num_nodes, num_nodes);
KokkosBatched::TeamVectorCopy<Kokkos::TeamPolicy<>::member_type>::invoke(
member, Kokkos::subview(shape_interp_, i_elem, Kokkos::ALL, Kokkos::ALL), shape_interp
);
Expand Down Expand Up @@ -114,23 +120,35 @@ struct CalculateQuadraturePointValues {
Kokkos::parallel_for(qp_range, stiffness_quad_point_calculator);
member.team_barrier();

const auto residual_integrator = IntegrateResidualVectorElement{
const auto residual_integrator = beams::IntegrateResidualVectorElement{
i_elem, num_qps, qp_weight, qp_jacobian, shape_interp, shape_deriv, node_FX,
qp_Fc, qp_Fd, qp_Fi, qp_Fe, qp_Fg, residual_vector_terms_
};
Kokkos::parallel_for(node_range, residual_integrator);

const auto stiffness_matrix_integrator = IntegrateStiffnessMatrixElement{
i_elem, num_nodes, num_qps, qp_weight, qp_jacobian, shape_interp, shape_deriv,
qp_Kuu, qp_Puu, qp_Cuu, qp_Ouu, qp_Quu, stiffness_matrix_terms_
const auto stiffness_matrix_integrator = beams::IntegrateStiffnessMatrixElement{
i_elem, num_nodes, num_qps, qp_weight, qp_jacobian, shape_interp, shape_deriv,
qp_Kuu, qp_Puu, qp_Cuu, qp_Ouu, qp_Quu, stiffness_matrix_terms
};
Kokkos::parallel_for(node_squared_range, stiffness_matrix_integrator);
Kokkos::parallel_for(node_squared_simd_range, stiffness_matrix_integrator);

const auto inertia_matrix_integrator = IntegrateInertiaMatrixElement{
i_elem, num_nodes, num_qps, qp_weight, qp_jacobian, shape_interp,
qp_Muu, qp_Guu, beta_prime_, gamma_prime_, inertia_matrix_terms_
const auto inertia_matrix_integrator = beams::IntegrateInertiaMatrixElement{
i_elem, num_nodes, num_qps, qp_weight, qp_jacobian, shape_interp,
qp_Muu, qp_Guu, beta_prime_, gamma_prime_, inertia_matrix_terms
};
Kokkos::parallel_for(node_squared_range, inertia_matrix_integrator);
Kokkos::parallel_for(node_squared_simd_range, inertia_matrix_integrator);
member.team_barrier();

const auto system_matrix_calculator = beams::CalculateSystemMatrix{
i_elem,
num_nodes,
tangent,
node_state_indices,
stiffness_matrix_terms,
inertia_matrix_terms,
system_matrix_terms_
};
Kokkos::parallel_for(node_squared_range, system_matrix_calculator);
}
};

Expand Down
Loading

0 comments on commit 06f98d1

Please sign in to comment.