Skip to content

Commit

Permalink
Implemented SIMD loop for IntegrateStiffnessMatrix
Browse files Browse the repository at this point in the history
  • Loading branch information
ddement committed Aug 10, 2024
1 parent 6efeec5 commit e699a2a
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 5 deletions.
96 changes: 92 additions & 4 deletions src/restruct_poc/system/integrate_stiffness_matrix.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <Kokkos_Core.hpp>
#include <Kokkos_SIMD.hpp>

#include "src/restruct_poc/beams/beams.hpp"
#include "src/restruct_poc/types.hpp"
Expand Down Expand Up @@ -53,6 +54,92 @@ struct IntegrateStiffnessMatrixElement {
}
};

struct IntegrateStiffnessMatrixElement2 {
int i_elem;
size_t num_nodes;
size_t num_qps;
size_t first_node;
size_t first_qp;
View_N::const_type qp_weight_;
View_N::const_type qp_jacobian_;
Kokkos::View<double**, Kokkos::LayoutLeft> shape_interp_;
Kokkos::View<double**, Kokkos::LayoutLeft> shape_deriv_;
Kokkos::View<double* [6][6]>::const_type qp_Kuu_;
Kokkos::View<double* [6][6]>::const_type qp_Puu_;
Kokkos::View<double* [6][6]>::const_type qp_Cuu_;
Kokkos::View<double* [6][6]>::const_type qp_Ouu_;
Kokkos::View<double* [6][6]>::const_type qp_Quu_;
Kokkos::View<double***> gbl_M_;

KOKKOS_FUNCTION
void operator()(size_t i_index) const {
using simd_type = Kokkos::Experimental::native_simd<double>;
using tag_type = Kokkos::Experimental::element_aligned_tag;
constexpr auto width = simd_type::size();
auto j_index = 0U;
for(; j_index + width < num_nodes; j_index += width) {
auto local_M_data = Kokkos::Array<simd_type, 36>{};
const auto local_M = Kokkos::View<simd_type[6][6]>(local_M_data.data());
for (auto k = 0U; k < num_qps; ++k) {
const auto w = qp_weight_(k);
const auto jacobian = qp_jacobian_(k);
const auto phi_i = shape_interp_(i_index, k);
auto phi_j = simd_type{};
phi_j.copy_from(&shape_interp_(j_index, k), tag_type());
const auto phi_prime_i = shape_deriv_(i_index, k);
auto phi_prime_j = simd_type{};
phi_prime_j.copy_from(&shape_deriv_(j_index, k), tag_type());
const auto K = w * phi_i * phi_j * jacobian;
const auto P = w * (phi_i * phi_prime_j);
const auto C = w * (phi_prime_i * phi_prime_j / jacobian);
const auto O = w * (phi_prime_i * phi_j);
for (auto m = 0U; m < 6U; ++m) {
for (auto n = 0U; n < 6U; ++n) {
local_M(m, n) += K * (qp_Kuu_(k, m, n) + qp_Quu_(k, m, n)) +
P * qp_Puu_(k, m, n) + C * qp_Cuu_(k, m, n) +
O * qp_Ouu_(k, m, n);
}
}
}
for (auto m = 0U; m < 6U; ++m) {
for (auto n = 0U; n < 6U; ++n) {
for (auto lane = 0U; lane < width; ++lane) {
gbl_M_(i_elem, i_index * 6 + m, (j_index+lane) * 6 + n) = local_M(m, n)[lane];

}
}
}
}
for(; j_index < num_nodes; ++j_index) {
auto local_M_data = Kokkos::Array<double, 36>{};
const auto local_M = Kokkos::View<double[6][6]>(local_M_data.data());
for (auto k = 0U; k < num_qps; ++k) {
const auto w = qp_weight_(k);
const auto jacobian = qp_jacobian_(k);
const auto phi_i = shape_interp_(i_index, k);
const auto phi_j = shape_interp_(j_index, k);
const auto phi_prime_i = shape_deriv_(i_index, k);
const auto phi_prime_j = shape_deriv_(j_index, k);
const auto K = w * phi_i * phi_j * jacobian;
const auto P = w * (phi_i * phi_prime_j);
const auto C = w * (phi_prime_i * phi_prime_j / jacobian);
const auto O = w * (phi_prime_i * phi_j);
for (auto m = 0U; m < 6U; ++m) {
for (auto n = 0U; n < 6U; ++n) {
local_M(m, n) += K * (qp_Kuu_(k, m, n) + qp_Quu_(k, m, n)) +
P * qp_Puu_(k, m, n) + C * qp_Cuu_(k, m, n) +
O * qp_Ouu_(k, m, n);
}
}
}
for (auto m = 0U; m < 6U; ++m) {
for (auto n = 0U; n < 6U; ++n) {
gbl_M_(i_elem, i_index * 6 + m, j_index * 6 + n) = local_M(m, n);
}
}
}
}
};
struct IntegrateStiffnessMatrix {
Kokkos::View<Beams::ElemIndices*>::const_type elem_indices;
View_NxN::const_type qp_weight_;
Expand All @@ -72,9 +159,9 @@ struct IntegrateStiffnessMatrix {
const auto idx = elem_indices(i_elem);

const auto shape_interp =
Kokkos::View<double**>(member.team_scratch(1), idx.num_nodes, idx.num_qps);
Kokkos::View<double**, Kokkos::LayoutLeft>(member.team_scratch(1), idx.num_nodes, idx.num_qps);
const auto shape_deriv =
Kokkos::View<double**>(member.team_scratch(1), idx.num_nodes, idx.num_qps);
Kokkos::View<double**, Kokkos::LayoutLeft>(member.team_scratch(1), idx.num_nodes, idx.num_qps);

const auto qp_weight = Kokkos::View<double*>(member.team_scratch(1), idx.num_qps);
const auto qp_jacobian = Kokkos::View<double*>(member.team_scratch(1), idx.num_qps);
Expand Down Expand Up @@ -104,9 +191,10 @@ struct IntegrateStiffnessMatrix {
});
member.team_barrier();

const auto node_range = Kokkos::TeamThreadMDRange(member, idx.num_nodes, idx.num_nodes);
const auto element_integrator = IntegrateStiffnessMatrixElement{
const auto node_range = Kokkos::TeamThreadRange(member, idx.num_nodes);
const auto element_integrator = IntegrateStiffnessMatrixElement2{
i_elem,
idx.num_nodes,
idx.num_qps,
idx.node_range.first,
idx.qp_range.first,
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/restruct_poc/test_rotor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ TEST(RotorTest, IEA15RotorHub) {
// Build vector of nodes (straight along x axis, no rotation)
// Calculate displacement, velocity, acceleration assuming a
// 1 rad/s angular velocity around the z axis
constexpr size_t num_blades = 3;
constexpr size_t num_blades = 300;
auto blade_list = std::array<size_t, num_blades>{};
std::iota(std::begin(blade_list), std::end(blade_list), 0);

Expand Down

0 comments on commit e699a2a

Please sign in to comment.