Skip to content

Commit

Permalink
ODE - RK: fixing small issues reported by Yaro (#2229)
Browse files Browse the repository at this point in the history
* ODE - RK: fixing small issues reported by Yaro

1. fix integer division to floating point division
2. fix evaluation of max scaled error
3. increase or decrease time step using uniform formula
4. use num_steps instead of max_steps for dt calculation
5. add a time step when using constant dt to avoid issues with round-off errors
6. fixing exponent and moving adaptivity computation out of RKStep
7. adding time step counter
8. adding more tests and keep track of time steps if wanted

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* RK: fixing variable name after rebase

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* RK: enabling most methods after fixing test related issues

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* RK: passing new unit-tests

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* Applying clang-format

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* RK: fix bad subview creation

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* RK: fix bug that computes the inital step size for non-adaptive case

This prevents having the user defined time step and leads to
wrong results. The rate of convergence tests are now passing!

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* clang-format...

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* RK: tweaking the tolerances a bit

On GPU the lowest order method (RK1-2) is accumulating a bit more
errors than on CPU. Only an issue when comparing values to zero
where the absolute tolerance is needed to detect good conv.

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

* Adding reference for some implementation details and heuristic values

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>

---------

Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>
  • Loading branch information
lucbv authored Nov 4, 2024
1 parent 14b20bc commit afb3771
Show file tree
Hide file tree
Showing 12 changed files with 1,042 additions and 63 deletions.
2 changes: 1 addition & 1 deletion ode/impl/KokkosODE_BDF_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ struct BDF_system_wrapper2 {
if (compute_jac) {
mySys.evaluate_jacobian(t, dt, y, jac);

// J = I - dt*(dy/dy)
// J = I - dt*(df/dy)
for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) {
for (int colIdx = 0; colIdx < neqs; ++colIdx) {
jac(rowIdx, colIdx) = -dt * jac(rowIdx, colIdx);
Expand Down
53 changes: 53 additions & 0 deletions ode/impl/KokkosODE_RungeKuttaTables_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,59 @@ struct ButcherTableau<4, 6> // Referred to as DOPRI5 or RKDP
11.0 / 84.0 - 187.0 / 2100.0, -1.0 / 40.0}};
};

// Coefficients obtained from:
// J. H. Verner
// "Explicit Runge-Kutta methods with estimates of the local truncation error",
// Journal of Numerical Analysis, Volume 15, Issue 4, 1978,
// https://doi.org/10.1137/0715051.
template <>
struct ButcherTableau<5, 7> // Referred to as Verner 5-6 or VER56
{
static constexpr int order = 6;
static constexpr int nstages = 8;
Kokkos::Array<double, (nstages * nstages + nstages) / 2> a{{0.0,
1.0 / 6.0,
0.0,
4.0 / 75.0,
16.0 / 75.0,
0.0,
5.0 / 6.0,
-8.0 / 3.0,
5.0 / 2.0,
0.0,
-165.0 / 64.0,
55.0 / 6.0,
-425.0 / 64.0,
85.0 / 96.0,
0.0,
12.0 / 5.0,
-8.0,
4015.0 / 612.0,
-11.0 / 36.0,
88.0 / 255.0,
0.0,
-8263.0 / 15000.0,
124.0 / 75.0,
-643.0 / 680.0,
-81.0 / 250.0,
2484.0 / 10625.0,
0.0,
0.0,
3501.0 / 1720.0,
-300.0 / 43.0,
297275.0 / 52632.0,
-319.0 / 2322.0,
24068.0 / 84065.0,
3850.0 / 26703.0,
0.0}};
Kokkos::Array<double, nstages> b{
{3.0 / 4.0, 0.0, 875.0 / 2244.0, 23.0 / 72.0, 264.0 / 1955.0, 0.0, 125.0 / 11592.0, 43.0 / 616.0}};
Kokkos::Array<double, nstages> c{{0.0, 1.0 / 6.0, 4.0 / 15.0, 2.0 / 3.0, 5.0 / 6.0, 1.0, 1.0 / 15.0, 1.0}};
Kokkos::Array<double, nstages> e{{3.0 / 4.0 - 13.0 / 160.0, 0.0, 875.0 / 2244.0 - 2375.0 / 5984.0,
23.0 / 72.0 - 5.0 / 16.0, 264.0 / 1955.0 - 12.0 / 85.0, -3.0 / 44.0,
125.0 / 11592.0, 43.0 / 616.0}};
};

} // namespace Impl
} // namespace KokkosODE

Expand Down
137 changes: 108 additions & 29 deletions ode/impl/KokkosODE_RungeKutta_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,84 @@
#include "KokkosODE_RungeKuttaTables_impl.hpp"
#include "KokkosODE_Types.hpp"

#include "iostream"

namespace KokkosODE {
namespace Impl {

// This algorithm is mostly derived from
// E. Hairer, S. P. Norsett G. Wanner,
// "Solving Ordinary Differential Equations I:
// Nonstiff Problems", Sec. II.4.
// Note that all floating point values below
// have been heuristically selected for
// convergence performance.
template <class ode_type, class mat_type, class vec_type, class res_type, class scalar_type>
KOKKOS_FUNCTION void first_step_size(const ode_type ode, const int order, const scalar_type t0, const scalar_type atol,
const scalar_type rtol, const vec_type& y0, const res_type& f0, const vec_type y1,
const mat_type temp, scalar_type& dt_ini) {
using KAT = Kokkos::ArithTraits<scalar_type>;

// Extract subviews to store intermediate data
auto f1 = Kokkos::subview(temp, 1, Kokkos::ALL());

// Compute norms for y0 and f0
double n0 = KAT::zero(), n1 = KAT::zero(), dt0, scale;
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
scale = atol + rtol * Kokkos::abs(y0(eqIdx));
n0 += Kokkos::pow(y0(eqIdx) / scale, 2);
n1 += Kokkos::pow(f0(eqIdx) / scale, 2);
}
n0 = Kokkos::sqrt(n0) / Kokkos::sqrt(ode.neqs);
n1 = Kokkos::sqrt(n1) / Kokkos::sqrt(ode.neqs);

// Select dt0
if ((n0 < 1e-5) || (n1 < 1e-5)) {
dt0 = 1e-6;
} else {
dt0 = 0.01 * n0 / n1;
}

// Estimate y at t0 + dt0
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
y1(eqIdx) = y0(eqIdx) + dt0 * f0(eqIdx);
}

// Compute f at t0+dt0 and y1,
// then compute the norm of f(t0+dt0, y1) - f(t0, y0)
scalar_type n2 = KAT::zero();
ode.evaluate_function(t0 + dt0, dt0, y1, f1);
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
n2 += Kokkos::pow((f1(eqIdx) - f0(eqIdx)) / (atol + rtol * Kokkos::abs(y0(eqIdx))), 2);
}
n2 = Kokkos::sqrt(n2) / (dt0 * Kokkos::sqrt(ode.neqs));

// Finally select initial time step dt_ini
if ((n1 <= 1e-15) && (n2 <= 1e-15)) {
dt_ini = Kokkos::max(1e-6, dt0 * 1e-3);
} else {
dt_ini = Kokkos::pow(0.01 / Kokkos::max(n1, n2), KAT::one() / order);
}

dt_ini = Kokkos::min(100 * dt0, dt_ini);

// Zero out temp variables just to be safe...
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
f0(eqIdx) = 0.0;
y1(eqIdx) = 0.0;
f1(eqIdx) = 0.0;
}
} // first_step_size

// y_new = y_old + dt*sum(b_i*k_i) i in [1, nstages]
// k_i = f(t+c_i*dt, y_old+sum(a_{ij}*k_i)) j in [1, i-1]
// we need to compute the k_i and store them as we go
// to use them for k_{i+1} computation.
template <class ode_type, class table_type, class vec_type, class mv_type, class scalar_type>
KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool adaptivity, scalar_type t,
scalar_type dt, const vec_type& y_old, const vec_type& y_new, const vec_type& temp,
const mv_type& k_vecs) {
const int neqs = ode.neqs;
const int nstages = table.nstages;
KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, scalar_type t, scalar_type dt,
const vec_type& y_old, const vec_type& y_new, const vec_type& temp, const mv_type& k_vecs) {
const int neqs = ode.neqs;
constexpr int nstages = table_type::nstages;

// first set y_new = y_old
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
Expand Down Expand Up @@ -72,34 +137,42 @@ KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool a
y_new(eqIdx) += dt * table.b[stageIdx] * k(eqIdx);
}
}

// Compute estimation of the error using k_vecs and table.e
if (adaptivity == true) {
for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) {
temp(eqIdx) = 0;
for (int stageIdx = 0; stageIdx < nstages; ++stageIdx) {
temp(eqIdx) += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx);
}
}
}
} // RKStep

// Note that the control values for
// time step increase/decrease are
// heuristically chosen based on
// L. F. Shampine and M. W. Reichelt
// "The Matlab ODE suite" SIAM J. Sci.
// Comput. Vol. 18, No. 1, pp. 1-22
// Jan. 1997
template <class ode_type, class table_type, class vec_type, class mv_type, class scalar_type>
KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, const table_type& table,
const KokkosODE::Experimental::ODE_params& params,
const scalar_type t_start, const scalar_type t_end,
const vec_type& y0, const vec_type& y, const vec_type& temp,
const mv_type& k_vecs) {
const mv_type& k_vecs, int* const step_count) {
constexpr scalar_type error_threshold = 1;
bool adapt = params.adaptivity;
scalar_type error_n;
bool adapt = params.adaptivity;
bool dt_was_reduced;
if (std::is_same_v<table_type, ButcherTableau<0, 0>>) {
if constexpr (std::is_same_v<table_type, ButcherTableau<0, 0>>) {
adapt = false;
}

// Set current time and initial time step
scalar_type t_now = t_start;
scalar_type dt = (t_end - t_start) / params.max_steps;
scalar_type t_now = t_start, dt = 0.0;
if (adapt == true) {
ode.evaluate_function(t_start, 0, y0, temp);
first_step_size(ode, table_type::order, t_start, params.abs_tol, params.rel_tol, y0, temp, y, k_vecs, dt);
if (dt < params.min_step_size) {
dt = params.min_step_size;
}
} else {
dt = (t_end - t_start) / params.num_steps;
}

*step_count = 0;

// Loop over time steps to integrate ODE
for (int stepIdx = 0; (stepIdx < params.max_steps) && (t_now <= t_end); ++stepIdx) {
Expand All @@ -119,28 +192,33 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con
// Take tentative steps until the requested error
// is met. This of course only works for adaptive
// solvers, for fix time steps we simply do not
// compute and check what error of the current step
// compute and check the error of the current step
while (error_threshold < error) {
// Take a step of Runge-Kutta integrator
RKStep(ode, table, adapt, t_now, dt, y0, y, temp, k_vecs);
RKStep(ode, table, t_now, dt, y0, y, temp, k_vecs);

// Compute the largest error and decide on
// the size of the next time step to take.
error = 0;
if (adapt) {

// Compute estimation of the error using k_vecs and table.e
if (adapt == true) {
// Compute the error
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
error = Kokkos::max(error, Kokkos::abs(temp(eqIdx)));
tol = Kokkos::max(
tol, params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx))));
tol = params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx)));
error_n = 0;
for (int stageIdx = 0; stageIdx < table.nstages; ++stageIdx) {
error_n += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx);
}
error += (error_n * error_n) / (tol * tol);
}
error = error / tol;
error = Kokkos::sqrt(error / ode.neqs);

// Reduce the time step if error
// is too large and current step
// is rejected.
if (error > 1) {
dt = dt * Kokkos::max(0.2, 0.8 / Kokkos::pow(error, 1 / table.order));
dt = dt * Kokkos::max(0.2, 0.8 * Kokkos::pow(error, -1.0 / table.order));
dt_was_reduced = true;
}

Expand All @@ -150,14 +228,15 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con

// Update time and initial condition for next time step
t_now += dt;
*step_count += 1;
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
y0(eqIdx) = y(eqIdx);
}

if (t_now < t_end) {
if (adapt && !dt_was_reduced && error < 0.5) {
// Compute new time increment
dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, 1 / table.order)));
dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, -1.0 / table.order)));
}
} else {
return Experimental::ode_solver_status::SUCCESS;
Expand Down
3 changes: 2 additions & 1 deletion ode/src/KokkosODE_BDF.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct BDF {

const double dt = (t_end - t_start) / num_steps;
double t = t_start;
int count = 0;

// Load y0 into y_vecs(:, 0)
for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
Expand All @@ -107,7 +108,7 @@ struct BDF {
}
KokkosODE::Experimental::ODE_params params(table.order - 1);
for (int stepIdx = 0; stepIdx < init_steps; ++stepIdx) {
KokkosODE::Experimental::RungeKutta<RK_type::RKF45>::Solve(ode, params, t, t + dt, y0, y, update, kstack);
KokkosODE::Experimental::RungeKutta<RK_type::RKF45>::Solve(ode, params, t, t + dt, y0, y, update, kstack, &count);

for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
y_vecs(eqIdx, stepIdx + 1) = y(eqIdx);
Expand Down
13 changes: 10 additions & 3 deletions ode/src/KokkosODE_RungeKutta.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ enum RK_type : int {
RK4 = 4, ///< Runge-Kutta classic order 4 method
RKF45 = 5, ///< Fehlberg order 5 method
RKCK = 6, ///< Cash-Karp method
RKDP = 7 ///< Dormand-Prince method
RKDP = 7, ///< Dormand-Prince method
VER56 = 8 ///< Verner order 6 method
};

template <RK_type T>
Expand Down Expand Up @@ -86,6 +87,11 @@ struct RK_Tableau_helper<RK_type::RKDP> {
using table_type = KokkosODE::Impl::ButcherTableau<4, 6>;
};

template <>
struct RK_Tableau_helper<RK_type::VER56> {
using table_type = KokkosODE::Impl::ButcherTableau<5, 7>;
};

/// \brief Unspecialized version of the RungeKutta solvers
///
/// \tparam RK_type an RK_type enum value used to specify
Expand Down Expand Up @@ -128,9 +134,10 @@ struct RungeKutta {
template <class ode_type, class vec_type, class mv_type, class scalar_type>
KOKKOS_FUNCTION static ode_solver_status Solve(const ode_type& ode, const KokkosODE::Experimental::ODE_params& params,
const scalar_type t_start, const scalar_type t_end, const vec_type& y0,
const vec_type& y, const vec_type& temp, const mv_type& k_vecs) {
const vec_type& y, const vec_type& temp, const mv_type& k_vecs,
int* const count) {
table_type table;
return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs);
return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs, count);
}
};

Expand Down
11 changes: 10 additions & 1 deletion ode/src/KokkosODE_Types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,21 @@ struct ODE_params {
int num_steps, max_steps;
double abs_tol, rel_tol, min_step_size;

KOKKOS_FUNCTION
ODE_params()
: adaptivity(true), num_steps(100), max_steps(10000), abs_tol(1e-12), rel_tol(1e-6), min_step_size(1e-9) {}

// Constructor that only specify the desired number of steps.
// In this case no adaptivity is provided, the time step will
// be constant such that dt = (tend - tstart) / num_steps;
KOKKOS_FUNCTION
ODE_params(const int num_steps_)
: adaptivity(false), num_steps(num_steps_), max_steps(num_steps_), abs_tol(0), rel_tol(0), min_step_size(0) {}
: adaptivity(false),
num_steps(num_steps_),
max_steps(num_steps_ + 1),
abs_tol(1e-12),
rel_tol(1e-6),
min_step_size(0) {}

/// ODE_parms construtor for adaptive time stepping.
KOKKOS_FUNCTION
Expand Down
1 change: 1 addition & 0 deletions ode/unit_test/Test_ODE.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// Explicit integrators
#include "Test_ODE_RK.hpp"
#include "Test_ODE_RK_chem.hpp"
#include "Test_ODE_RK_counts.hpp"

// Implicit integrators
#include "Test_ODE_Newton.hpp"
Expand Down
Loading

0 comments on commit afb3771

Please sign in to comment.