ODE - RK: fixing small issues reported by Yaro (#2229)

* ODE - RK: fixing small issues reported by Yaro 1. fix integer division to floating point division 2. fix evaluation of max scaled error 3. increase or decrease time step using uniform formula 4. use num_steps instead of max_steps for dt calculation 5. add a time step when using constant dt to avoid issues with round-off errors 6. fixing exponent and moving adaptivity computation out of RKStep 7. adding time step counter 8. adding more tests and keep track of time steps if wanted Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * RK: fixing variable name after rebase Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * RK: enabling most methods after fixing test related issues Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * RK: passing new unit-tests Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * Applying clang-format Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * RK: fix bad subview creation Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * RK: fix bug that computes the inital step size for non-adaptive case This prevents having the user defined time step and leads to wrong results. The rate of convergence tests are now passing! Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * clang-format... Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * RK: tweaking the tolerances a bit On GPU the lowest order method (RK1-2) is accumulating a bit more errors than on CPU. Only an issue when comparing values to zero where the absolute tolerance is needed to detect good conv. Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> * Adding reference for some implementation details and heuristic values Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov> --------- Signed-off-by: Luc Berger-Vergiat <lberge@sandia.gov>
kokkos · Nov 4, 2024 · afb3771 · afb3771
1 parent 14b20bc
commit afb3771
Show file tree

Hide file tree

Showing 12 changed files with 1,042 additions and 63 deletions.
diff --git a/ode/impl/KokkosODE_BDF_impl.hpp b/ode/impl/KokkosODE_BDF_impl.hpp
@@ -142,7 +142,7 @@ struct BDF_system_wrapper2 {
     if (compute_jac) {
       mySys.evaluate_jacobian(t, dt, y, jac);
 
-      // J = I - dt*(dy/dy)
+      // J = I - dt*(df/dy)
       for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) {
         for (int colIdx = 0; colIdx < neqs; ++colIdx) {
           jac(rowIdx, colIdx) = -dt * jac(rowIdx, colIdx);

diff --git a/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp b/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp
@@ -257,6 +257,59 @@ struct ButcherTableau<4, 6>  // Referred to as DOPRI5 or RKDP
                                     11.0 / 84.0 - 187.0 / 2100.0, -1.0 / 40.0}};
 };
 
+// Coefficients obtained from:
+// J. H. Verner
+// "Explicit Runge-Kutta methods with estimates of the local truncation error",
+// Journal of Numerical Analysis, Volume 15, Issue 4, 1978,
+// https://doi.org/10.1137/0715051.
+template <>
+struct ButcherTableau<5, 7>  // Referred to as Verner 5-6 or VER56
+{
+  static constexpr int order   = 6;
+  static constexpr int nstages = 8;
+  Kokkos::Array<double, (nstages * nstages + nstages) / 2> a{{0.0,
+                                                              1.0 / 6.0,
+                                                              0.0,
+                                                              4.0 / 75.0,
+                                                              16.0 / 75.0,
+                                                              0.0,
+                                                              5.0 / 6.0,
+                                                              -8.0 / 3.0,
+                                                              5.0 / 2.0,
+                                                              0.0,
+                                                              -165.0 / 64.0,
+                                                              55.0 / 6.0,
+                                                              -425.0 / 64.0,
+                                                              85.0 / 96.0,
+                                                              0.0,
+                                                              12.0 / 5.0,
+                                                              -8.0,
+                                                              4015.0 / 612.0,
+                                                              -11.0 / 36.0,
+                                                              88.0 / 255.0,
+                                                              0.0,
+                                                              -8263.0 / 15000.0,
+                                                              124.0 / 75.0,
+                                                              -643.0 / 680.0,
+                                                              -81.0 / 250.0,
+                                                              2484.0 / 10625.0,
+                                                              0.0,
+                                                              0.0,
+                                                              3501.0 / 1720.0,
+                                                              -300.0 / 43.0,
+                                                              297275.0 / 52632.0,
+                                                              -319.0 / 2322.0,
+                                                              24068.0 / 84065.0,
+                                                              3850.0 / 26703.0,
+                                                              0.0}};
+  Kokkos::Array<double, nstages> b{
+      {3.0 / 4.0, 0.0, 875.0 / 2244.0, 23.0 / 72.0, 264.0 / 1955.0, 0.0, 125.0 / 11592.0, 43.0 / 616.0}};
+  Kokkos::Array<double, nstages> c{{0.0, 1.0 / 6.0, 4.0 / 15.0, 2.0 / 3.0, 5.0 / 6.0, 1.0, 1.0 / 15.0, 1.0}};
+  Kokkos::Array<double, nstages> e{{3.0 / 4.0 - 13.0 / 160.0, 0.0, 875.0 / 2244.0 - 2375.0 / 5984.0,
+                                    23.0 / 72.0 - 5.0 / 16.0, 264.0 / 1955.0 - 12.0 / 85.0, -3.0 / 44.0,
+                                    125.0 / 11592.0, 43.0 / 616.0}};
+};
+
 }  // namespace Impl
 }  // namespace KokkosODE
 

diff --git a/ode/impl/KokkosODE_RungeKutta_impl.hpp b/ode/impl/KokkosODE_RungeKutta_impl.hpp
@@ -23,19 +23,84 @@
 #include "KokkosODE_RungeKuttaTables_impl.hpp"
 #include "KokkosODE_Types.hpp"
 
+#include "iostream"
+
 namespace KokkosODE {
 namespace Impl {
 
+// This algorithm is mostly derived from
+// E. Hairer, S. P. Norsett G. Wanner,
+// "Solving Ordinary Differential Equations I:
+// Nonstiff Problems", Sec. II.4.
+// Note that all floating point values below
+// have been heuristically selected for
+// convergence performance.
+template <class ode_type, class mat_type, class vec_type, class res_type, class scalar_type>
+KOKKOS_FUNCTION void first_step_size(const ode_type ode, const int order, const scalar_type t0, const scalar_type atol,
+                                     const scalar_type rtol, const vec_type& y0, const res_type& f0, const vec_type y1,
+                                     const mat_type temp, scalar_type& dt_ini) {
+  using KAT = Kokkos::ArithTraits<scalar_type>;
+
+  // Extract subviews to store intermediate data
+  auto f1 = Kokkos::subview(temp, 1, Kokkos::ALL());
+
+  // Compute norms for y0 and f0
+  double n0 = KAT::zero(), n1 = KAT::zero(), dt0, scale;
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    scale = atol + rtol * Kokkos::abs(y0(eqIdx));
+    n0 += Kokkos::pow(y0(eqIdx) / scale, 2);
+    n1 += Kokkos::pow(f0(eqIdx) / scale, 2);
+  }
+  n0 = Kokkos::sqrt(n0) / Kokkos::sqrt(ode.neqs);
+  n1 = Kokkos::sqrt(n1) / Kokkos::sqrt(ode.neqs);
+
+  // Select dt0
+  if ((n0 < 1e-5) || (n1 < 1e-5)) {
+    dt0 = 1e-6;
+  } else {
+    dt0 = 0.01 * n0 / n1;
+  }
+
+  // Estimate y at t0 + dt0
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    y1(eqIdx) = y0(eqIdx) + dt0 * f0(eqIdx);
+  }
+
+  // Compute f at t0+dt0 and y1,
+  // then compute the norm of f(t0+dt0, y1) - f(t0, y0)
+  scalar_type n2 = KAT::zero();
+  ode.evaluate_function(t0 + dt0, dt0, y1, f1);
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    n2 += Kokkos::pow((f1(eqIdx) - f0(eqIdx)) / (atol + rtol * Kokkos::abs(y0(eqIdx))), 2);
+  }
+  n2 = Kokkos::sqrt(n2) / (dt0 * Kokkos::sqrt(ode.neqs));
+
+  // Finally select initial time step dt_ini
+  if ((n1 <= 1e-15) && (n2 <= 1e-15)) {
+    dt_ini = Kokkos::max(1e-6, dt0 * 1e-3);
+  } else {
+    dt_ini = Kokkos::pow(0.01 / Kokkos::max(n1, n2), KAT::one() / order);
+  }
+
+  dt_ini = Kokkos::min(100 * dt0, dt_ini);
+
+  // Zero out temp variables just to be safe...
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    f0(eqIdx) = 0.0;
+    y1(eqIdx) = 0.0;
+    f1(eqIdx) = 0.0;
+  }
+}  // first_step_size
+
 // y_new = y_old + dt*sum(b_i*k_i)    i in [1, nstages]
 // k_i = f(t+c_i*dt, y_old+sum(a_{ij}*k_i))  j in [1, i-1]
 // we need to compute the k_i and store them as we go
 // to use them for k_{i+1} computation.
 template <class ode_type, class table_type, class vec_type, class mv_type, class scalar_type>
-KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool adaptivity, scalar_type t,
-                            scalar_type dt, const vec_type& y_old, const vec_type& y_new, const vec_type& temp,
-                            const mv_type& k_vecs) {
-  const int neqs    = ode.neqs;
-  const int nstages = table.nstages;
+KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, scalar_type t, scalar_type dt,
+                            const vec_type& y_old, const vec_type& y_new, const vec_type& temp, const mv_type& k_vecs) {
+  const int neqs        = ode.neqs;
+  constexpr int nstages = table_type::nstages;
 
   // first set y_new = y_old
   for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
@@ -72,34 +137,42 @@ KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool a
       y_new(eqIdx) += dt * table.b[stageIdx] * k(eqIdx);
     }
   }
-
-  // Compute estimation of the error using k_vecs and table.e
-  if (adaptivity == true) {
-    for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) {
-      temp(eqIdx) = 0;
-      for (int stageIdx = 0; stageIdx < nstages; ++stageIdx) {
-        temp(eqIdx) += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx);
-      }
-    }
-  }
 }  // RKStep
 
+// Note that the control values for
+// time step increase/decrease are
+// heuristically chosen based on
+// L. F. Shampine and M. W. Reichelt
+// "The Matlab ODE suite" SIAM J. Sci.
+// Comput. Vol. 18, No. 1, pp. 1-22
+// Jan. 1997
 template <class ode_type, class table_type, class vec_type, class mv_type, class scalar_type>
 KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, const table_type& table,
                                                         const KokkosODE::Experimental::ODE_params& params,
                                                         const scalar_type t_start, const scalar_type t_end,
                                                         const vec_type& y0, const vec_type& y, const vec_type& temp,
-                                                        const mv_type& k_vecs) {
+                                                        const mv_type& k_vecs, int* const step_count) {
   constexpr scalar_type error_threshold = 1;
-  bool adapt                            = params.adaptivity;
+  scalar_type error_n;
+  bool adapt = params.adaptivity;
   bool dt_was_reduced;
-  if (std::is_same_v<table_type, ButcherTableau<0, 0>>) {
+  if constexpr (std::is_same_v<table_type, ButcherTableau<0, 0>>) {
     adapt = false;
   }
 
   // Set current time and initial time step
-  scalar_type t_now = t_start;
-  scalar_type dt    = (t_end - t_start) / params.max_steps;
+  scalar_type t_now = t_start, dt = 0.0;
+  if (adapt == true) {
+    ode.evaluate_function(t_start, 0, y0, temp);
+    first_step_size(ode, table_type::order, t_start, params.abs_tol, params.rel_tol, y0, temp, y, k_vecs, dt);
+    if (dt < params.min_step_size) {
+      dt = params.min_step_size;
+    }
+  } else {
+    dt = (t_end - t_start) / params.num_steps;
+  }
+
+  *step_count = 0;
 
   // Loop over time steps to integrate ODE
   for (int stepIdx = 0; (stepIdx < params.max_steps) && (t_now <= t_end); ++stepIdx) {
@@ -119,28 +192,33 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con
     // Take tentative steps until the requested error
     // is met. This of course only works for adaptive
     // solvers, for fix time steps we simply do not
-    // compute and check what error of the current step
+    // compute and check the error of the current step
     while (error_threshold < error) {
       // Take a step of Runge-Kutta integrator
-      RKStep(ode, table, adapt, t_now, dt, y0, y, temp, k_vecs);
+      RKStep(ode, table, t_now, dt, y0, y, temp, k_vecs);
 
       // Compute the largest error and decide on
       // the size of the next time step to take.
       error = 0;
-      if (adapt) {
+
+      // Compute estimation of the error using k_vecs and table.e
+      if (adapt == true) {
         // Compute the error
         for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
-          error = Kokkos::max(error, Kokkos::abs(temp(eqIdx)));
-          tol   = Kokkos::max(
-              tol, params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx))));
+          tol     = params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx)));
+          error_n = 0;
+          for (int stageIdx = 0; stageIdx < table.nstages; ++stageIdx) {
+            error_n += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx);
+          }
+          error += (error_n * error_n) / (tol * tol);
         }
-        error = error / tol;
+        error = Kokkos::sqrt(error / ode.neqs);
 
         // Reduce the time step if error
         // is too large and current step
         // is rejected.
         if (error > 1) {
-          dt             = dt * Kokkos::max(0.2, 0.8 / Kokkos::pow(error, 1 / table.order));
+          dt             = dt * Kokkos::max(0.2, 0.8 * Kokkos::pow(error, -1.0 / table.order));
           dt_was_reduced = true;
         }
 
@@ -150,14 +228,15 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con
 
     // Update time and initial condition for next time step
     t_now += dt;
+    *step_count += 1;
     for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
       y0(eqIdx) = y(eqIdx);
     }
 
     if (t_now < t_end) {
       if (adapt && !dt_was_reduced && error < 0.5) {
         // Compute new time increment
-        dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, 1 / table.order)));
+        dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, -1.0 / table.order)));
       }
     } else {
       return Experimental::ode_solver_status::SUCCESS;

diff --git a/ode/src/KokkosODE_BDF.hpp b/ode/src/KokkosODE_BDF.hpp
@@ -93,6 +93,7 @@ struct BDF {
 
     const double dt = (t_end - t_start) / num_steps;
     double t        = t_start;
+    int count       = 0;
 
     // Load y0 into y_vecs(:, 0)
     for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
@@ -107,7 +108,7 @@ struct BDF {
     }
     KokkosODE::Experimental::ODE_params params(table.order - 1);
     for (int stepIdx = 0; stepIdx < init_steps; ++stepIdx) {
-      KokkosODE::Experimental::RungeKutta<RK_type::RKF45>::Solve(ode, params, t, t + dt, y0, y, update, kstack);
+      KokkosODE::Experimental::RungeKutta<RK_type::RKF45>::Solve(ode, params, t, t + dt, y0, y, update, kstack, &count);
 
       for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
         y_vecs(eqIdx, stepIdx + 1) = y(eqIdx);

diff --git a/ode/src/KokkosODE_RungeKutta.hpp b/ode/src/KokkosODE_RungeKutta.hpp
@@ -38,7 +38,8 @@ enum RK_type : int {
   RK4   = 4,  ///< Runge-Kutta classic order 4 method
   RKF45 = 5,  ///< Fehlberg order 5 method
   RKCK  = 6,  ///< Cash-Karp method
-  RKDP  = 7   ///< Dormand-Prince method
+  RKDP  = 7,  ///< Dormand-Prince method
+  VER56 = 8   ///< Verner order 6 method
 };
 
 template <RK_type T>
@@ -86,6 +87,11 @@ struct RK_Tableau_helper<RK_type::RKDP> {
   using table_type = KokkosODE::Impl::ButcherTableau<4, 6>;
 };
 
+template <>
+struct RK_Tableau_helper<RK_type::VER56> {
+  using table_type = KokkosODE::Impl::ButcherTableau<5, 7>;
+};
+
 /// \brief Unspecialized version of the RungeKutta solvers
 ///
 /// \tparam RK_type an RK_type enum value used to specify
@@ -128,9 +134,10 @@ struct RungeKutta {
   template <class ode_type, class vec_type, class mv_type, class scalar_type>
   KOKKOS_FUNCTION static ode_solver_status Solve(const ode_type& ode, const KokkosODE::Experimental::ODE_params& params,
                                                  const scalar_type t_start, const scalar_type t_end, const vec_type& y0,
-                                                 const vec_type& y, const vec_type& temp, const mv_type& k_vecs) {
+                                                 const vec_type& y, const vec_type& temp, const mv_type& k_vecs,
+                                                 int* const count) {
     table_type table;
-    return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs);
+    return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs, count);
   }
 };
 

diff --git a/ode/src/KokkosODE_Types.hpp b/ode/src/KokkosODE_Types.hpp
@@ -27,12 +27,21 @@ struct ODE_params {
   int num_steps, max_steps;
   double abs_tol, rel_tol, min_step_size;
 
+  KOKKOS_FUNCTION
+  ODE_params()
+      : adaptivity(true), num_steps(100), max_steps(10000), abs_tol(1e-12), rel_tol(1e-6), min_step_size(1e-9) {}
+
   // Constructor that only specify the desired number of steps.
   // In this case no adaptivity is provided, the time step will
   // be constant such that dt = (tend - tstart) / num_steps;
   KOKKOS_FUNCTION
   ODE_params(const int num_steps_)
-      : adaptivity(false), num_steps(num_steps_), max_steps(num_steps_), abs_tol(0), rel_tol(0), min_step_size(0) {}
+      : adaptivity(false),
+        num_steps(num_steps_),
+        max_steps(num_steps_ + 1),
+        abs_tol(1e-12),
+        rel_tol(1e-6),
+        min_step_size(0) {}
 
   /// ODE_parms construtor for adaptive time stepping.
   KOKKOS_FUNCTION

diff --git a/ode/unit_test/Test_ODE.hpp b/ode/unit_test/Test_ODE.hpp
@@ -19,6 +19,7 @@
 // Explicit integrators
 #include "Test_ODE_RK.hpp"
 #include "Test_ODE_RK_chem.hpp"
+#include "Test_ODE_RK_counts.hpp"
 
 // Implicit integrators
 #include "Test_ODE_Newton.hpp"