kokkos · yasahi-hpc · Jan 27, 2025 · Dec 2, 2024 · Dec 2, 2024 · Jan 21, 2025
diff --git a/examples/09_derivative/09_derivative.cpp b/examples/09_derivative/09_derivative.cpp
@@ -0,0 +1,222 @@
+// SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file
+//
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+#include <cmath>
+#include <iostream>
+#include <Kokkos_Core.hpp>
+#include <Kokkos_Complex.hpp>
+#include <KokkosFFT.hpp>
+
+#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \
+    defined(KOKKOS_ENABLE_SYCL)
+constexpr int TILE0 = 4;
+constexpr int TILE1 = 32;
+#else
+constexpr int TILE0 = 4;
+constexpr int TILE1 = 4;
+#endif
+
+using execution_space = Kokkos::DefaultExecutionSpace;
+template <typename T>
+using View1D = Kokkos::View<T*, Kokkos::LayoutRight, execution_space>;
+template <typename T>
+using View2D = Kokkos::View<T**, Kokkos::LayoutRight, execution_space>;
+template <typename T>
+using View3D = Kokkos::View<T***, Kokkos::LayoutRight, execution_space>;
+
+void compute_derivative(const int nx, const int ny, const int nz,
+                        double& seconds);
+
+// \brief Initialize the grid, wavenumbers, and the test function values
+// u = sin(2 * x) + cos(3 * y)
+// \tparam RealView1DType: Type for 1D grids in real space
+// \tparam RealView3DType: Type for 3D values in real space
+// \tparam ComplexView2DType: Type for 2D values in Fourier space
+//
+// \param x [out]: 1D grid in x direction
+// \param y [out]: 1D grid in y direction
+// \param ikx [out]: 2D grid in Fourier space for x direction
+// \param iky [out]: 2D grid in Fourier space for y direction
+// \param u [out]: 3D field in real space
+template <typename RealView1DType, typename ComplexView2DType,
+          typename RealView3DType>
+void initialize(RealView1DType& x, RealView1DType& y, ComplexView2DType& ikx,
+                ComplexView2DType& iky, RealView3DType& u) {
+  using value_type    = typename RealView1DType::non_const_value_type;
+  const auto pi       = Kokkos::numbers::pi_v<double>;
+  const value_type Lx = 2.0 * pi, Ly = 2.0 * pi;
+  const int nx = u.extent(2), ny = u.extent(1), nz = u.extent(0);
+  const value_type dx = Lx / static_cast<value_type>(nx),
+                   dy = Ly / static_cast<value_type>(ny);
+
+  // Initialize grids
+  auto h_x = Kokkos::create_mirror_view(x);
+  auto h_y = Kokkos::create_mirror_view(y);
+  for (int ix = 0; ix < nx; ++ix) h_x(ix) = static_cast<value_type>(ix) * dx;
+  for (int iy = 0; iy < ny; ++iy) h_y(iy) = static_cast<value_type>(iy) * dy;
+
+  // Initialize wave numbers
+  const Kokkos::complex<value_type> I(0.0, 1.0);  // Imaginary unit
+  auto h_ikx = Kokkos::create_mirror_view(ikx);
+  auto h_iky = Kokkos::create_mirror_view(iky);
+  for (int iy = 0; iy < ny; ++iy) {
+    for (int ix = 0; ix < nx / 2; ++ix) {
+      h_ikx(iy, ix) = I * 2.0 * pi * static_cast<value_type>(ix) / Lx;
+    }
+  }
+
+  for (int iy = 0; iy < ny; ++iy) {
+    for (int ix = 0; ix < nx / 2 + 1; ++ix) {
+      auto tmp_iy   = iy < ny / 2 ? iy : iy - ny;
+      h_iky(iy, ix) = I * 2.0 * pi * static_cast<value_type>(tmp_iy) / Ly;
+    }
+  }
+
+  // Initialize field
+  auto h_u = Kokkos::create_mirror_view(u);
+  for (int jz = 0; jz < nz; jz++) {
+    for (int jy = 0; jy < ny; jy++) {
+      for (int jx = 0; jx < nx; jx++) {
+        h_u(jz, jy, jx) = std::sin(2.0 * h_x(jx)) + std::cos(3.0 * h_y(jy));
+      }
+    }
+  }
+
+  Kokkos::deep_copy(x, h_x);
+  Kokkos::deep_copy(y, h_y);
+  Kokkos::deep_copy(ikx, h_ikx);
+  Kokkos::deep_copy(iky, h_iky);
+  Kokkos::deep_copy(u, h_u);
+}
+
+// \brief Compute analytical solution of the derivative
+// du/dx + du/dy = 2 * cos(2 * x) - 3 * sin(3 * y)
+// \tparam RealView1DType: Type for 1D grids in real space
+// \tparam RealView3DType: Type for 3D values in real space
+//
+// \param x [in]: 1D grid in x direction
+// \param y [in]: 1D grid in y direction
+// \param dudxy [out]: 3D fiels of the analytical derivative values
+template <typename RealView1DType, typename RealView3DType>
+void analytical_solution(RealView1DType& x, RealView1DType& y,
+                         RealView3DType& dudxy) {
+  // Copy grids to host
+  auto h_x = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), x);
+  auto h_y = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), y);
+
+  // Compute the analytical solution on host
+  const int nx = dudxy.extent(2), ny = dudxy.extent(1), nz = dudxy.extent(0);
+  auto h_dudxy = Kokkos::create_mirror_view(dudxy);
+  for (int iz = 0; iz < nz; iz++) {
+    for (int iy = 0; iy < ny; iy++) {
+      for (int ix = 0; ix < nx; ix++) {
+        h_dudxy(iz, iy, ix) =
+            2.0 * std::cos(2.0 * h_x(ix)) - 3.0 * std::sin(3.0 * h_y(iy));
+      }
+    }
+  }
+
+  Kokkos::deep_copy(dudxy, h_dudxy);
+}
+
+// \brief Compute the derivative of a function using FFT-based methods and
+// compare with the analytical solution
+// \param nx [in]: Number of grid points in the x-direction
+// \param ny [in]: Number of grid points in the y-direction
+// \param nz [in]: Number of grid points in the z-direction
+// \param seconds [out]: Time taken to compute the derivatives (in seconds)
+void compute_derivative(const int nx, const int ny, const int nz,
+                        double& seconds) {
+  // View types
+  using RealView1D    = View1D<double>;
+  using RealView3D    = View3D<double>;
+  using ComplexView2D = View2D<Kokkos::complex<double>>;
+  using ComplexView3D = View3D<Kokkos::complex<double>>;
+
+  // Declare grids
+  RealView1D x("x", nx), y("y", ny);
+  ComplexView2D ikx("ikx", ny, nx / 2 + 1), iky("iky", ny, nx / 2 + 1);
+
+  // Variables to be transformed
+  RealView3D u("u", nz, ny, nx), dudxy("dudxy", nz, ny, nx);
+  ComplexView3D u_hat("u_hat", nz, ny, nx / 2 + 1);
+
+  initialize(x, y, ikx, iky, u);
+  analytical_solution(x, y, dudxy);
+
+  // MDRanges used in the kernels
+  using range2D_type = Kokkos::MDRangePolicy<
+      execution_space,
+      Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>>;
+  using tile2D_type  = typename range2D_type::tile_type;
+  using point2D_type = typename range2D_type::point_type;
+
+  execution_space exec;
+
+  range2D_type range2d(exec, point2D_type{{0, 0}},
+                       point2D_type{{ny, nx / 2 + 1}},
+                       tile2D_type{{TILE0, TILE1}});
+
+  // kokkos-fft plans
+  KokkosFFT::Plan r2c_plan(exec, u, u_hat, KokkosFFT::Direction::forward,
+                           KokkosFFT::axis_type<2>({-2, -1}));
+  KokkosFFT::Plan c2r_plan(exec, u_hat, u, KokkosFFT::Direction::backward,
+                           KokkosFFT::axis_type<2>({-2, -1}));
+
+  // Start computation
+  Kokkos::Timer timer;
+
+  // Forward transform u -> u_hat (=FFT (u))
+  r2c_plan.execute(u, u_hat);
+
+  // Compute derivatives by multiplications in Fourier space
+  Kokkos::parallel_for(
+      "ComputeDerivative", range2d, KOKKOS_LAMBDA(const int iy, const int ix) {
+        auto ikx_tmp = ikx(iy, ix), iky_tmp = iky(iy, ix);
+        for (int iz = 0; iz < nz; ++iz) {
+          u_hat(iz, iy, ix) =
+              (ikx_tmp * u_hat(iz, iy, ix) + iky_tmp * u_hat(iz, iy, ix));
+        }
+      });
+
+  // Backward transform u_hat -> u (=IFFT (u_hat))
+  c2r_plan.execute(u_hat, u);  // normalization is made here
+  exec.fence();
+  seconds = timer.seconds();
+
+  // Check results
+  auto h_u = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), u);
+  auto h_dudxy =
+      Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), dudxy);
+
+  const double epsilon = 1.e-8;
+  for (int iz = 0; iz < nz; ++iz) {
+    for (int iy = 0; iy < ny; ++iy) {
+      for (int ix = 0; ix < nx; ++ix) {
+        if (std::abs(h_dudxy(iz, iy, ix)) <= epsilon) continue;
+        auto relative_error = std::abs(h_dudxy(iz, iy, ix) - h_u(iz, iy, ix)) /
+                              std::abs(h_dudxy(iz, iy, ix));
+        if (relative_error > epsilon) {
+          std::cerr << "Error: " << h_dudxy(iz, iy, ix)
+                    << " != " << h_u(iz, iy, ix) << std::endl;
+          return;
+        }
+      }
+    }
+  }
+}
+
+int main(int argc, char* argv[]) {
+  Kokkos::initialize(argc, argv);
+  {
+    const int nx = 128, ny = 128, nz = 128;
+    double seconds = 0.0;
+    compute_derivative(nx, ny, nz, seconds);
+    std::cout << "2D derivative with FFT took: " << seconds << " [s]"
+              << std::endl;
+  }
+  Kokkos::finalize();
+
+  return 0;
+}
diff --git a/examples/09_derivative/CMakeLists.txt b/examples/09_derivative/CMakeLists.txt
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file
+#
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+add_executable(09_derivative 09_derivative.cpp)
+target_link_libraries(09_derivative PUBLIC KokkosFFT::fft)
diff --git a/examples/09_derivative/derivative.py b/examples/09_derivative/derivative.py
@@ -0,0 +1,105 @@
+# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file
+#
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+import numpy as np
+import time
+
+def initialize(nx: int, ny: int, nz: int):
+    """
+    Initialize the grid, wavenumbers, and test function values.
+    u = sin(2 * x) + cos(3 * y)
+
+    Parameters:
+        nx (int): Number of grid points in the x-direction.
+        ny (int): Number of grid points in the y-direction.
+        nz (int): Number of grid points in the z-direction (batch direction).
+
+    Returns:
+        tuple: A tuple containing:
+            - X (np.ndarray): 2D array of x-coordinates on the grid.
+            - Y (np.ndarray): 2D array of y-coordinates on the grid.
+            - ikx (np.ndarray): 2D array of Fourier wavenumbers in the x-direction.
+            - iky (np.ndarray): 2D array of Fourier wavenumbers in the y-direction.
+            - u (np.ndarray): 3D array of test function values.
+    """
+    Lx, Ly = 2*np.pi, 2*np.pi
+
+    # Define and initialize the grid
+    x = np.linspace(0, 2*np.pi, nx, endpoint=False)
+    y = np.linspace(0, 2*np.pi, ny, endpoint=False)
+
+    X, Y = np.meshgrid(x, y)
+
+    # Initialize the wavenumbers
+    ikx = np.zeros((ny, nx//2+1), dtype=np.complex128)
+    iky = np.zeros((ny, nx//2+1), dtype=np.complex128)
+
+    for iy in range(ny):
+        for ix in range(nx//2):
+            ikx[iy, ix] = 1j * 2 * np.pi / Lx * ix
+
+    for iy in range(ny):
+        for ix in range(nx//2+1):
+            _iy = iy if iy < ny//2 else iy - ny
+            iky[iy, ix] = 1j * 2 * np.pi / Lx * _iy
+
+    # Initialize the data
+    u = np.sin(2 * X) + np.cos(3 * Y)
+    u_batched = np.repeat(u[np.newaxis, :, :], nz, axis=0)
+
+    return X, Y, ikx, iky, u_batched
+
+def analytical_solution(X: np.ndarray, Y: np.ndarray, nz: int) -> np.ndarray:
+    """
+    Compute the analytical solution for the derivative of the test function:
+    dudxy = du/dx + du/dy = 2 * cos(2 * x) - 3 * sin(3 * y)
+
+    Parameters:
+        X (np.ndarray): 2D array of x-coordinates on the grid.
+        Y (np.ndarray): 2D array of y-coordinates on the grid.
+        nz (int): Number of grid points in the z-direction (batch direction).
+
+    Returns:
+        np.ndarray: 3D array of the analytical derivative values.
+    """
+    dudxy = 2 * np.cos(2 * X) - 3 * np.sin(3 * Y)
+    return np.repeat(dudxy[np.newaxis, :, :], nz, axis=0)
+
+def compute_derivative(nx: int, ny: int, nz: int) -> np.float64:
+    """
+    Compute the derivative of a function using FFT-based methods and
+    compare with the analytical solution.
+
+    Parameters:
+        nx (int): Number of grid points in the x-direction.
+        ny (int): Number of grid points in the y-direction.
+        nz (int): Number of grid points in the z-direction (batch direction).
+
+    Returns:
+        float: Time taken to compute the derivatives (in seconds).
+    """
+    X, Y, ikx, iky, u = initialize(nx, ny, nz)
+    dudxy = analytical_solution(X, Y, nz)
+
+    # Compute the derivative
+    start = time.time()
+
+    # Forward transform u -> u_hat (=FFT (u))
+    u_hat = np.fft.rfft2(u)
+
+    # Compute derivatives by multiplications in Fourier space
+    u_hat = ikx * u_hat + iky * u_hat
+
+    # Backward transform u_hat -> u (=IFFT (u_hat))
+    u = np.fft.irfft2(u_hat)
+    seconds = time.time() - start
+
+    np.testing.assert_allclose(u, dudxy, atol=1e-10)
+
+    return seconds
+
+if __name__ == '__main__':
+    nx, ny, nz = 128, 128, 128
+    seconds = compute_derivative(nx, ny, nz)
+    print(f"2D derivative with FFT took {seconds} [s]")
diff --git a/examples/09_derivative/test_derivative.py b/examples/09_derivative/test_derivative.py
@@ -0,0 +1,61 @@
+# SPDX-FileCopyrightText: (C) The kokkos-fft development team, see COPYRIGHT.md file
+#
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+import numpy as np
+import pytest
+from derivative import (
+    initialize,
+    analytical_solution,
+    compute_derivative,
+)
+
+@pytest.mark.parametrize("n", [16, 32])
+def test_initialize(n: int) -> None:
+    X, Y, ikx, iky, u = initialize(n, n, n)
+
+    # Check shapes
+    assert X.shape == (n, n)
+    assert Y.shape == (n, n)
+    assert ikx.shape == (n, n//2+1)
+    assert iky.shape == (n, n//2+1)
+    assert u.shape == (n, n, n)
+
+    # Check dx, dy
+    lx, ly = 2*np.pi, 2*np.pi
+    dx, dy = lx / n, ly / n
+    np.testing.assert_allclose(X[0, 1] - X[0, 0], dx)
+    np.testing.assert_allclose(Y[1, 0] - Y[0, 0], dx)
+
+    # Check dikx, diky
+    dikx, diky = 1j * 2 * np.pi / lx, 1j * 2 * np.pi / ly
+    np.testing.assert_allclose(ikx[0, 1], dikx)
+    np.testing.assert_allclose(iky[1, 0], diky)
+
+    # Check the Hermitian symmetry of iky
+    # iky is Hermitian, so iky[i, j] = -iky[-i, j]
+    for i in range(1, n//2):
+        np.testing.assert_allclose(iky[i], -iky[-i])
+
+    # Check it is a batch
+    u0 = u[0]
+    for i in range(1, n):
+        np.testing.assert_allclose(u[i], u0)
+
+@pytest.mark.parametrize("n", [16, 32])
+def test_analytical_solution(n: int) -> None:
+    X, Y, ikx, iky, u = initialize(n, n, n)
+    dudxy = analytical_solution(X, Y, n)
+
+    # Check shapes
+    assert dudxy.shape == (n, n, n)
+
+    # Check it is a batch
+    dudxy0 = dudxy[0]
+    for i in range(1, n):
+        np.testing.assert_allclose(dudxy[i], dudxy0)
+
+@pytest.mark.parametrize("n", [16, 32])
+def test_derivative(n: int) -> None:
+    # The following function fails if it is not correct
+    _ = compute_derivative(n, n, n)