diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 1f8e1610..3656fc37 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -132,6 +132,7 @@ jobs: -DCMAKE_CXX_COMPILER=${{ matrix.backend.cxx_compiler }} \ -DCMAKE_CXX_STANDARD=17 \ -DBUILD_TESTING=ON \ + -DKokkosFFT_ENABLE_BENCHMARK=ON \ -DKokkosFFT_INTERNAL_Kokkos=ON \ ${{ matrix.backend.cmake_flags }} \ ${{ matrix.target.cmake_flags }} diff --git a/.gitmodules b/.gitmodules index df51ef69..1bff3dde 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "tpls/googletest"] path = tpls/googletest url = https://github.com/google/googletest.git +[submodule "tpls/benchmark"] + path = tpls/benchmark + url = https://github.com/google/benchmark.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bcf30b2..bc7d831f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,8 +6,21 @@ list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # Options option(BUILD_EXAMPLES "Build KokkosFFT examples" ON) -option(KokkosFFT_ENABLE_HOST_AND_DEVICE "Enable FFT on both host and device") -option(KokkosFFT_INTERNAL_Kokkos "Build internal Kokkos instead of relying on external one") +option(KokkosFFT_ENABLE_HOST_AND_DEVICE "Enable FFT on both host and device" OFF) +option(KokkosFFT_INTERNAL_Kokkos "Build internal Kokkos instead of relying on external one" OFF) +option(KokkosFFT_ENABLE_BENCHMARK "Build benchmarks for KokkosFFT" OFF) + +# Version information +set(KokkosFFT_VERSION_MAJOR 0) +set(KokkosFFT_VERSION_MINOR 0) +set(KokkosFFT_VERSION_PATCH 00) +set(KokkosFFT_VERSION "${KokkosFFT_VERSION_MAJOR}.${KokkosFFT_VERSION_MINOR}.${KokkosFFT_VERSION_PATCH}") + +#Set variables for config file +math(EXPR KOKKOSFFT_VERSION "${KokkosFFT_VERSION_MAJOR} * 10000 + ${KokkosFFT_VERSION_MINOR} * 100 + ${KokkosFFT_VERSION_PATCH}") +math(EXPR KOKKOSFFT_VERSION_MAJOR "${KOKKOSFFT_VERSION} / 10000") +math(EXPR KOKKOSFFT_VERSION_MINOR 
"${KOKKOSFFT_VERSION} / 100 % 100") +math(EXPR KOKKOSFFT_VERSION_PATCH "${KOKKOSFFT_VERSION} % 100") if (NOT KokkosFFT_INTERNAL_Kokkos) # First check, Kokkos is added as subdirectory or not @@ -27,6 +40,33 @@ if(BUILD_TESTING) endif() endif() +# Benchmark +if(KokkosFFT_ENABLE_BENCHMARK) + option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." OFF) + add_subdirectory(tpls/benchmark) + + # [TO DO] Fix this, it detects benchmark not a googlebench + #find_package(benchmark QUIET) + #if(NOT benchmark_FOUND) + # add_subdirectory(tpls/benchmark) + #endif() +endif() + +# Configure files to display configuration +# Configure the library +set( + PACKAGE_NAME_CONFIG_FILES + KokkosFFT_config.h + KokkosFFT_Version_Info.hpp +) + +foreach(CONFIG_FILE ${PACKAGE_NAME_CONFIG_FILES}) + configure_file( + cmake/${CONFIG_FILE}.in + ${CMAKE_BINARY_DIR}/${CONFIG_FILE} + ) +endforeach() + # Set directories used for install include(GNUInstallDirs) set(LIBDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}) @@ -71,7 +111,7 @@ configure_package_config_file(cmake/KokkosFFTConfig.cmake.in write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/KokkosFFTConfigVersion.cmake - VERSION 0.0.0 + VERSION ${KokkosFFT_VERSION} COMPATIBILITY SameMajorVersion ) @@ -79,4 +119,4 @@ install( FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosFFTConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/KokkosFFTConfigVersion.cmake DESTINATION ${INSTALL_LIBDIR} -) +) \ No newline at end of file diff --git a/cmake/KokkosFFT_Version_Info.hpp.in b/cmake/KokkosFFT_Version_Info.hpp.in new file mode 100644 index 00000000..0d0f9812 --- /dev/null +++ b/cmake/KokkosFFT_Version_Info.hpp.in @@ -0,0 +1,36 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+#ifndef KOKKOSFFT_VERSION_INFO_HPP
+#define KOKKOSFFT_VERSION_INFO_HPP
+
+#include <string_view>
+
+namespace KokkosFFT {
+namespace Impl {
+
+constexpr std::string_view GIT_BRANCH = R"branch(@GIT_BRANCH@)branch";
+constexpr std::string_view GIT_COMMIT_HASH = "@GIT_COMMIT_HASH@";
+constexpr std::string_view GIT_CLEAN_STATUS = "@GIT_CLEAN_STATUS@";
+constexpr std::string_view GIT_COMMIT_DESCRIPTION =
+    R"message(@GIT_COMMIT_DESCRIPTION@)message";
+constexpr std::string_view GIT_COMMIT_DATE = "@GIT_COMMIT_DATE@";
+constexpr std::string_view BENCHMARK_VERSION = "@BENCHMARK_VERSION@";
+
+}  // namespace Impl
+}  // namespace KokkosFFT
+
+#endif
\ No newline at end of file
diff --git a/cmake/KokkosFFT_config.h.in b/cmake/KokkosFFT_config.h.in
new file mode 100644
index 00000000..cf8321a3
--- /dev/null
+++ b/cmake/KokkosFFT_config.h.in
@@ -0,0 +1,30 @@
+#ifndef KOKKOSFFT_CONFIG_H
+#define KOKKOSFFT_CONFIG_H
+
+/* Define the current version of KokkosFFT */
+#define KOKKOSFFT_VERSION @KOKKOSFFT_VERSION@
+#define KOKKOSFFT_VERSION_MAJOR @KOKKOSFFT_VERSION_MAJOR@
+#define KOKKOSFFT_VERSION_MINOR @KOKKOSFFT_VERSION_MINOR@
+#define KOKKOSFFT_VERSION_PATCH @KOKKOSFFT_VERSION_PATCH@
+
+/* Define if building in debug mode */
+#cmakedefine HAVE_KOKKOSFFT_DEBUG
+
+#cmakedefine KOKKOSFFT_ENABLE_BENCHMARK
+
+/* Define this macro if experimental features of KokkosFFT are enabled */
+#cmakedefine HAVE_KOKKOSFFT_EXPERIMENTAL
+
+/* FFTW */
+#cmakedefine KOKKOSFFT_ENABLE_TPL_FFTW
+
+/* CUFFT */
+#cmakedefine KOKKOSFFT_ENABLE_TPL_CUFFT
+
+/* HIPFFT */
+#cmakedefine KOKKOSFFT_ENABLE_TPL_HIPFFT
+
+/* ONEMKL */
+#cmakedefine KOKKOSFFT_ENABLE_TPL_ONEMKL
+
+#endif
\ No newline at end of file
diff --git a/common/src/CMakeLists.txt
b/common/src/CMakeLists.txt index fa7d5acd..0af53a3c 100644 --- a/common/src/CMakeLists.txt +++ b/common/src/CMakeLists.txt @@ -8,33 +8,38 @@ target_link_libraries(common if(Kokkos_ENABLE_CUDA) find_package(CUDAToolkit REQUIRED COMPONENTS cufft) target_link_libraries(common INTERFACE CUDA::cufft) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_CUFFT) if(KokkosFFT_ENABLE_HOST_AND_DEVICE) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) - target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE KOKKOSFFT_ENABLE_TPL_FFTW) endif() elseif(Kokkos_ENABLE_HIP) find_package(hipfft REQUIRED) target_link_libraries(common INTERFACE hip::hipfft) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_HIPFFT) if(KokkosFFT_ENABLE_HOST_AND_DEVICE) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) - target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE KOKKOSFFT_ENABLE_TPL_FFTW) endif() elseif(Kokkos_ENABLE_SYCL) find_package(MKL REQUIRED COMPONENTS SYCL) target_link_libraries(common INTERFACE MKL::MKL_SYCL) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_ONEMKL) if(KokkosFFT_ENABLE_HOST_AND_DEVICE) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) - target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE KOKKOSFFT_ENABLE_TPL_FFTW) endif() elseif(Kokkos_ENABLE_OPENMP) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) + target_compile_definitions(common 
INTERFACE KOKKOSFFT_ENABLE_TPL_FFTW) elseif(Kokkos_ENABLE_SERIAL) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double) + target_compile_definitions(common INTERFACE KOKKOSFFT_ENABLE_TPL_FFTW) endif() target_compile_features(common INTERFACE cxx_std_17) diff --git a/common/src/KokkosFFT_normalization.hpp b/common/src/KokkosFFT_normalization.hpp index 47614a98..e9e5e4cf 100644 --- a/common/src/KokkosFFT_normalization.hpp +++ b/common/src/KokkosFFT_normalization.hpp @@ -61,7 +61,7 @@ void normalize(const ExecutionSpace& exec_space, ViewType& inout, if (to_normalize) _normalize(exec_space, inout, coef); } -auto swap_direction(Normalization normalization) { +inline auto swap_direction(Normalization normalization) { Normalization new_direction = Normalization::FORWARD; switch (normalization) { case Normalization::FORWARD: new_direction = Normalization::BACKWARD; break; diff --git a/docker/hip/Dockerfile b/docker/hip/Dockerfile index 3e94d376..21f13e8d 100644 --- a/docker/hip/Dockerfile +++ b/docker/hip/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE=rocm/dev-ubuntu-20.04:5.2 +ARG BASE=rocm/dev-ubuntu-20.04:5.4 FROM $BASE ARG ADDITIONAL_PACKAGES diff --git a/fft/CMakeLists.txt b/fft/CMakeLists.txt index ffddf45b..e2b19192 100644 --- a/fft/CMakeLists.txt +++ b/fft/CMakeLists.txt @@ -1,4 +1,8 @@ add_subdirectory(src) if(BUILD_TESTING) add_subdirectory(unit_test) +endif() + +if(KokkosFFT_ENABLE_BENCHMARK) + add_subdirectory(perf_test) endif() \ No newline at end of file diff --git a/fft/perf_test/BenchmarkMain.cpp b/fft/perf_test/BenchmarkMain.cpp new file mode 100644 index 00000000..2b6c856b --- /dev/null +++ b/fft/perf_test/BenchmarkMain.cpp @@ -0,0 +1,37 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +*/ + +#include + +#include "Benchmark_Context.hpp" +#include + +int main(int argc, char** argv) { + Kokkos::initialize(argc, argv); + { + benchmark::Initialize(&argc, argv); + benchmark::SetDefaultTimeUnit(benchmark::kSecond); + KokkosFFTBenchmark::add_benchmark_context(true); + + benchmark::RunSpecifiedBenchmarks(); + + benchmark::Shutdown(); + } + Kokkos::finalize(); + return 0; +} \ No newline at end of file diff --git a/fft/perf_test/Benchmark_Context.hpp b/fft/perf_test/Benchmark_Context.hpp new file mode 100644 index 00000000..337b3f90 --- /dev/null +++ b/fft/perf_test/Benchmark_Context.hpp @@ -0,0 +1,124 @@ +#ifndef KOKKOSFFT_BENCHMARK_CONTEXT_HPP +#define KOKKOSFFT_BENCHMARK_CONTEXT_HPP + +#include +#include + +#include + +#include +#include "KokkosFFT_PrintConfiguration.hpp" +#include + +namespace KokkosFFTBenchmark { +/// \brief Remove unwanted spaces and colon signs from input string. In case of +/// invalid input it will return an empty string. 
+inline std::string remove_unwanted_characters(std::string str) { + auto from = str.find_first_not_of(" :"); + auto to = str.find_last_not_of(" :"); + + if (from == std::string::npos || to == std::string::npos) { + return ""; + } + + // return extracted part of string without unwanted spaces and colon signs + return str.substr(from, to + 1); +} + +/// \brief Extract all key:value pairs from kokkos configuration and add it to +/// the benchmark context +inline void add_kokkos_configuration(bool verbose) { + std::ostringstream msg; + Kokkos::print_configuration(msg, verbose); + KokkosFFT::print_configuration(msg); + + // Iterate over lines returned from kokkos and extract key:value pairs + std::stringstream ss{msg.str()}; + for (std::string line; std::getline(ss, line, '\n');) { + auto found = line.find_first_of(':'); + if (found != std::string::npos) { + auto val = remove_unwanted_characters(line.substr(found + 1)); + // Ignore line without value, for example a category name + if (!val.empty()) { + benchmark::AddCustomContext( + remove_unwanted_characters(line.substr(0, found)), val); + } + } + } +} + +/// \brief Add Kokkos Kernels git info and google benchmark release to +/// benchmark context. 
+inline void add_version_info() { + using namespace KokkosFFT::Impl; + + if (!GIT_BRANCH.empty()) { + benchmark::AddCustomContext("GIT_BRANCH", std::string(GIT_BRANCH)); + benchmark::AddCustomContext("GIT_COMMIT_HASH", + std::string(GIT_COMMIT_HASH)); + benchmark::AddCustomContext("GIT_CLEAN_STATUS", + std::string(GIT_CLEAN_STATUS)); + benchmark::AddCustomContext("GIT_COMMIT_DESCRIPTION", + std::string(GIT_COMMIT_DESCRIPTION)); + benchmark::AddCustomContext("GIT_COMMIT_DATE", + std::string(GIT_COMMIT_DATE)); + } + if (!BENCHMARK_VERSION.empty()) { + benchmark::AddCustomContext("GOOGLE_BENCHMARK_VERSION", + std::string(BENCHMARK_VERSION)); + } +} + +inline void add_env_info() { + auto num_threads = std::getenv("OMP_NUM_THREADS"); + if (num_threads) { + benchmark::AddCustomContext("OMP_NUM_THREADS", num_threads); + } + auto dynamic = std::getenv("OMP_DYNAMIC"); + if (dynamic) { + benchmark::AddCustomContext("OMP_DYNAMIC", dynamic); + } + auto proc_bind = std::getenv("OMP_PROC_BIND"); + if (proc_bind) { + benchmark::AddCustomContext("OMP_PROC_BIND", proc_bind); + } + auto places = std::getenv("OMP_PLACES"); + if (places) { + benchmark::AddCustomContext("OMP_PLACES", places); + } +} + +/// \brief Gather all context information and add it to benchmark context +inline void add_benchmark_context(bool verbose = false) { + add_kokkos_configuration(verbose); + add_version_info(); + add_env_info(); +} + +/** + * \brief Report throughput and amount of data processed for simple View + * operations + */ +template +void report_results(benchmark::State& state, InViewType in, OutViewType out, + double time) { + // data processed in megabytes + const double in_data_processed = + static_cast(in.size() * sizeof(typename InViewType::value_type)) / + 1.0e6; + const double out_data_processed = + static_cast(out.size() * + sizeof(typename OutViewType::value_type)) / + 1.0e6; + + state.SetIterationTime(time); + state.counters["MB (In)"] = benchmark::Counter(in_data_processed); + 
state.counters["MB (Out)"] = benchmark::Counter(out_data_processed); + state.counters["GB/s"] = + benchmark::Counter((in_data_processed + out_data_processed) / 1.0e3, + benchmark::Counter::kIsIterationInvariantRate); +} + +} // namespace KokkosFFTBenchmark + +#endif \ No newline at end of file diff --git a/fft/perf_test/CMakeLists.txt b/fft/perf_test/CMakeLists.txt new file mode 100644 index 00000000..ddb531f0 --- /dev/null +++ b/fft/perf_test/CMakeLists.txt @@ -0,0 +1,23 @@ +set(BENCHMARK_NAME PerformanceTest_Benchmark) + +add_executable( + ${BENCHMARK_NAME} + BenchmarkMain.cpp + PerfTest_FFT1.cpp + PerfTest_FFT2.cpp +) + +target_link_libraries(${BENCHMARK_NAME} PUBLIC benchmark::benchmark KokkosFFT::fft) +target_include_directories(${BENCHMARK_NAME} PUBLIC ${CMAKE_BINARY_DIR}) + +string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC) +set( + BENCHMARK_ARGS + --benchmark_counters_tabular=true + --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json +) + +add_test( + NAME ${BENCHMARK_NAME} + COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS} +) \ No newline at end of file diff --git a/fft/perf_test/KokkosFFT_PrintConfiguration.hpp b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp new file mode 100644 index 00000000..022b3efc --- /dev/null +++ b/fft/perf_test/KokkosFFT_PrintConfiguration.hpp @@ -0,0 +1,82 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+#ifndef KOKKOSFFT_PRINT_CONFIGURATION_HPP
+#define KOKKOSFFT_PRINT_CONFIGURATION_HPP
+
+#include "KokkosFFT_config.h"
+#include "KokkosFFT_TplsVersion.hpp"
+#include <ostream>
+
+namespace KokkosFFT {
+namespace Impl {
+
+inline void print_cufft_version_if_enabled(std::ostream& os) {
+#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT)
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_CUFFT: " << cufft_version_string() << "\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_CUFFT: no\n";
+#endif
+}
+
+inline void print_enabled_tpls(std::ostream& os) {
+#ifdef KOKKOSFFT_ENABLE_TPL_FFTW
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_FFTW: yes\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_FFTW: no\n";
+#endif
+
+  print_cufft_version_if_enabled(os);
+
+#ifdef KOKKOSFFT_ENABLE_TPL_HIPFFT
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_HIPFFT: yes\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_HIPFFT: no\n";
+#endif
+
+#ifdef KOKKOSFFT_ENABLE_TPL_ONEMKL
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_ONEMKL: yes\n";
+#else
+  os << " "
+     << "KOKKOSFFT_ENABLE_TPL_ONEMKL: no\n";
+#endif
+}
+
+inline void print_version(std::ostream& os) {
+  // The version is assembled from the KOKKOSFFT_VERSION_MAJOR, _MINOR and
+  // _PATCH macros provided by the included KokkosFFT_config.h
+  os << " "
+     << "KokkosFFT Version: " << KOKKOSFFT_VERSION_MAJOR << "."
+     << KOKKOSFFT_VERSION_MINOR << "." << KOKKOSFFT_VERSION_PATCH << '\n';
+}
+}  // namespace Impl
+
+inline void print_configuration(std::ostream& os) {
+  Impl::print_version(os);
+
+  os << "TPLs: \n";
+  Impl::print_enabled_tpls(os);
+}
+
+}  // namespace KokkosFFT
+
+#endif
\ No newline at end of file
diff --git a/fft/perf_test/KokkosFFT_TplsVersion.hpp b/fft/perf_test/KokkosFFT_TplsVersion.hpp
new file mode 100644
index 00000000..e5ec33da
--- /dev/null
+++ b/fft/perf_test/KokkosFFT_TplsVersion.hpp
@@ -0,0 +1,40 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef KOKKOSFFT_TPLS_VERSIONS_HPP +#define KOKKOSFFT_TPLS_VERSIONS_HPP + +#include "KokkosFFT_config.h" +#include +#include + +#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT) +#include "cufft.h" +#endif + +namespace KokkosFFT { +#if defined(KOKKOSFFT_ENABLE_TPL_CUFFT) +inline std::string cufft_version_string() { + // Print version + std::stringstream ss; + + ss << CUFFT_VER_MAJOR << "." << CUFFT_VER_MINOR << "." << CUFFT_VER_PATCH; + + return ss.str(); +} +#endif + +} // namespace KokkosFFT +#endif \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.cpp b/fft/perf_test/PerfTest_FFT1.cpp new file mode 100644 index 00000000..0f535d77 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT1.cpp @@ -0,0 +1,121 @@ +#include "PerfTest_FFT1.hpp" + +namespace KokkosFFTBenchmark { + +// 1D FFT on 1D View +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 1D IFFT on 1D View +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + 
+BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 1D RFFT on 1D View +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 1D IRFFT on 1D View +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT_1DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(4096, 65536) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +} // namespace KokkosFFTBenchmark \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT1.hpp b/fft/perf_test/PerfTest_FFT1.hpp new file mode 100644 index 00000000..6424b4e7 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT1.hpp @@ -0,0 +1,103 @@ +#ifndef KOKKOSFFT_PERFTEST_FFT1_HPP +#define KOKKOSFFT_PERFTEST_FFT1_HPP + +#include +#include +#include 
+#include "Benchmark_Context.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; + +namespace KokkosFFTBenchmark { + +template +void fft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::fft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void ifft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::ifft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void rfft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::rfft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void irfft(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::irfft(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +static void FFT_1DView(benchmark::State& state) { + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView1DType x("x", n), x_hat("x_hat", n); + + fft(x, x_hat, state); +} + +template +static void IFFT_1DView(benchmark::State& state) { + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView1DType x("x", n), x_hat("x_hat", n); + + ifft(x, x_hat, state); +} + +template +static void RFFT_1DView(benchmark::State& state) { + using RealView1DType = Kokkos::View; + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + RealView1DType x("x", n); + 
ComplexView1DType x_hat("x_hat", n / 2 + 1); + + rfft(x, x_hat, state); +} + +template +static void IRFFT_1DView(benchmark::State& state) { + using RealView1DType = Kokkos::View; + using ComplexView1DType = + Kokkos::View*, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView1DType x("x", n / 2 + 1); + RealView1DType x_hat("x_hat", n); + + irfft(x, x_hat, state); +} + +} // namespace KokkosFFTBenchmark + +#endif \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT2.cpp b/fft/perf_test/PerfTest_FFT2.cpp new file mode 100644 index 00000000..00358f28 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT2.cpp @@ -0,0 +1,121 @@ +#include "PerfTest_FFT2.hpp" + +namespace KokkosFFTBenchmark { + +// 2D FFT on 2D View +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(FFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 2D IFFT on 2D View +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 2D RFFT on 2D View +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + 
->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(RFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +// 2D IRFFT on 2D View +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(IRFFT2_2DView) + ->ArgName("N") + ->RangeMultiplier(2) + ->Range(256, 4096) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +} // namespace KokkosFFTBenchmark \ No newline at end of file diff --git a/fft/perf_test/PerfTest_FFT2.hpp b/fft/perf_test/PerfTest_FFT2.hpp new file mode 100644 index 00000000..a4c6ce64 --- /dev/null +++ b/fft/perf_test/PerfTest_FFT2.hpp @@ -0,0 +1,104 @@ +#ifndef KOKKOSFFT_PERFTEST_FFT2_HPP +#define KOKKOSFFT_PERFTEST_FFT2_HPP + +#include +#include +#include +#include "Benchmark_Context.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; +using axis_type = KokkosFFT::axis_type<2>; + +namespace KokkosFFTBenchmark { + +template +void fft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::fft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void ifft2(const InViewType& in, 
OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::ifft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void rfft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::rfft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +void irfft2(const InViewType& in, OutViewType& out, benchmark::State& state) { + for (auto _ : state) { + Kokkos::fence(); + Kokkos::Timer timer; + KokkosFFT::irfft2(execution_space(), in, out); + KokkosFFTBenchmark::report_results(state, in, out, timer.seconds()); + } +} + +template +static void FFT2_2DView(benchmark::State& state) { + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView2DType x("x", n, n), x_hat("x_hat", n, n); + + fft2(x, x_hat, state); +} + +template +static void IFFT2_2DView(benchmark::State& state) { + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView2DType x("x", n, n), x_hat("x_hat", n, n); + + ifft2(x, x_hat, state); +} + +template +static void RFFT2_2DView(benchmark::State& state) { + using RealView2DType = Kokkos::View; + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + RealView2DType x("x", n, n); + ComplexView2DType x_hat("x_hat", n, n / 2 + 1); + + rfft2(x, x_hat, state); +} + +template +static void IRFFT2_2DView(benchmark::State& state) { + using RealView2DType = Kokkos::View; + using ComplexView2DType = + Kokkos::View**, LayoutType, execution_space>; + + const int n = state.range(0); + ComplexView2DType x("x", n, n / 2 + 1); + RealView2DType x_hat("x_hat", n, n); + + irfft2(x, x_hat, state); +} + +} // 
namespace KokkosFFTBenchmark + +#endif \ No newline at end of file diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 4ba45964..23dc22f3 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -5,43 +5,43 @@ namespace KokkosFFT { namespace Impl { -void _exec(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, + [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecR2C failed"); } -void _exec(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftDoubleReal* idata, + cufftDoubleComplex* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecD2Z failed"); } -void _exec(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, + [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2R failed"); } -void _exec(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, - [[maybe_unused]] int direction) { +inline void _exec(cufftHandle& plan, cufftDoubleComplex* idata, + cufftDoubleReal* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecZ2D failed"); } -void _exec(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, - int direction) { +inline void _exec(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, + int direction) 
{ cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2C failed"); } -void _exec(cufftHandle& plan, cufftDoubleComplex* idata, - cufftDoubleComplex* odata, int direction) { +inline void _exec(cufftHandle& plan, cufftDoubleComplex* idata, + cufftDoubleComplex* odata, int direction) { cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecZ2Z failed"); diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index 30c1785a..2bd9ce86 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -5,43 +5,43 @@ namespace KokkosFFT { namespace Impl { -void _exec(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, - [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, + [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecR2C failed"); } -void _exec(hipfftHandle& plan, hipfftDoubleReal* idata, - hipfftDoubleComplex* odata, [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftDoubleReal* idata, + hipfftDoubleComplex* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); } -void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, - [[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, + [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2R failed"); } -void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, - hipfftDoubleReal* odata, 
[[maybe_unused]] int direction) { +inline void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, + hipfftDoubleReal* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2D failed"); } -void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, - int direction) { +inline void _exec(hipfftHandle& plan, hipfftComplex* idata, + hipfftComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2C failed"); } -void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, - hipfftDoubleComplex* odata, int direction) { +inline void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, + hipfftDoubleComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2Z failed"); diff --git a/tpls/benchmark b/tpls/benchmark new file mode 160000 index 00000000..e9905638 --- /dev/null +++ b/tpls/benchmark @@ -0,0 +1 @@ +Subproject commit e990563876ef92990e873dc5b479d3b79cda2547