From 73ead8989f78477e6d9c9f8315c42abe14e43962 Mon Sep 17 00:00:00 2001
From: Ben Wibking
Date: Wed, 29 Jan 2025 00:29:33 -0500
Subject: [PATCH 1/4] Enable GPU-aware MPI by default

This turns on GPU-aware MPI by default. On all current machines,
simulations run faster with GPU-aware MPI enabled.

Two technical issues that prevented this are now resolved: AMReX now
has the communication arena, which does not use managed memory, and
SLURM no longer uses cgroup isolation for GPU bindings by default.

Closes https://github.com/AMReX-Codes/amrex/issues/2967.
---
 Src/Base/AMReX_ParallelDescriptor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp b/Src/Base/AMReX_ParallelDescriptor.cpp
index f6ac26e7984..67848505f2e 100644
--- a/Src/Base/AMReX_ParallelDescriptor.cpp
+++ b/Src/Base/AMReX_ParallelDescriptor.cpp
@@ -57,7 +57,7 @@ namespace amrex::ParallelDescriptor {
 #endif
 
 #ifdef AMREX_USE_GPU
-    bool use_gpu_aware_mpi = false;
+    bool use_gpu_aware_mpi = true;
 #else
     bool use_gpu_aware_mpi = false;
 #endif

From cc4ea66086ad4a7362902b33b8b91e671645cb08 Mon Sep 17 00:00:00 2001
From: Ben Wibking
Date: Thu, 30 Jan 2025 10:03:28 +1100
Subject: [PATCH 2/4] check for GPU-aware support

---
 Src/Base/AMReX_ParallelDescriptor.cpp | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp b/Src/Base/AMReX_ParallelDescriptor.cpp
index 67848505f2e..83d55bb273a 100644
--- a/Src/Base/AMReX_ParallelDescriptor.cpp
+++ b/Src/Base/AMReX_ParallelDescriptor.cpp
@@ -10,6 +10,9 @@
 
 #ifdef BL_USE_MPI
 #include <mpi.h>
+#if __has_include(<mpi-ext.h>) && defined(OPEN_MPI)
+#  include <mpi-ext.h>
+#endif
 #endif
 
 #ifdef AMREX_PMI
@@ -57,7 +60,7 @@ namespace amrex::ParallelDescriptor {
 #endif
 
 #ifdef AMREX_USE_GPU
-    bool use_gpu_aware_mpi = true;
+    bool use_gpu_aware_mpi = false;
 #else
     bool use_gpu_aware_mpi = false;
 #endif
@@ -1510,6 +1513,28 @@ ReadAndBcastFile (const std::string& filename, Vector<char>& charBuf,
 void
 Initialize ()
 {
+#if defined(AMREX_USE_CUDA)
+
+#if (defined(OMPI_HAVE_MPI_EXT_CUDA) && OMPI_HAVE_MPI_EXT_CUDA) || (defined(MPICH) && defined(MPIX_GPU_SUPPORT_CUDA))
+    use_gpu_aware_mpi = (bool) MPIX_Query_cuda_support();
+#endif
+
+#elif defined(AMREX_USE_HIP)
+
+#if defined(OMPI_HAVE_MPI_EXT_ROCM) && OMPI_HAVE_MPI_EXT_ROCM
+    use_gpu_aware_mpi = (bool) MPIX_Query_rocm_support();
+#elif defined(MPICH) && defined(MPIX_GPU_SUPPORT_HIP)
+    use_gpu_aware_mpi = (bool) MPIX_Query_hip_support();
+#endif
+
+#elif defined(AMREX_USE_SYCL)
+
+#if defined(MPICH) && defined(MPIX_GPU_SUPPORT_ZE)
+    use_gpu_aware_mpi = (bool) MPIX_Query_ze_support();
+#endif
+
+#endif
+
 #ifndef BL_AMRPROF
     ParmParse pp("amrex");
     pp.queryAdd("use_gpu_aware_mpi", use_gpu_aware_mpi);

From 269c8a324e886306e0c9b31ea605ce59b772fa1e Mon Sep 17 00:00:00 2001
From: Weiqun Zhang
Date: Sun, 2 Feb 2025 15:00:56 -0800
Subject: [PATCH 3/4] Apply suggestions from code review

---
 Src/Base/AMReX_ParallelDescriptor.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp b/Src/Base/AMReX_ParallelDescriptor.cpp
index 83d55bb273a..88e11c3b324 100644
--- a/Src/Base/AMReX_ParallelDescriptor.cpp
+++ b/Src/Base/AMReX_ParallelDescriptor.cpp
@@ -1524,13 +1524,19 @@ Initialize ()
 #if defined(OMPI_HAVE_MPI_EXT_ROCM) && OMPI_HAVE_MPI_EXT_ROCM
     use_gpu_aware_mpi = (bool) MPIX_Query_rocm_support();
 #elif defined(MPICH) && defined(MPIX_GPU_SUPPORT_HIP)
-    use_gpu_aware_mpi = (bool) MPIX_Query_hip_support();
+    int is_supported = 0;
+    if (MPIX_GPU_query_support(MPIX_GPU_SUPPORT_HIP, &is_supported) == MPI_SUCCESS) {
+        use_gpu_aware_mpi = (bool) is_supported;
+    }
 #endif
 
 #elif defined(AMREX_USE_SYCL)
 
 #if defined(MPICH) && defined(MPIX_GPU_SUPPORT_ZE)
-    use_gpu_aware_mpi = (bool) MPIX_Query_ze_support();
+    int is_supported = 0;
+    if (MPIX_GPU_query_support(MPIX_GPU_SUPPORT_ZE, &is_supported) == MPI_SUCCESS) {
+        use_gpu_aware_mpi = (bool) is_supported;
+    }
 #endif
 
 #endif

From c6e3c923cce3549ead4372b09147f9d86aac7b36 Mon Sep 17 00:00:00 2001
From: Ben Wibking
Date: Mon, 3 Feb 2025 16:22:07 +1100
Subject: [PATCH 4/4] update docs

---
 Docs/sphinx_documentation/source/GPU.rst | 31 ++++++++++++------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst
index b4b7aba9b0f..9e370b8ee20 100644
--- a/Docs/sphinx_documentation/source/GPU.rst
+++ b/Docs/sphinx_documentation/source/GPU.rst
@@ -1643,7 +1643,7 @@ Finally, the parallel communication of particle data has been ported and optimiz
 platforms. This includes :cpp:`Redistribute()`, which moves particles back to the proper grids after their
 positions have changed, as well as :cpp:`fillNeighbors()` and :cpp:`updateNeighbors()`, which are used to
 exchange halo particles. As with :cpp:`MultiFab` data, these have been designed to minimize host / device traffic as much as possible, and can
-take advantage of the Cuda-aware MPI implementations available on platforms such as ORNL's Summit.
+take advantage of the GPU-aware MPI implementations available on platforms such as ORNL's Frontier.
 
 
 Profiling with GPUs
@@ -1742,17 +1742,18 @@ Inputs Parameters
 
 The following inputs parameters control the behavior of amrex when running on GPUs. They should be prefaced by "amrex" in your :cpp:`inputs` file.
 
-+----------------------------+-----------------------------------------------------------------------+-------------+----------+
-|                            | Description                                                           | Type        | Default  |
-+============================+=======================================================================+=============+==========+
-| use_gpu_aware_mpi          | Whether to use GPU memory for communication buffers during MPI calls. | Bool        | 0        |
-|                            | If true, the buffers will use device memory. If false (i.e., 0), they |             |          |
-|                            | will use pinned memory. In practice, we find it is not always worth   |             |          |
-|                            | it to use GPU aware MPI.                                              |             |          |
-+----------------------------+-----------------------------------------------------------------------+-------------+----------+
-| abort_on_out_of_gpu_memory | If the size of free memory on the GPU is less than the size of a      | Bool        | 0        |
-|                            | requested allocation, AMReX will call AMReX::Abort() with an error    |             |          |
-|                            | describing how much free memory there is and what was requested.      |             |          |
-+----------------------------+-----------------------------------------------------------------------+-------------+----------+
-| the_arena_is_managed       | Whether :cpp:`The_Arena()` allocates managed memory.                  | Bool        | 0        |
-+----------------------------+-----------------------------------------------------------------------+-------------+----------+
++----------------------------+-----------------------------------------------------------------------+-------------+----------------+
+|                            | Description                                                           | Type        | Default        |
++============================+=======================================================================+=============+================+
+| use_gpu_aware_mpi          | Whether to use GPU memory for communication buffers during MPI calls. | Bool        | MPI-dependent  |
+|                            | If true, the buffers will use device memory. If false (i.e., 0), they |             |                |
+|                            | will use pinned memory. By default, it is enabled if AMReX detects    |             |                |
+|                            | that GPU-aware MPI is supported by the MPI library (MPICH, OpenMPI,   |             |                |
+|                            | and derived implementations).                                         |             |                |
++----------------------------+-----------------------------------------------------------------------+-------------+----------------+
+| abort_on_out_of_gpu_memory | If the size of free memory on the GPU is less than the size of a      | Bool        | 0              |
+|                            | requested allocation, AMReX will call AMReX::Abort() with an error    |             |                |
+|                            | describing how much free memory there is and what was requested.      |             |                |
++----------------------------+-----------------------------------------------------------------------+-------------+----------------+
+| the_arena_is_managed       | Whether :cpp:`The_Arena()` allocates managed memory.                  | Bool        | 0              |
++----------------------------+-----------------------------------------------------------------------+-------------+----------------+
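
A note on using the new default: the effective setting can be read back through ParmParse after
initialization, because ParallelDescriptor::Initialize() registers it via the pp.queryAdd() call shown
in PATCH 2/4. The sketch below is illustrative only and is not part of the series; it assumes the
standard AMReX public headers, and the detected default can still be overridden from an inputs file or
the command line (for example, by appending amrex.use_gpu_aware_mpi=0).

    // Illustrative sketch (not part of the patches above). Assumes the
    // standard AMReX headers; "amrex.use_gpu_aware_mpi" is the key that
    // ParallelDescriptor::Initialize() registers via pp.queryAdd().
    #include <AMReX.H>
    #include <AMReX_ParmParse.H>
    #include <AMReX_Print.H>

    int main (int argc, char* argv[])
    {
        amrex::Initialize(argc, argv);  // runs the GPU-aware MPI detection added in this series
        {
            bool gpu_aware = false;
            amrex::ParmParse pp("amrex");
            pp.query("use_gpu_aware_mpi", gpu_aware);  // detected default or user override
            amrex::Print() << "MPI communication buffers: "
                           << (gpu_aware ? "device memory" : "pinned host memory") << "\n";
        }
        amrex::Finalize();
        return 0;
    }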