Skip to content

Commit

Permalink
Sync GPU stream before getting the time in TinyProfiler (AMReX-Codes#3763)
Browse files Browse the repository at this point in the history

In AMReX-Codes#1505, the option
`tiny_profiler.device_synchronize_around_region = 1` was added to better
measure timings on GPU using NVTX. However, it would also be useful for
the timings reported by TinyProfiler itself. Currently, codes have to add
these syncs themselves to get meaningful output from TinyProfiler on GPU;
see, for example:

https://github.com/Hi-PACE/hipace/blob/development/src/utils/HipaceProfilerWrapper.H
  • Loading branch information
AlexanderSinn authored Feb 14, 2024
1 parent 1b66aa3 commit 8b476a9
Showing 1 changed file with 20 additions and 20 deletions.
40 changes: 20 additions & 20 deletions Src/Base/AMReX_TinyProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ TinyProfiler::start () noexcept
#endif
if (!regionstack.empty()) {

#ifdef AMREX_USE_GPU
if (device_synchronize_around_region) {
amrex::Gpu::streamSynchronize();
}
#endif

#ifdef AMREX_USE_CUPTI
if (uCUPTI) {
cudaDeviceSynchronize();
Expand All @@ -111,12 +117,6 @@ TinyProfiler::start () noexcept
in_parallel_region = false;
#endif

#ifdef AMREX_USE_GPU
if (device_synchronize_around_region) {
amrex::Gpu::streamSynchronize();
}
#endif

#ifdef AMREX_USE_CUDA
nvtxRangePush(fname.c_str());
#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
Expand Down Expand Up @@ -149,8 +149,14 @@ TinyProfiler::stop () noexcept
#ifdef AMREX_USE_OMP
#pragma omp master
#endif
if (!stats.empty())
{
if (!stats.empty()) {

#ifdef AMREX_USE_GPU
if (device_synchronize_around_region) {
amrex::Gpu::streamSynchronize();
}
#endif

double t;
int nKernelCalls = 0;
#ifdef AMREX_USE_CUPTI
Expand Down Expand Up @@ -207,12 +213,6 @@ TinyProfiler::stop () noexcept
std::get<1>(parent) += dtin;
}

#ifdef AMREX_USE_GPU
if (device_synchronize_around_region) {
amrex::Gpu::streamSynchronize();
}
#endif

#ifdef AMREX_USE_CUDA
nvtxRangePop();
#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
Expand Down Expand Up @@ -242,8 +242,12 @@ TinyProfiler::stop (unsigned boxUintID) noexcept
#ifdef AMREX_USE_OMP
#pragma omp master
#endif
if (!stats.empty())
{
if (!stats.empty()) {

if (device_synchronize_around_region) {
amrex::Gpu::streamSynchronize();
}

double t;
cudaDeviceSynchronize();
cuptiActivityFlushAll(0);
Expand Down Expand Up @@ -293,10 +297,6 @@ TinyProfiler::stop (unsigned boxUintID) noexcept
std::get<1>(parent) += dtin;
}

if (device_synchronize_around_region) {
amrex::Gpu::streamSynchronize();
}

#ifdef AMREX_USE_CUDA
nvtxRangePop();
#elif defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX)
Expand Down

0 comments on commit 8b476a9

Please sign in to comment.