diff --git a/Src/FFT/AMReX_FFT_Poisson.H b/Src/FFT/AMReX_FFT_Poisson.H index c42780ef1b9..aedb76f360c 100644 --- a/Src/FFT/AMReX_FFT_Poisson.H +++ b/Src/FFT/AMReX_FFT_Poisson.H @@ -97,8 +97,9 @@ private: #endif /** - * \brief 3D Poisson solver for periodic boundaries in the first two - * dimensions and Neumann in the last dimension. + * \brief 3D Poisson solver for periodic, Dirichlet & Neumann boundaries in + * the first two dimensions, and Neumann in the last dimension. The last + * dimension could have non-uniform mesh. */ template class PoissonHybrid @@ -106,9 +107,38 @@ class PoissonHybrid public: using T = typename MF::value_type; + template ,int> = 0> + PoissonHybrid (Geometry const& geom, + Array,AMREX_SPACEDIM> const& bc) + : m_geom(geom), m_bc(bc) + { + bool periodic_xy = true; + for (int idim = 0; idim < 2; ++idim) { + periodic_xy = periodic_xy && (bc[idim].first == Boundary::periodic); + AMREX_ALWAYS_ASSERT((bc[idim].first == Boundary::periodic && + bc[idim].second == Boundary::periodic) || + (bc[idim].first != Boundary::periodic && + bc[idim].second != Boundary::periodic)); + } + Info info{}; + info.setBatchMode(true); + if (periodic_xy) { + m_r2c = std::make_unique>(m_geom.Domain(), + info); + } else { + m_r2x = std::make_unique> (m_geom.Domain(), + m_bc, info); + } + } + template ,int> = 0> explicit PoissonHybrid (Geometry const& geom) - : m_geom(geom), m_r2c(geom.Domain(), Info().setBatchMode(true)) + : m_geom(geom), + m_bc{AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic), + std::make_pair(Boundary::periodic,Boundary::periodic), + std::make_pair(Boundary::even,Boundary::even))}, + m_r2c(std::make_unique> + (geom.Domain(), Info().setBatchMode(true))) { #if (AMREX_SPACEDIM == 3) AMREX_ALWAYS_ASSERT(geom.isPeriodic(0) && geom.isPeriodic(1)); @@ -128,12 +158,21 @@ public: void solve (MF& soln, MF const& rhs, Vector const& dz); void solve (MF& soln, MF const& rhs, Gpu::DeviceVector const& dz); - template - void solve_doit (MF& soln, MF const& rhs, DZ const& dz); // has to be public for cuda + // This is public for cuda + template + void solve_z (FA& spmf, DZ const& dz); private: + + template + void solve_doit (MF& soln, MF const& rhs, DZ const& dz); + Geometry m_geom; - R2C m_r2c; + Array,AMREX_SPACEDIM> m_bc; + std::unique_ptr> m_r2x; + std::unique_ptr> m_r2c; + MF m_spmf_r; + FabArray>> m_spmf_c; }; template @@ -161,7 +200,6 @@ void Poisson::solve (MF& soln, MF const& rhs) auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor(); GpuArray offset{AMREX_D_DECL(T(0),T(0),T(0))}; - // Not sure about odd-even and even-odd yet for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (m_bc[idim].first == Boundary::odd && m_bc[idim].second == Boundary::odd) @@ -299,26 +337,99 @@ void PoissonHybrid::solve_doit (MF& soln, MF const& rhs, DZ const& dz) { BL_PROFILE("FFT::PoissonHybrid::solve"); + AMREX_ASSERT(soln.is_cell_centered() && rhs.is_cell_centered()); + #if (AMREX_SPACEDIM < 3) amrex::ignore_unused(soln, rhs, dz); #else - auto facx = T(2)*Math::pi()/T(m_geom.ProbLength(0)); - auto facy = T(2)*Math::pi()/T(m_geom.ProbLength(1)); - auto dx = T(m_geom.CellSize(0)); - auto dy = T(m_geom.CellSize(1)); - auto scale = T(1.0)/(T(m_geom.Domain().length(0)) * - T(m_geom.Domain().length(1))); - auto ny = m_geom.Domain().length(1); - auto nz = m_geom.Domain().length(2); - Box cdomain = m_geom.Domain(); - cdomain.setBig(0,cdomain.length(0)/2); - auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), - {AMREX_D_DECL(true,true,false)}); - DistributionMapping dm = detail::make_iota_distromap(cba.size()); - FabArray > > spmf(cba, dm, 1, 0); + IntVect const& ng = amrex::elemwiseMin(soln.nGrowVect(), IntVect(1)); + + if (m_r2c) + { + if (m_spmf_c.empty()) { + Box cdomain = m_geom.Domain(); + cdomain.setBig(0,cdomain.length(0)/2); + auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), + {AMREX_D_DECL(true,true,false)}); + DistributionMapping dm = detail::make_iota_distromap(cba.size()); + m_spmf_c.define(cba, dm, 1, 0); + } + + m_r2c->forward(rhs, m_spmf_c); + + solve_z(m_spmf_c, dz); - m_r2c.forward(rhs, spmf); + m_r2c->backward_doit(m_spmf_c, soln, ng, m_geom.periodicity()); + } + else + { + if (m_r2x->m_cy.empty()) { // spectral data is real + // Add getSpectralDataLayout? + auto sba = amrex::decompose(m_geom.Domain(),ParallelContext::NProcsSub(), + {AMREX_D_DECL(true,true,false)}); + DistributionMapping dm = detail::make_iota_distromap(sba.size()); + m_spmf_r.define(sba, dm, 1, 0); + m_r2x->forward(rhs, m_spmf_r); + solve_z(m_spmf_r, dz); + m_r2x->backward(m_spmf_r, soln, ng, m_geom.periodicity()); + } else { // spectral data is complex. one of the first two dimensions is periodic. + Box cdomain = m_geom.Domain(); + if (m_bc[0].first == Boundary::periodic) { + cdomain.setBig(0,cdomain.length(0)/2); + } else { + cdomain.setBig(1,cdomain.length(1)/2); + } + auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), + {AMREX_D_DECL(true,true,false)}); + DistributionMapping dm = detail::make_iota_distromap(cba.size()); + m_spmf_c.define(cba, dm, 1, 0); + m_r2x->forward(rhs, m_spmf_c); + solve_z(m_spmf_c, dz); + m_r2x->backward(m_spmf_c, soln, ng, m_geom.periodicity()); + } + } + + detail::fill_physbc(soln, m_geom, m_bc); +#endif +} + +template +template +void PoissonHybrid::solve_z (FA& spmf, DZ const& dz) +{ + BL_PROFILE("PoissonHybrid::solve_z"); + +#if (AMREX_SPACEDIM < 3) + amrex::ignore_unused(spmf, dz); +#else + auto facx = Math::pi()/T(m_geom.Domain().length(0)); + auto facy = Math::pi()/T(m_geom.Domain().length(1)); + if (m_bc[0].first == Boundary::periodic) { facx *= T(2); } + if (m_bc[1].first == Boundary::periodic) { facy *= T(2); } + auto dxfac = T(2)/T(m_geom.CellSize(0)*m_geom.CellSize(0)); + auto dyfac = T(2)/T(m_geom.CellSize(1)*m_geom.CellSize(1)); + auto scale = (m_r2x) ? m_r2x->scalingFactor() : m_r2c->scalingFactor(); + + GpuArray offset{T(0),T(0)}; + for (int idim = 0; idim < AMREX_SPACEDIM-1; ++idim) { + if (m_bc[idim].first == Boundary::odd && + m_bc[idim].second == Boundary::odd) + { + offset[idim] = T(1); + } + else if ((m_bc[idim].first == Boundary::odd && + m_bc[idim].second == Boundary::even) || + (m_bc[idim].first == Boundary::even && + m_bc[idim].second == Boundary::odd)) + { + offset[idim] = T(0.5); + } + } + + bool has_dirichlet = (offset[0] != T(0)) || (offset[1] != T(0)); + + auto nz = m_geom.Domain().length(2); for (MFIter mfi(spmf); mfi.isValid(); ++mfi) { @@ -339,28 +450,27 @@ void PoissonHybrid::solve_doit (MF& soln, MF const& rhs, DZ const& dz) amrex::ParallelFor(xybox, [=] AMREX_GPU_DEVICE (int i, int j, int) { - T a = facx*i; - T b = (j < ny/2) ? facy*j : facy*(ny-j); - - T k2 = T(2)*(std::cos(a*dx)-T(1))/(dx*dx) - + T(2)*(std::cos(b*dy)-T(1))/(dy*dy); + T a = facx*(i+offset[0]); + T b = facy*(j+offset[1]); + T k2 = dxfac * (std::cos(a)-T(1)) + + dyfac * (std::cos(b)-T(1)); // Tridiagonal solve with homogeneous Neumann for(int k=0; k < nz; k++) { if(k==0) { - ald(i,j,k) = 0.; - cud(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k+1])); + ald(i,j,k) = T(0.); + cud(i,j,k) = T(2.0) /(dz[k]*(dz[k]+dz[k+1])); bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k); } else if (k == nz-1) { - ald(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k-1])); - cud(i,j,k) = 0.; + ald(i,j,k) = T(2.0) /(dz[k]*(dz[k]+dz[k-1])); + cud(i,j,k) = T(0.); bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k); - if (i == 0 && j == 0) { - bd(i,j,k) *= 2.0; + if (i == 0 && j == 0 && !has_dirichlet) { + bd(i,j,k) *= T(2.0); } } else { - ald(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k-1])); - cud(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k+1])); + ald(i,j,k) = T(2.0) /(dz[k]*(dz[k]+dz[k-1])); + cud(i,j,k) = T(2.0) /(dz[k]*(dz[k]+dz[k+1])); bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k); } } @@ -388,35 +498,34 @@ void PoissonHybrid::solve_doit (MF& soln, MF const& rhs, DZ const& dz) #else - Gpu::DeviceVector> ald(nz); - Gpu::DeviceVector> bd(nz); - Gpu::DeviceVector> cud(nz); - Gpu::DeviceVector> scratch(nz); + Gpu::DeviceVector ald(nz); + Gpu::DeviceVector bd(nz); + Gpu::DeviceVector cud(nz); + Gpu::DeviceVector scratch(nz); amrex::LoopOnCpu(xybox, [&] (int i, int j, int) { - T a = facx*i; - T b = (j < ny/2) ? facy*j : facy*(ny-j); - - T k2 = T(2)*(std::cos(a*dx)-T(1))/(dx*dx) - + T(2)*(std::cos(b*dy)-T(1))/(dy*dy); + T a = facx*(i+offset[0]); + T b = facy*(j+offset[1]); + T k2 = dxfac * (std::cos(a)-T(1)) + + dyfac * (std::cos(b)-T(1)); // Tridiagonal solve with homogeneous Neumann for(int k=0; k < nz; k++) { if(k==0) { - ald[k] = 0.; - cud[k] = 2.0 /(dz[k]*(dz[k]+dz[k+1])); + ald[k] = T(0.); + cud[k] = T(2.0) /(dz[k]*(dz[k]+dz[k+1])); bd[k] = k2 -ald[k]-cud[k]; } else if (k == nz-1) { - ald[k] = 2.0 /(dz[k]*(dz[k]+dz[k-1])); - cud[k] = 0.; + ald[k] = T(2.0) /(dz[k]*(dz[k]+dz[k-1])); + cud[k] = T(0.); bd[k] = k2 -ald[k]-cud[k]; - if (i == 0 && j == 0) { - bd[k] *= 2.0; + if (i == 0 && j == 0 && !has_dirichlet) { + bd[k] *= T(2.0); } } else { - ald[k] = 2.0 /(dz[k]*(dz[k]+dz[k-1])); - cud[k] = 2.0 /(dz[k]*(dz[k]+dz[k+1])); + ald[k] = T(2.0) /(dz[k]*(dz[k]+dz[k-1])); + cud[k] = T(2.0) /(dz[k]*(dz[k]+dz[k+1])); bd[k] = k2 -ald[k]-cud[k]; } } @@ -442,15 +551,6 @@ void PoissonHybrid::solve_doit (MF& soln, MF const& rhs, DZ const& dz) }); #endif } - - IntVect const& ng = amrex::elemwiseMin(soln.nGrowVect(), IntVect(1)); - m_r2c.backward_doit(spmf, soln, ng, m_geom.periodicity()); - - Array,AMREX_SPACEDIM> bc - {AMREX_D_DECL(std::make_pair(Boundary::periodic,Boundary::periodic), - std::make_pair(Boundary::periodic,Boundary::periodic), - std::make_pair(Boundary::even,Boundary::even))}; - detail::fill_physbc(soln, m_geom, bc); #endif } diff --git a/Src/FFT/AMReX_FFT_R2X.H b/Src/FFT/AMReX_FFT_R2X.H index f7fa256bba7..19cd8fca12e 100644 --- a/Src/FFT/AMReX_FFT_R2X.H +++ b/Src/FFT/AMReX_FFT_R2X.H @@ -53,6 +53,15 @@ public: private: + void forward (MF const& inmf, MF& outmf); + void forward (MF const& inmf, cMF& outmf); + void forward (MF const& inmf); + void backward (MF const& inmf, MF& outmf, IntVect const& ngout, + Periodicity const& period); + void backward (cMF const& inmf, MF& outmf, IntVect const& ngout, + Periodicity const& period); + void backward (); + template void forwardThenBackward_doit (MF const& inmf, MF& outmf, F const& post_forward, IntVect const& ngout = IntVect(0), @@ -119,6 +128,8 @@ R2X::R2X (Box const& domain, domain.cellCentered()); #if (AMREX_SPACEDIM == 3) AMREX_ALWAYS_ASSERT(domain.length(1) > 1 || domain.length(2) == 1); +#else + AMREX_ALWAYS_ASSERT(! m_info.batch_mode); #endif for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { AMREX_ALWAYS_ASSERT(domain.length(idim) > 1); @@ -206,7 +217,7 @@ R2X::R2X (Box const& domain, #endif #if (AMREX_SPACEDIM == 3) - if (domain.length(2) > 1) { + if (domain.length(2) > 1 && !m_info.batch_mode) { if (! m_cy.empty()) { // copy(m_cy, m_cz) m_dom_cz = Box(IntVect(0), IntVect(AMREX_D_DECL(m_dom_cy.bigEnd(2), @@ -315,7 +326,7 @@ R2X::R2X (Box const& domain, #endif #if (AMREX_SPACEDIM == 3) - if (domain.length(2) > 1) { + if (domain.length(2) > 1 && !m_info.batch_mode) { if (! m_cy.empty()) { // copy(m_cy, m_cz) m_cmd_cy2cz = std::make_unique @@ -499,11 +510,11 @@ R2X::~R2X () template T R2X::scalingFactor () const { - auto r = m_dom_0.numPts(); - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - if (m_bc[idim].first != Boundary::periodic - && m_dom_0.length(idim) > 1) - { + Long r = 1; + int ndims = m_info.batch_mode ? AMREX_SPACEDIM-1 : AMREX_SPACEDIM; + for (int idim = 0; idim < ndims; ++idim) { + r *= m_dom_0.length(idim); + if (m_bc[idim].first != Boundary::periodic && (m_dom_0.length(idim) > 1)) { r *= 2; } } @@ -526,7 +537,53 @@ void R2X::forwardThenBackward_doit (MF const& inmf, MF& outmf, { BL_PROFILE("FFT::R2X::forwardbackward"); - // forward + this->forward(inmf); + + // post-forward + + int actual_dim = AMREX_SPACEDIM; +#if (AMREX_SPACEDIM >= 2) + if (m_dom_0.length(1) == 1) { actual_dim = 1; } +#endif +#if (AMREX_SPACEDIM == 3) + if ((m_dom_0.length(2) == 1) && (m_dom_0.length(1) > 1)) { actual_dim = 2; } +#endif + + if (actual_dim == 1) { + if (m_cx.empty()) { + post_forward_doit<0>(detail::get_fab(m_rx), post_forward); + } else { + post_forward_doit<0>(detail::get_fab(m_cx), post_forward); + } + } +#if (AMREX_SPACEDIM >= 2) + else if (actual_dim == 2) { + if (m_cy.empty()) { + post_forward_doit<1>(detail::get_fab(m_ry), post_forward); + } else { + post_forward_doit<1>(detail::get_fab(m_cy), post_forward); + } + } +#endif +#if (AMREX_SPACEDIM == 3) + else if (actual_dim == 3) { + if (m_cz.empty()) { + post_forward_doit<2>(detail::get_fab(m_rz), post_forward); + } else { + post_forward_doit<2>(detail::get_fab(m_cz), post_forward); + } + } +#endif + + this->backward(); + + outmf.ParallelCopy(m_rx, 0, 0, 1, IntVect(0), ngout, period); +} + +template +void R2X::forward (MF const& inmf) +{ + BL_PROFILE("FFT::R2X::forward"); m_rx.ParallelCopy(inmf, 0, 0, 1); if (m_bc[0].first == Boundary::periodic) { @@ -575,44 +632,57 @@ void R2X::forwardThenBackward_doit (MF const& inmf, MF& outmf, m_fft_fwd_z.template compute_r2c(); } #endif +} - // post-forward +template +void R2X::forward (MF const& inmf, MF& outmf) +{ + this->forward(inmf); - int actual_dim = AMREX_SPACEDIM; -#if (AMREX_SPACEDIM >= 2) - if (m_dom_0.length(1) == 1) { actual_dim = 1; } -#endif #if (AMREX_SPACEDIM == 3) - if ((m_dom_0.length(2) == 1) && (m_dom_0.length(1) > 1)) { actual_dim = 2; } -#endif - - if (actual_dim == 1) { - if (m_cx.empty()) { - post_forward_doit<0>(detail::get_fab(m_rx), post_forward); - } else { - post_forward_doit<0>(detail::get_fab(m_cx), post_forward); - } - } -#if (AMREX_SPACEDIM >= 2) - else if (actual_dim == 2) { + if (m_info.batch_mode) { if (m_cy.empty()) { - post_forward_doit<1>(detail::get_fab(m_ry), post_forward); + ParallelCopy(outmf, m_dom_rx, m_ry, 0, 0, 1, IntVect(0), Swap01{}); } else { - post_forward_doit<1>(detail::get_fab(m_cy), post_forward); + amrex::Abort("R2X::forward(MF,MF): How did this happen?"); } - } + } else #endif + { + amrex::ignore_unused(outmf); + amrex::Abort("R2X::forward(MF,MF): TODO"); + } +} + +template +void R2X::forward (MF const& inmf, cMF& outmf) +{ + this->forward(inmf); + #if (AMREX_SPACEDIM == 3) - else if (actual_dim == 3) { - if (m_cz.empty()) { - post_forward_doit<2>(detail::get_fab(m_rz), post_forward); + if (m_info.batch_mode) { + if (!m_cy.empty()) { + auto lo = m_dom_cy.smallEnd(); + auto hi = m_dom_cy.bigEnd(); + std::swap(lo[0],lo[1]); + std::swap(hi[0],hi[1]); + Box dom(lo,hi); + ParallelCopy(outmf, dom, m_cy, 0, 0, 1, IntVect(0), Swap01{}); } else { - post_forward_doit<2>(detail::get_fab(m_cz), post_forward); + amrex::Abort("R2X::forward(MF,cMF): How did this happen?"); } - } + } else #endif + { + amrex::ignore_unused(outmf); + amrex::Abort("R2X::forward(MF,cMF): TODO"); + } +} - // backward +template +void R2X::backward () +{ + BL_PROFILE("FFT::R2X::backward"); #if (AMREX_SPACEDIM == 3) if (m_bc[2].first != Boundary::periodic) @@ -660,6 +730,51 @@ void R2X::forwardThenBackward_doit (MF const& inmf, MF& outmf, } else { m_fft_bwd_x.template compute_r2r(); } +} + +template +void R2X::backward (MF const& inmf, MF& outmf, IntVect const& ngout, + Periodicity const& period) +{ +#if (AMREX_SPACEDIM == 3) + if (m_info.batch_mode) { + if (m_cy.empty()) { + ParallelCopy(m_ry, m_dom_ry, inmf, 0, 0, 1, IntVect(0), Swap01{}); + } else { + amrex::Abort("R2X::backward(MF,MF): How did this happen?"); + } + } else +#endif + { + amrex::ignore_unused(inmf,outmf,ngout,period); + amrex::Abort("R2X::backward(MF,MF): TODO"); + } + + this->backward(); + + outmf.ParallelCopy(m_rx, 0, 0, 1, IntVect(0), ngout, period); +} + +template +void R2X::backward (cMF const& inmf, MF& outmf, IntVect const& ngout, + Periodicity const& period) +{ +#if (AMREX_SPACEDIM == 3) + if (m_info.batch_mode) { + if (!m_cy.empty()) { + ParallelCopy(m_cy, m_dom_cy, inmf, 0, 0, 1, IntVect(0), Swap01{}); + } else { + amrex::Abort("R2X::backward(cMF,MF): How did this happen?"); + } + } else +#endif + { + amrex::ignore_unused(inmf,outmf,ngout,period); + amrex::Abort("R2X::backward(cMF,MF): TODO"); + } + + this->backward(); + outmf.ParallelCopy(m_rx, 0, 0, 1, IntVect(0), ngout, period); } diff --git a/Tests/FFT/Poisson/main.cpp b/Tests/FFT/Poisson/main.cpp index a4518ba723f..f344edbd380 100644 --- a/Tests/FFT/Poisson/main.cpp +++ b/Tests/FFT/Poisson/main.cpp @@ -195,13 +195,23 @@ int main (int argc, char* argv[]) }}} #if (AMREX_SPACEDIM == 3) - { - amrex::Print() << " Testing PoissonHybrid\n"; + amrex::Print() << " Testing PoissonHybrid\n"; + icase = 0; + for (int ycase = 0; ycase < ncasesy; ++ycase) { + for (int xcase = 0; xcase < ncases ; ++xcase) { + ++icase; Array,AMREX_SPACEDIM> - fft_bc{std::make_pair(FFT::Boundary::periodic,FFT::Boundary::periodic), - std::make_pair(FFT::Boundary::periodic,FFT::Boundary::periodic), + fft_bc{bcs[xcase], bcs[ycase], std::make_pair(FFT::Boundary::even,FFT::Boundary::even)}; + amrex::Print() << " (" << icase << ") Testing ("; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + amrex::Print() << "(" << getEnumNameString(fft_bc[idim].first) + << "," << getEnumNameString(fft_bc[idim].second) + << ")"; + if (idim+1 < AMREX_SPACEDIM) { amrex::Print() << " "; } + } + amrex::Print() << ")\n"; MultiFab rhs(ba,dm,1,0); MultiFab soln(ba,dm,1,1); @@ -211,7 +221,7 @@ int main (int argc, char* argv[]) Gpu::DeviceVector dz(n_cell_z, geom.CellSize(2)); // or Vector dz(n_cell_z, geom.CellSize(2)); - FFT::PoissonHybrid fft_poisson(geom); + FFT::PoissonHybrid fft_poisson(geom, fft_bc); fft_poisson.solve(soln, rhs, dz); auto [bnorm, rnorm] = check_convergence(soln, rhs, geom); @@ -223,7 +233,7 @@ int main (int argc, char* argv[]) auto eps = 1.e-11; #endif AMREX_ALWAYS_ASSERT(rnorm < eps*bnorm); - } + }} #endif }