From 8f79b7e2eb30423a19feb8c6c54cd3b74186a765 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang
Date: Fri, 6 Oct 2023 09:48:51 -0700
Subject: [PATCH] EdgeFluxRegister for MHD

This implements EdgeFluxRegister for refluxing in MHD. The equation here
is \frac{\partial B}{\partial t} + \nabla \times E = 0. Here, B is on
faces and E is on edges.
---
 .../sphinx_documentation/source/Debugging.rst |   2 +-
 Docs/sphinx_documentation/source/GPU.rst      |   2 +-
 Src/AmrCore/AMReX_AmrCore.cpp                 |   1 -
 Src/Base/AMReX_BaseFab.H                      |   2 +-
 Src/Boundary/AMReX_BoundaryFwd.H              |   1 +
 Src/Boundary/AMReX_EdgeFluxRegister.H         | 117 +++++
 Src/Boundary/AMReX_EdgeFluxRegister.cpp       | 399 ++++++++++++++++++
 Src/Boundary/CMakeLists.txt                   |   8 +
 Src/Boundary/Make.package                     |   5 +
 Src/EB/AMReX_EB2_Level.H                      |   2 +-
 .../MLMG/AMReX_MLNodeLaplacian_misc.cpp       |   2 +-
 .../MLMG/AMReX_MLNodeLaplacian_sync.cpp       |   2 +-
 12 files changed, 536 insertions(+), 7 deletions(-)
 create mode 100644 Src/Boundary/AMReX_EdgeFluxRegister.H
 create mode 100644 Src/Boundary/AMReX_EdgeFluxRegister.cpp

diff --git a/Docs/sphinx_documentation/source/Debugging.rst b/Docs/sphinx_documentation/source/Debugging.rst
index 89eee31c2bd..aa5a9dcc9a3 100644
--- a/Docs/sphinx_documentation/source/Debugging.rst
+++ b/Docs/sphinx_documentation/source/Debugging.rst
@@ -24,7 +24,7 @@ handling of floating point exceptions: ``amrex.fpe_trap_invalid`` for NaNs,
 ``amrex.fpe_trap_zero`` for division by zero and ``amrex.fpe_trap_overflow`` for overflow.
 To more effectively trap the use of uninitialized values, AMReX also initializes ``FArrayBox``\ s in
-``MulitFab``\ s and arrays allocated by ``bl_allocate`` to signaling NaNs when it is compiled
+``MultiFab``\ s and arrays allocated by ``bl_allocate`` to signaling NaNs when it is compiled
 with ``TEST=TRUE`` or ``DEBUG=TRUE`` in GNU make, or with ``-DCMAKE_BUILD_TYPE=Debug`` in CMake.
 One can also control the setting for ``FArrayBox`` using the runtime parameter, ``fab.init_snan``.
 Note for Macs, M1 and M2 chips using Arm64 architecture are not able to trap division by zero.
diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst
index 08297cb3e2a..c6dd57b9115 100644
--- a/Docs/sphinx_documentation/source/GPU.rst
+++ b/Docs/sphinx_documentation/source/GPU.rst
@@ -796,7 +796,7 @@ As another example, the following function computes the max- and 1-norm of a
 ::

     GpuTuple<Real,Real> compute_norms (MultiFab const& mf,
-                                       iMulitiFab const& mask)
+                                       iMultiFab const& mask)
     {
         auto const& data_ma = mf.const_arrays();
         auto const& mask_ma = mask.const_arrays();
diff --git a/Src/AmrCore/AMReX_AmrCore.cpp b/Src/AmrCore/AMReX_AmrCore.cpp
index ed0cd5d1020..b27532dc0d8 100644
--- a/Src/AmrCore/AMReX_AmrCore.cpp
+++ b/Src/AmrCore/AMReX_AmrCore.cpp
@@ -1,6 +1,5 @@
 #include <AMReX_AmrCore.H>

-#include

 #ifdef AMREX_PARTICLES
 #include
diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H
index 26d45077d9e..b1224f14008 100644
--- a/Src/Base/AMReX_BaseFab.H
+++ b/Src/Base/AMReX_BaseFab.H
@@ -542,7 +542,7 @@ public:
                       int      numcomp = 1) noexcept;
     /**
      * \brief As above, except that the destination Box is specified,
-     * but the source Box is taken to the equal to the source
+     * but the source Box is taken to be equal to the destination
      * Box, and all components of the destination BaseFab are
      * copied.
     */
diff --git a/Src/Boundary/AMReX_BoundaryFwd.H b/Src/Boundary/AMReX_BoundaryFwd.H
index 6c42d8e5f06..93bc5009c94 100644
--- a/Src/Boundary/AMReX_BoundaryFwd.H
+++ b/Src/Boundary/AMReX_BoundaryFwd.H
@@ -11,6 +11,7 @@ class InterpBndryDataT;
 class Mask;
 class MultiMask;
 class YAFluxRegisterT;
+class EdgeFluxRegister;

 }
diff --git a/Src/Boundary/AMReX_EdgeFluxRegister.H b/Src/Boundary/AMReX_EdgeFluxRegister.H
new file mode 100644
index 00000000000..46813c2246a
--- /dev/null
+++ b/Src/Boundary/AMReX_EdgeFluxRegister.H
@@ -0,0 +1,117 @@
+#ifndef AMREX_EDGE_FLUX_REGISTER_H_
+#define AMREX_EDGE_FLUX_REGISTER_H_
+#include <AMReX_Config.H>
+
+#include <AMReX_iMultiFab.H>
+#include <AMReX_LayoutData.H>
+#include <AMReX_MultiFab.H>
+
+namespace amrex {
+
+/**
+ * Edge Flux Register for Constrained Transport
+ *
+ * This Flux Register is useful for solving a system like dB/dt + curl E = 0
+ * on a staggered mesh. (Here, d denotes the partial derivative.) B is a
+ * vector on cell faces, and E is a vector on cell edges. In 2D, E has only
+ * one component, Ez, and it is on the nodes of a 2D mesh.
+ *
+ * At the beginning of a coarse step, `reset()` is called. In MFIter for
+ * the coarse level advance, `CrseAdd` is called with the coarse flux (i.e.,
+ * E). The flux is not scaled. In MFIter for the fine level advance,
+ * `FineAdd` is called. After the fine level finishes its time steps,
+ * `Reflux` is called to update the coarse level B on the coarse/fine
+ * boundary. The user is also expected to call the following version of
+ * average_down_faces from AMReX_MultiFabUtil.H to synchronize the coarse
+ * level data with the fine level.
+ *
+ * \verbatim
+    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
+    void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
+                             const Array<MF*,AMREX_SPACEDIM>& crse,
+                             const IntVect& ratio, const Geometry& crse_geom)
+ * \endverbatim
+ *
+ * Note that both CrseAdd and FineAdd are async for GPU builds. That means
+ * it's the user's responsibility to keep the FArrayBox arguments alive or
+ * to call Gpu::streamSynchronize() when necessary.
+ *
+ * Because staggered grids are used, tiling could be very confusing. To avoid
+ * confusion, this class assumes that tiling is not enabled for the MFIter
+ * loop containing calls to CrseAdd and FineAdd.
+ *
+ * If the equation has an extra factor due to the choice of units, the
+ * factor can be absorbed into dt. If we have `v x B` instead of E, the sign
+ * can also be absorbed into dt. Note that whatever the choice of sign is,
+ * the dt arguments passed to CrseAdd and FineAdd should have the same sign.
+ *
+ * We try to keep the interface simple by not providing overloads that
+ * specify the component index. If the user's data does not start with
+ * component 0, it can be worked around by creating alias FArrayBoxes and
+ * MultiFabs.
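+ *
+ * A minimal 3D sketch of one coarse step follows. This is assumed
+ * application code, not part of the class: `B_crse` and `B_fine` are
+ * Array<MultiFab*,AMREX_SPACEDIM> of face data, `E_crse` and `E_fine` are
+ * Array<MultiFab,AMREX_SPACEDIM> of edge data, `ratio` is the refinement
+ * ratio, and `nsubsteps` is the number of fine substeps.
+ *
+ * \verbatim
+    EdgeFluxRegister efr(fba, cba, fdm, cdm, fgeom, cgeom);
+    efr.reset();
+    for (MFIter mfi(*B_crse[0]); mfi.isValid(); ++mfi) { // no tiling
+        efr.CrseAdd(mfi, {&E_crse[0][mfi], &E_crse[1][mfi], &E_crse[2][mfi]},
+                    dt_crse);
+    }
+    for (int step = 0; step < nsubsteps; ++step) {
+        for (MFIter mfi(*B_fine[0]); mfi.isValid(); ++mfi) { // no tiling
+            efr.FineAdd(mfi, {&E_fine[0][mfi], &E_fine[1][mfi], &E_fine[2][mfi]},
+                        dt_fine);
+        }
+    }
+    efr.Reflux(B_crse);
+    average_down_faces(GetArrOfConstPtrs(B_fine), B_crse, ratio, cgeom);
+ * \endverbatim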
+ */
+class EdgeFluxRegister
+{
+public:
+
+    EdgeFluxRegister () = default;
+
+    EdgeFluxRegister (const BoxArray& fba, const BoxArray& cba,
+                      const DistributionMapping& fdm, const DistributionMapping& cdm,
+                      const Geometry& fgeom, const Geometry& cgeom,
+                      int nvar = 1);
+
+    void define (const BoxArray& fba, const BoxArray& cba,
+                 const DistributionMapping& fdm, const DistributionMapping& cdm,
+                 const Geometry& fgeom, const Geometry& cgeom,
+                 int nvar = 1);
+
+    void reset ();
+
+#if (AMREX_SPACEDIM == 3)
+
+    void CrseAdd (MFIter const& mfi, const Array<FArrayBox const*,AMREX_SPACEDIM>& E_crse, Real dt_crse);
+    void FineAdd (MFIter const& mfi, const Array<FArrayBox const*,AMREX_SPACEDIM>& E_fine, Real dt_fine);
+
+#else /* 2D */
+
+    void CrseAdd (MFIter const& mfi, FArrayBox const& E_crse, Real dt_crse);
+    void FineAdd (MFIter const& mfi, FArrayBox const& E_fine, Real dt_fine);
+
+#endif
+
+    void Reflux (Array<MultiFab*,AMREX_SPACEDIM> const& B_crse) const;
+
+private:
+
+    Geometry m_fine_geom;
+    Geometry m_crse_geom;
+
+    IntVect m_ratio;
+    int m_ncomp;
+
+#if (AMREX_SPACEDIM == 3)
+
+    Array<MultiFab,AMREX_SPACEDIM> m_E_crse; // on original grids
+
+    // There are AMREX_SPACEDIM*2 faces. For each face, we need to store two
+    // components. For example, at the x-faces, we need to store Ey and Ez.
+    Array<Array<MultiFab,2>,AMREX_SPACEDIM*2> m_E_fine;
+
+    // Mask on the coarse level indicating overlap with m_E_fine
+    Array<iMultiFab,AMREX_SPACEDIM> m_fine_mask;
+
+#else
+
+    MultiFab m_E_crse;
+    Array<MultiFab,AMREX_SPACEDIM*2> m_E_fine;
+    iMultiFab m_fine_mask;
+
+#endif
+
+    LayoutData<int> m_has_cf; // Flag on the coarse level indicating c/f interface
+};
+
+}
+
+#endif
diff --git a/Src/Boundary/AMReX_EdgeFluxRegister.cpp b/Src/Boundary/AMReX_EdgeFluxRegister.cpp
new file mode 100644
index 00000000000..871aeb59fb1
--- /dev/null
+++ b/Src/Boundary/AMReX_EdgeFluxRegister.cpp
@@ -0,0 +1,399 @@
+#include <AMReX_EdgeFluxRegister.H>
+#include <AMReX_MultiFabUtil.H>
+
+namespace amrex {
+
+#if (AMREX_SPACEDIM == 3)
+static constexpr std::array<IntVect,3> E_ixtype{IntVect(0,1,1),IntVect(1,0,1),IntVect(1,1,0)};
+#endif
+
+EdgeFluxRegister::EdgeFluxRegister (const BoxArray& fba, const BoxArray& cba,
+                                    const DistributionMapping& fdm, const DistributionMapping& cdm,
+                                    const Geometry& fgeom, const Geometry& cgeom,
+                                    int nvar)
+{
+    define(fba, cba, fdm, cdm, fgeom, cgeom, nvar);
+}
+
+void EdgeFluxRegister::define (const BoxArray& fba, const BoxArray& cba,
+                               const DistributionMapping& fdm, const DistributionMapping& cdm,
+                               const Geometry& fgeom, const Geometry& cgeom,
+                               int nvar)
+{
+    m_fine_geom = fgeom;
+    m_crse_geom = cgeom;
+    m_ratio = fgeom.Domain().size() / cgeom.Domain().size();
+    AMREX_ALWAYS_ASSERT(fgeom.Domain() == amrex::refine(cgeom.Domain(), m_ratio));
+    m_ncomp = nvar;
+
+    m_has_cf.define(cba, cdm);
+    for (MFIter mfi(m_has_cf, MFItInfo().DisableDeviceSync()); mfi.isValid(); ++mfi) {
+        m_has_cf[mfi] = 0;
+    }
+
+#if (AMREX_SPACEDIM == 3)
+
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        m_E_crse[idim].define(amrex::convert(cba,E_ixtype[idim]), cdm, nvar, 0);
+    }
+    for (OrientationIter oit; oit.isValid(); ++oit) {
+        auto face = oit();
+        const int direction = face.coordDir();
+        int count = 0;
+        for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+            if (idim != direction) {
+                BoxArray bba(amrex::convert(fba,IntVect(0)),
+                             BATransformer(face, IndexType(E_ixtype[idim]), 0, 1, 0));
+                bba.coarsen(m_ratio);
+                m_E_fine[face][count++].define(bba, fdm, nvar, 0);
+            }
+        }
+    }
+
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        Array<MultiFab*,4> fmf;
+        int count = 0;
+        for (OrientationIter oit; oit.isValid(); ++oit) {
+            auto face = oit();
+            const int direction = face.coordDir();
+            if (direction != idim) {
+                // For x-direction, we store Ey and then Ez in m_E_fine.
+                // For y-direction, we store Ex and then Ez in m_E_fine.
+                // For z-direction, we store Ex and then Ey in m_E_fine.
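+                // For example, on an x-face (direction == 0), Ey (idim == 1)
+                // goes to slot m = 0 and Ez (idim == 2) to slot m = 1,
+                // per m = (idim < direction) ? idim : idim-1 below.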
+                const int m = (idim < direction) ? idim : idim-1;
+                fmf[count++] = & m_E_fine[face][m];
+            }
+        }
+
+        for (int m = 0; m < 4; ++m) {
+            LayoutData<int> tmp_has_cf;
+            // We use IntVect(1) as the ref ratio because fmf has already been coarsened.
+            auto tmp_mask = makeFineMask(m_E_crse[idim], *fmf[m], IntVect(0), IntVect(1),
+                                         m_crse_geom.periodicity(), 0, 1, tmp_has_cf);
+            if (m == 0) {
+                m_fine_mask[idim] = std::move(tmp_mask);
+            } else {
+                amrex::Add(m_fine_mask[idim], tmp_mask, 0, 0, 1, 0);
+            }
+            for (MFIter mfi(m_has_cf, MFItInfo().DisableDeviceSync()); mfi.isValid(); ++mfi) {
+                m_has_cf[mfi] += tmp_has_cf[mfi];
+            }
+        }
+    }
+
+#else /* 2D */
+
+    m_E_crse.define(amrex::convert(cba,IntVect(1)), cdm, nvar, 0);
+
+    for (OrientationIter oit; oit.isValid(); ++oit) {
+        auto face = oit();
+        BoxArray bba(amrex::convert(fba,IntVect(0)),
+                     BATransformer(face, IndexType::TheNodeType(), 0, 1, 0));
+        bba.coarsen(m_ratio);
+        m_E_fine[face].define(bba, fdm, nvar, 0);
+    }
+
+    for (OrientationIter oit; oit.isValid(); ++oit) {
+        auto face = oit();
+        LayoutData<int> tmp_has_cf;
+        // We use IntVect(1) as the ref ratio because m_E_fine has already been coarsened.
+        auto tmp_mask = makeFineMask(m_E_crse, m_E_fine[face], IntVect(0), IntVect(1),
+                                     m_crse_geom.periodicity(), 0, 1, tmp_has_cf);
+        if (int(face) == 0) {
+            m_fine_mask = std::move(tmp_mask);
+        } else {
+            amrex::Add(m_fine_mask, tmp_mask, 0, 0, 1, 0);
+        }
+        for (MFIter mfi(m_has_cf, MFItInfo().DisableDeviceSync()); mfi.isValid(); ++mfi) {
+            m_has_cf[mfi] += tmp_has_cf[mfi];
+        }
+    }
+
+#endif
+}
+
+void EdgeFluxRegister::reset ()
+{
+#if (AMREX_SPACEDIM == 3)
+
+    for (auto& mf : m_E_crse) {
+        auto const& ma = mf.arrays();
+        ParallelFor(mf, IntVect(0), mf.nComp(),
+        [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k, int n)
+        {
+            ma[bno](i,j,k,n) = Real(0.0);
+        });
+    }
+    for (auto& a : m_E_fine) {
+        for (auto& mf : a) {
+#ifdef AMREX_USE_GPU
+            auto const& ma = mf.arrays();
+            ParallelFor(mf, IntVect(0), mf.nComp(),
+            [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k, int n)
+            {
+                ma[bno](i,j,k,n) = Real(0.0);
+            });
+#else
+            // Due to its special BoxArray, it's not safe to do tiling.
+#ifdef AMREX_USE_OMP
+#pragma omp parallel
+#endif
+            for (MFIter mfi(mf); mfi.isValid(); ++mfi) {
+                mf[mfi].template setVal<RunOn::Host>(Real(0.0));
+            }
+#endif
+        }
+    }
+
+#else /* 2D */
+
+    {
+        auto const& ma = m_E_crse.arrays();
+        ParallelFor(m_E_crse, IntVect(0), m_E_crse.nComp(),
+        [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k, int n)
+        {
+            ma[bno](i,j,k,n) = Real(0.0);
+        });
+    }
+    for (auto& mf : m_E_fine) {
+#ifdef AMREX_USE_GPU
+        auto const& ma = mf.arrays();
+        ParallelFor(mf, IntVect(0), mf.nComp(),
+        [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k, int n)
+        {
+            ma[bno](i,j,k,n) = Real(0.0);
+        });
+#else
+        // Due to its special BoxArray, it's not safe to do tiling.
+#ifdef AMREX_USE_OMP
+#pragma omp parallel
+#endif
+        for (MFIter mfi(mf); mfi.isValid(); ++mfi) {
+            mf[mfi].template setVal<RunOn::Host>(Real(0.0));
+        }
+#endif
+    }
+
+#endif
+
+    Gpu::synchronize();
+}
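+
+// CrseAdd and FineAdd accumulate dt-weighted E into the register. Over one
+// coarse step, the coarse side holds sum(dt_crse * E_crse), and the fine side
+// holds the sum over substeps of dt_fine * (E_fine averaged over the fine
+// edges covering each coarse edge). Reflux uses the difference of the sums.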
+
+#if (AMREX_SPACEDIM == 3)
+
+void EdgeFluxRegister::CrseAdd (MFIter const& mfi,
+                                const Array<FArrayBox const*,AMREX_SPACEDIM>& E_crse,
+                                Real dt_crse)
+{
+    AMREX_ASSERT(mfi.validbox() == mfi.tilebox());
+    if (m_has_cf[mfi]) {
+        auto const& dst0 = m_E_crse[0].array(mfi);
+        auto const& dst1 = m_E_crse[1].array(mfi);
+        auto const& dst2 = m_E_crse[2].array(mfi);
+        auto const& src0 = E_crse[0]->const_array();
+        auto const& src1 = E_crse[1]->const_array();
+        auto const& src2 = E_crse[2]->const_array();
+        amrex::ParallelFor
+            (amrex::convert(mfi.validbox(),E_ixtype[0]), m_ncomp,
+             [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+             {
+                 dst0(i,j,k,n) += src0(i,j,k,n) * dt_crse;
+             },
+             amrex::convert(mfi.validbox(),E_ixtype[1]), m_ncomp,
+             [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+             {
+                 dst1(i,j,k,n) += src1(i,j,k,n) * dt_crse;
+             },
+             amrex::convert(mfi.validbox(),E_ixtype[2]), m_ncomp,
+             [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+             {
+                 dst2(i,j,k,n) += src2(i,j,k,n) * dt_crse;
+             });
+    }
+}
+
+void EdgeFluxRegister::FineAdd (MFIter const& mfi,
+                                const Array<FArrayBox const*,AMREX_SPACEDIM>& E_fine,
+                                Real dt_fine)
+{
+    AMREX_ASSERT(mfi.validbox() == mfi.tilebox());
+    auto const ratio = m_ratio;
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        auto const& src = E_fine[idim]->const_array();
+        for (OrientationIter oit; oit.isValid(); ++oit) {
+            auto face = oit();
+            const int direction = face.coordDir();
+            if (direction != idim) {
+                // For x-direction, we store Ey and then Ez in m_E_fine.
+                // For y-direction, we store Ex and then Ez in m_E_fine.
+                // For z-direction, we store Ex and then Ey in m_E_fine.
+                const int m = (idim < direction) ? idim : idim-1;
+                auto const& dst = m_E_fine[face][m].array(mfi);
+                AMREX_ASSERT(E_fine[idim]->box().ixType() == m_E_fine[face][m].ixType());
+                auto offset = IntVect::TheDimensionVector(idim).dim3();
+                // Average over the ratio[idim] fine edges covering a coarse edge.
+                auto dt2 = dt_fine / Real(ratio[idim]);
+                ParallelFor(Box(dst), m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+                {
+                    int ii = ratio[0]*i;
+                    int jj = ratio[1]*j;
+                    int kk = ratio[2]*k;
+                    for (int rr = 0; rr < ratio[idim]; ++rr) {
+                        dst(i,j,k,n) += src(ii+offset.x*rr,jj+offset.y*rr,kk+offset.z*rr,n)*dt2;
+                    }
+                });
+            }
+        }
+    }
+}
+
+#else /* 2D */
+
+void EdgeFluxRegister::CrseAdd (MFIter const& mfi, FArrayBox const& E_crse, Real dt_crse)
+{
+    AMREX_ASSERT(mfi.validbox() == mfi.tilebox());
+    if (m_has_cf[mfi]) {
+        auto const& dst = m_E_crse.array(mfi);
+        auto const& src = E_crse.const_array();
+        amrex::ParallelFor(amrex::convert(mfi.validbox(),IntVect(1)), m_ncomp,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+        {
+            dst(i,j,k,n) += src(i,j,k,n) * dt_crse;
+        });
+    }
+}
+
+void EdgeFluxRegister::FineAdd (MFIter const& mfi, FArrayBox const& E_fine, Real dt_fine)
+{
+    AMREX_ASSERT(mfi.validbox() == mfi.tilebox());
+    auto const ratio = m_ratio;
+    auto const& src = E_fine.const_array();
+    for (OrientationIter oit; oit.isValid(); ++oit) {
+        auto face = oit();
+        auto const& dst = m_E_fine[face].array(mfi);
+        // In 2D the "edge" is a node, so the coarse and fine points coincide
+        // and no averaging is needed.
+        ParallelFor(Box(dst), m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int, int n)
+        {
+            int ii = ratio[0]*i;
+            int jj = ratio[1]*j;
+            dst(i,j,0,n) += src(ii,jj,0,n) * dt_fine;
+        });
+    }
+}
+
+#endif
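+
+// Reflux subtracts the curl of the accumulated mismatch from the coarse B:
+//     B_crse -= curl(dE),  dE = (fine sum) - (coarse sum),
+// where dE is nonzero only on coarse edges covered by fine edges (per the
+// fine mask), so B is corrected only at the coarse/fine boundary.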
+
+void EdgeFluxRegister::Reflux (Array<MultiFab*,AMREX_SPACEDIM> const& B_crse) const
+{
+#if (AMREX_SPACEDIM == 3)
+
+    Array<MultiFab,AMREX_SPACEDIM> E_cfine;
+
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        E_cfine[idim].define(m_E_crse[idim].boxArray(), m_E_crse[idim].DistributionMap(), m_ncomp, 0);
+        for (OrientationIter oit; oit.isValid(); ++oit) {
+            auto face = oit();
+            const int direction = face.coordDir();
+            if (direction != idim) {
+                // For x-direction, we store Ey and then Ez in m_E_fine.
+                // For y-direction, we store Ex and then Ez in m_E_fine.
+                // For z-direction, we store Ex and then Ey in m_E_fine.
+                const int m = (idim < direction) ? idim : idim-1;
+                E_cfine[idim].ParallelCopy(m_E_fine[face][m], m_crse_geom.periodicity());
+            }
+        }
+    }
+
+    Real dxi = m_crse_geom.InvCellSize(0);
+    Real dyi = m_crse_geom.InvCellSize(1);
+    Real dzi = m_crse_geom.InvCellSize(2);
+
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+    for (MFIter mfi(*B_crse[0]); mfi.isValid(); ++mfi) {
+        if (m_has_cf[mfi]) {
+            Box xbx = amrex::convert(mfi.validbox(), B_crse[0]->ixType());
+            Box ybx = amrex::convert(mfi.validbox(), B_crse[1]->ixType());
+            Box zbx = amrex::convert(mfi.validbox(), B_crse[2]->ixType());
+            auto const& xmsk = m_fine_mask[0].const_array(mfi);
+            auto const& ymsk = m_fine_mask[1].const_array(mfi);
+            auto const& zmsk = m_fine_mask[2].const_array(mfi);
+            auto const& Bx = B_crse[0]->array(mfi);
+            auto const& By = B_crse[1]->array(mfi);
+            auto const& Bz = B_crse[2]->array(mfi);
+            auto const& cEx = m_E_crse[0].const_array(mfi);
+            auto const& cEy = m_E_crse[1].const_array(mfi);
+            auto const& cEz = m_E_crse[2].const_array(mfi);
+            auto const& fEx = E_cfine[0].const_array(mfi);
+            auto const& fEy = E_cfine[1].const_array(mfi);
+            auto const& fEz = E_cfine[2].const_array(mfi);
+            ParallelFor
+                (xbx, m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+                 {
+                     Real dEym = ymsk(i,j  ,k  ) ? (fEy(i,j  ,k  ,n)-cEy(i,j  ,k  ,n)) : Real(0.0);
+                     Real dEyp = ymsk(i,j  ,k+1) ? (fEy(i,j  ,k+1,n)-cEy(i,j  ,k+1,n)) : Real(0.0);
+                     Real dEzm = zmsk(i,j  ,k  ) ? (fEz(i,j  ,k  ,n)-cEz(i,j  ,k  ,n)) : Real(0.0);
+                     Real dEzp = zmsk(i,j+1,k  ) ? (fEz(i,j+1,k  ,n)-cEz(i,j+1,k  ,n)) : Real(0.0);
+                     Bx(i,j,k,n) -= (dEzp-dEzm)*dyi - (dEyp-dEym)*dzi;
+                 },
+                 ybx, m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+                 {
+                     Real dExm = xmsk(i  ,j,k  ) ? (fEx(i  ,j,k  ,n)-cEx(i  ,j,k  ,n)) : Real(0.0);
+                     Real dExp = xmsk(i  ,j,k+1) ? (fEx(i  ,j,k+1,n)-cEx(i  ,j,k+1,n)) : Real(0.0);
+                     Real dEzm = zmsk(i  ,j,k  ) ? (fEz(i  ,j,k  ,n)-cEz(i  ,j,k  ,n)) : Real(0.0);
+                     Real dEzp = zmsk(i+1,j,k  ) ? (fEz(i+1,j,k  ,n)-cEz(i+1,j,k  ,n)) : Real(0.0);
+                     By(i,j,k,n) -= (dExp-dExm)*dzi - (dEzp-dEzm)*dxi;
+                 },
+                 zbx, m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+                 {
+                     Real dExm = xmsk(i  ,j  ,k) ? (fEx(i  ,j  ,k,n)-cEx(i  ,j  ,k,n)) : Real(0.0);
+                     Real dExp = xmsk(i  ,j+1,k) ? (fEx(i  ,j+1,k,n)-cEx(i  ,j+1,k,n)) : Real(0.0);
+                     Real dEym = ymsk(i  ,j  ,k) ? (fEy(i  ,j  ,k,n)-cEy(i  ,j  ,k,n)) : Real(0.0);
+                     Real dEyp = ymsk(i+1,j  ,k) ? (fEy(i+1,j  ,k,n)-cEy(i+1,j  ,k,n)) : Real(0.0);
+                     Bz(i,j,k,n) -= (dEyp-dEym)*dxi - (dExp-dExm)*dyi;
+                 });
+        }
+    }
+
+#else /* 2D */
+
+    MultiFab E_cfine(m_E_crse.boxArray(), m_E_crse.DistributionMap(), m_ncomp, 0);
+
+    for (OrientationIter oit; oit.isValid(); ++oit) {
+        auto face = oit();
+        E_cfine.ParallelCopy(m_E_fine[face], m_crse_geom.periodicity());
+    }
+
+    Real dxi = m_crse_geom.InvCellSize(0);
+    Real dyi = m_crse_geom.InvCellSize(1);
+
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+    for (MFIter mfi(*B_crse[0]); mfi.isValid(); ++mfi) {
+        if (m_has_cf[mfi]) {
+            Box xbx = amrex::convert(mfi.validbox(), B_crse[0]->ixType());
+            Box ybx = amrex::convert(mfi.validbox(), B_crse[1]->ixType());
+            auto const& zmsk = m_fine_mask.const_array(mfi);
+            auto const& Bx = B_crse[0]->array(mfi);
+            auto const& By = B_crse[1]->array(mfi);
+            auto const& cEz = m_E_crse.const_array(mfi);
+            auto const& fEz = E_cfine.const_array(mfi);
+            ParallelFor
+                (xbx, m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+                 {
+                     Real dEzm = zmsk(i,j  ,k) ? (fEz(i,j  ,k,n)-cEz(i,j  ,k,n)) : Real(0.0);
+                     Real dEzp = zmsk(i,j+1,k) ? (fEz(i,j+1,k,n)-cEz(i,j+1,k,n)) : Real(0.0);
+                     Bx(i,j,k,n) -= (dEzp-dEzm)*dyi;
+                 },
+                 ybx, m_ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
+                 {
+                     Real dEzm = zmsk(i  ,j,k) ? (fEz(i  ,j,k,n)-cEz(i  ,j,k,n)) : Real(0.0);
+                     Real dEzp = zmsk(i+1,j,k) ? (fEz(i+1,j,k,n)-cEz(i+1,j,k,n)) : Real(0.0);
+                     // In 2D, (curl E)_y reduces to -dEz/dx, hence the double negative.
+                     By(i,j,k,n) -= -(dEzp-dEzm)*dxi;
+                 });
+        }
+    }
+
+#endif
+}
+
+}
diff --git a/Src/Boundary/CMakeLists.txt b/Src/Boundary/CMakeLists.txt
index f8d2e7e4929..30be881ad19 100644
--- a/Src/Boundary/CMakeLists.txt
+++ b/Src/Boundary/CMakeLists.txt
@@ -23,6 +23,14 @@ foreach(D IN LISTS AMReX_SPACEDIM)
       AMReX_BoundaryFwd.H
    )

+   if (NOT (D EQUAL 1))
+      target_sources(amrex_${D}d
+         PRIVATE
+         AMReX_EdgeFluxRegister.H
+         AMReX_EdgeFluxRegister.cpp
+      )
+   endif ()
+
    if (AMReX_FORTRAN)
       target_sources(amrex_${D}d
          PRIVATE
diff --git a/Src/Boundary/Make.package b/Src/Boundary/Make.package
index 36968b1f9d8..7dae7ec913f 100644
--- a/Src/Boundary/Make.package
+++ b/Src/Boundary/Make.package
@@ -12,6 +12,11 @@ CEXE_headers += AMReX_LOUtil_K.H
 CEXE_headers += AMReX_YAFluxRegister_K.H AMReX_YAFluxRegister_$(DIM)D_K.H
 CEXE_headers += AMReX_YAFluxRegister.H

+ifneq ($(DIM),1)
+  CEXE_headers += AMReX_EdgeFluxRegister.H
+  CEXE_sources += AMReX_EdgeFluxRegister.cpp
+endif
+
 CEXE_headers += AMReX_BoundaryFwd.H

 ifneq ($(BL_NO_FORT),TRUE)
diff --git a/Src/EB/AMReX_EB2_Level.H b/Src/EB/AMReX_EB2_Level.H
index cd7f8fc6083..0c7614606f1 100644
--- a/Src/EB/AMReX_EB2_Level.H
+++ b/Src/EB/AMReX_EB2_Level.H
@@ -483,7 +483,7 @@ GShopLevel<G>::define_fine (G const& gshop, const Geometry& geom,
             break;
         } else {
             auto ls = m_mgf.getLevelSet();
-            // This is an alias MulitFab, therefore FillBoundary on it is fine.
+            // This is an alias MultiFab, therefore FillBoundary on it is fine.
             ls.FillBoundary(geom.periodicity());
             if (amrex::Verbose() > 0) {
                 if (nsmallcells) {
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp
index 4abf219e44f..9cb5ec880fa 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp
@@ -895,7 +895,7 @@ MLNodeLaplacian::compRHS (const Vector<MultiFab*>& rhs, const Vector<MultiFab*>&
     //
     // Note that div vel we copmute on a coarse/fine nodes is not a
     // composite divergence.  It has been restricted so that it is suitable
-    // as RHS for our geometric mulitgrid solver with a MG hirerachy
+    // as RHS for our geometric multigrid solver with a MG hierarchy
     // including multiple AMR levels.
     //
     // Also note that even for RAP, we do doubling at Nuemann boundary,
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp
index f8bff06337b..587f9508ef5 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_sync.cpp
@@ -621,7 +621,7 @@ MLNodeLaplacian::reflux (int crse_amrlev,
     //
     // Note that the residue we copmute on a coarse/fine node is not a
    // composite divergence.  It has been restricted so that it is suitable
-    // as RHS for our geometric mulitgrid solver with a MG hirerachy
+    // as RHS for our geometric multigrid solver with a MG hierarchy
     // including multiple AMR levels.
    //