RevathiJambunathan
diff --git a/‎.github/workflows/dependencies/hip.sh
+49 b/‎.github/workflows/dependencies/hip.sh
+49
diff --git a/‎.github/workflows/source/wrongFileNameInExamples
+2 b/‎.github/workflows/source/wrongFileNameInExamples
+2
diff --git a/‎.github/workflows/ubuntu.yml
+20-2 b/‎.github/workflows/ubuntu.yml
+20-2
diff --git a/‎CMakeLists.txt
+11-2 b/‎CMakeLists.txt
+11-2
diff --git a/‎Docs/source/building/cmake.rst
+1-1 b/‎Docs/source/building/cmake.rst
+1-1
diff --git a/‎Docs/source/building/juwels.rst
+8 b/‎Docs/source/building/juwels.rst
+8
diff --git a/‎Docs/source/building/summit.rst
+5-2 b/‎Docs/source/building/summit.rst
+5-2
diff --git a/‎Docs/source/running_cpp/parameters.rst
+21-4 b/‎Docs/source/running_cpp/parameters.rst
+21-4
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# Copyright 2020 The WarpX Community
+#
+# License: BSD-3-Clause-LBNL
+# Authors: Axel Huebl
+
+# search recursive inside a folder if a file contains tabs
+#
+# @result 0 if no files are found, else 1
+#
+
+set -eu -o pipefail
+
+# Ref.: https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html#ubuntu
+wget -q -O - http://repo.radeon.com/rocm/rocm.gpg.key \
+  | sudo apt-key add -
+echo 'deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main' \
+  | sudo tee /etc/apt/sources.list.d/rocm.list
+
+echo 'export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin' \
+  | sudo tee -a /etc/profile.d/rocm.sh
+# we should not need to export HIP_PATH=/opt/rocm/hip with those installs
+
+sudo apt-get update
+
+# Ref.: https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html#installing-development-packages-for-cross-compilation
+# meta-package: rocm-dkms
+# OpenCL: rocm-opencl
+# other: rocm-dev rocm-utils
+sudo apt-get install -y --no-install-recommends \
+    build-essential \
+    gfortran        \
+    libnuma-dev     \
+    libopenmpi-dev  \
+    openmpi-bin     \
+    rocm-dev        \
+    rocrand
+
+# activate
+#
+source /etc/profile.d/rocm.sh
+hipcc --version
+
+# cmake-easyinstall
+#
+sudo curl -L -o /usr/local/bin/cmake-easyinstall https://git.io/JvLxY
+sudo chmod a+x /usr/local/bin/cmake-easyinstall
+export CEI_SUDO="sudo"
@@ -17,6 +17,7 @@ do
     if [[ ${file:0:6 } != inputs       ]] &&
        [[ ${file:0:12} != PICMI_inputs ]] &&
        [[ ${file:0:8 } != analysis     ]] &&
+       [[ ${file:  -4} != yaml         ]] &&
        [[ ${file:0:6 } != README       ]]
     then
         files+=($file)
@@ -32,6 +33,7 @@ then
     echo " - inputs       : for WarpX input files"
     echo " - PICMI_inputs : for PICMI-compliant input scripts"
     echo " - analysis     : for scripts testing the accuracy of a test"
+    echo " - *.yaml       : for third-party input, e.g. Ascent in situ visualization"
     echo " - README       : for readme files"
     echo ""
     echo "Please rename the file(s) to comply, or move to another folder"
 
@@ -100,7 +100,6 @@ jobs:
         sudo apt-get install -y intel-oneapi-dpcpp-cpp-compiler intel-oneapi-mkl-devel
         set +e
         source /opt/intel/oneapi/setvars.sh
-        source /opt/intel/oneapi/compiler/2021.1-beta08/env/vars.sh
         set -e
         git clone https://github.com/openPMD/openPMD-api.git
         mkdir openPMD-api/build
@@ -113,11 +112,30 @@ jobs:
       run: |
         set +e
         source /opt/intel/oneapi/setvars.sh
-        source /opt/intel/oneapi/compiler/2021.1-beta08/env/vars.sh
         set -e
         export CXX=$(which dpcpp)
         export CC=$(which clang)
 
         mkdir build_sp && cd build_sp
         cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DWarpX_MPI=OFF -DWarpX_COMPUTE=DPCPP -DWarpX_OPENPMD=ON -DWarpX_openpmd_internal=OFF -DWarpX_PRECISION=single
         make -j 2
+
+  build_hip:  # HIP build of WarpX in single precision, compiled with hipcc
+    name: HIP SP [Linux]
+    runs-on: ubuntu-20.04
+    steps:
+    - uses: actions/checkout@v2
+    - name: install dependencies
+      shell: bash
+      run: .github/workflows/dependencies/hip.sh  # installs ROCm packages, writes /etc/profile.d/rocm.sh
+    - name: build WarpX
+      shell: bash
+      run: |
+        source /etc/profile.d/rocm.sh  # puts the ROCm bin directories on PATH
+        hipcc --version
+        export CXX=$(which hipcc)
+        export CC=$(which hipcc)
+
+        mkdir build_sp && cd build_sp
+        cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DWarpX_MPI=ON -DWarpX_COMPUTE=HIP -DAMD_ARCH=gfx900 -DWarpX_OPENPMD=ON -DWarpX_PRECISION=single
+        make -j 2
@@ -14,6 +14,15 @@ if(CMAKE_BINARY_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
 endif()
 
 
+# CMake policies ##############################################################
+#
+# CMake 3.18+: CMP0104 (CMAKE_CUDA_ARCHITECTURES); OLD = an empty CUDA_ARCHITECTURES is not an error
+# https://cmake.org/cmake/help/latest/policy/CMP0104.html
+if(POLICY CMP0104)  # guard: older CMake versions do not know this policy
+    cmake_policy(SET CMP0104 OLD)  # NOTE(review): OLD policy behavior is deprecated; revisit
+endif()
+
+
 # CCache Support ##############################################################
 #
 # this is an optional tool that stores compiled object files; allows fast
@@ -54,8 +63,8 @@ if(NOT WarpX_PRECISION IN_LIST WarpX_PRECISION_VALUES)
     message(FATAL_ERROR "WarpX_PRECISION (${WarpX_PRECISION}) must be one of ${WarpX_PRECISION_VALUES}")
 endif()
 
-set(WarpX_COMPUTE_VALUES NOACC OMP CUDA DPCPP) # HIP
-set(WarpX_COMPUTE OMP CACHE STRING "On-node, accelerated computing backend (NOACC/OMP/CUDA/DPCPP)")
+set(WarpX_COMPUTE_VALUES NOACC OMP CUDA DPCPP HIP)  # accepted on-node compute backends
+set(WarpX_COMPUTE OMP CACHE STRING "On-node, accelerated computing backend (NOACC/OMP/CUDA/DPCPP/HIP)")
 set_property(CACHE WarpX_COMPUTE PROPERTY STRINGS ${WarpX_COMPUTE_VALUES})
 if(NOT WarpX_COMPUTE IN_LIST WarpX_COMPUTE_VALUES)
-    message(FATAL_ERROR "WarpX_PRECISION (${WarpX_COMPUTE}) must be one of ${WarpX_COMPUTE_VALUES}")
+    message(FATAL_ERROR "WarpX_COMPUTE (${WarpX_COMPUTE}) must be one of ${WarpX_COMPUTE_VALUES}")
 
@@ -123,7 +123,7 @@ CMake Option                  Default & Values                             Descr
 ============================= ============================================ =======================================================
 ``CMAKE_BUILD_TYPE``          **RelWithDebInfo**/Release/Debug             Type of build, symbols & optimizations
 ``WarpX_ASCENT``              ON/**OFF**                                   Ascent in situ visualization
-``WarpX_COMPUTE``             NOACC/**OMP**/CUDA/DPCPP                     On-node, accelerated computing backend
+``WarpX_COMPUTE``             NOACC/**OMP**/CUDA/DPCPP/HIP                 On-node, accelerated computing backend
 ``WarpX_DIMS``                **3**/2/RZ                                   Simulation dimensionality
 ``WarpX_MPI``                 **ON**/OFF                                   Multi-node support (message-passing)
 ``WarpX_MPI_THREAD_MULTIPLE`` **ON**/OFF                                   MPI thread-multiple support, i.e. for ``async_io``
 
@@ -3,6 +3,14 @@
 Juwels (JSC)
 ============
 
+.. note::
+
+   There's currently a bug when building WarpX on Juwels! The latest versions of the development branches of WarpX and AMReX do not compile there.
+   Below are the latest working commits; please check out those commits before compiling. If you need more recent features, select the specific commits with ``git cherry-pick <commit hash>``.
+
+   * WarpX: a548b14e8108ab22294f85516c4e9ea8b1462703
+   * AMReX: 21269eff092d0a03aff9269b1200c0e408fde90e
+
 The `Juwels supercomputer <https://www.fz-juelich.de/ias/jsc/EN/Expertise/Supercomputers/JUWELS/JUWELS_node.html>`_ is located at JSC.
 
 See `this page <https://apps.fz-juelich.de/jsc/hps/juwels/quickintro.html>`_ for a quick introduction.
 
@@ -134,9 +134,12 @@ Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following comman
 
 .. code-block:: bash
 
-   make -j 16 COMP=gcc USE_GPU=TRUE USE_OPENPMD=TRUE
+   mkdir -p build
+   cd build
+   cmake .. -DWarpX_OPENPMD=ON -DWarpX_DIMS=3 -DWarpX_COMPUTE=CUDA -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDA_ARCH=7.0
+   make -j 10
 
-The other :ref:`general compile-time options <building-source>` apply as usual.
+The general :ref:`cmake compile-time options <building-cmake>` apply as usual.
 
 
 Running
 
@@ -376,6 +376,11 @@ Particle initialization
     precision within a reasonable time ; in that case, users can set a
     relaxed precision requirement through ``self_fields_required_precision``.
 
+* ``<species_name>.self_fields_max_iters`` (`integer`, default: 200)
+    Maximum number of iterations used by the MLMG solver for the initial
+    space-charge field calculation. If MLMG converges but fails to reach the
+    desired ``self_fields_required_precision``, this parameter may be increased.
+
 * ``<species_name>.profile`` (`string`)
     Density profile for this species. The options are:
 
@@ -1209,7 +1214,7 @@ Numerics and algorithms
     This option guarantees charge conservation only when used in combination with ``psatd.periodic_single_box_fft=1``, namely for periodic single-box simulations with global FFTs without guard cells.
     The implementation for domain decomposition with local FFTs over guard cells is planned but not yet completed.
 
-* ``psatd.update_with_rho`` (`0` or `1`; default: `0`)
+* ``psatd.update_with_rho`` (`0` or `1`)
     If true, the update equation for the electric field is expressed in terms of both the current density and the charge density, namely :math:`\widehat{\boldsymbol{J}}^{\,n+1/2}`, :math:`\widehat\rho^{n}`, and :math:`\widehat\rho^{n+1}`.
     If false, instead, the update equation for the electric field is expressed in terms of the current density :math:`\widehat{\boldsymbol{J}}^{\,n+1/2}` only.
     If charge is expected to be conserved (by setting, for example, ``psatd.current_correction=1``), then the two formulations are expected to be equivalent.
@@ -1276,6 +1281,11 @@ Numerics and algorithms
 
     The coefficients :math:`C`, :math:`S`, :math:`\theta`, :math:`\nu`, :math:`\chi_1`, :math:`\chi_2`, and :math:`\chi_3` are defined in (`Lehe et al, PRE 94, 2016 <https://doi.org/10.1103/PhysRevE.94.053305>`_).
 
+    The default value for ``psatd.update_with_rho`` is ``1`` if ``psatd.v_galilean`` is non-zero or
+    in RZ geometry and ``0`` otherwise.
+
+    Note that ``psatd.update_with_rho=0`` is not supported in RZ geometry.
+
-* ``pstad.v_galilean`` (`3 floats`, in units of the speed of light; default `0. 0. 0.`)
+* ``psatd.v_galilean`` (`3 floats`, in units of the speed of light; default `0. 0. 0.`)
     Defines the galilean velocity.
     Non-zero `v_galilean` activates Galilean algorithm, which suppresses the Numerical Cherenkov instability
@@ -1433,11 +1443,12 @@ In-situ capabilities can be used by turning on Sensei or Ascent (provided they a
 
 * ``<diag_name>.fields_to_plot`` (list of `strings`, optional)
     Fields written to output.
-    Possible values: ``Ex`` ``Ey`` ``Ez`` ``Bx`` ``By`` ``Bz`` ``jx`` ``jy`` ``jz`` ``part_per_cell`` ``rho`` ``F`` ``part_per_grid`` ``part_per_proc`` ``divE`` ``divB`` and ``rho_<species_name>``, where ``<species_name>`` must match the name of one of the available particle species.
+    Possible values: ``Ex`` ``Ey`` ``Ez`` ``Bx`` ``By`` ``Bz`` ``jx`` ``jy`` ``jz`` ``part_per_cell`` ``rho`` ``F`` ``part_per_grid`` ``divE`` ``divB`` and ``rho_<species_name>``, where ``<species_name>`` must match the name of one of the available particle species.
     Default is ``<diag_name>.fields_to_plot = Ex Ey Ez Bx By Bz jx jy jz``.
+    Note that the fields are averaged on the cell centers before they are written to file.
 
 * ``<diag_name>.plot_raw_fields`` (`0` or `1`) optional (default `0`)
-    By default, the fields written in the plot files are averaged on the nodes.
+    By default, the fields written in the plot files are averaged on the cell centers.
-    When ```warpx.plot_raw_fields`` is `1`, then the raw (i.e. unaveraged)
+    When ``<diag_name>.plot_raw_fields`` is `1`, then the raw (i.e. unaveraged)
     fields are also saved in the output files.
     Only works with ``<diag_name>.format = plotfile``.
@@ -1462,7 +1473,10 @@ In-situ capabilities can be used by turning on Sensei or Ascent (provided they a
 * ``<diag_name>.coarsening_ratio`` (list of `int`) optional (default `1 1 1`)
     Reduce size of the field output by this ratio in each dimension.
     (This is done by averaging the field over 1 or 2 points along each direction, depending on the staggering).
-    ``plot_coarsening_ratio`` should be an integer divisor of ``blocking_factor``, defined in the :ref:`parallelization <parallelization_warpx>` section.
+    If ``blocking_factor`` and ``max_grid_size`` are used for the domain decomposition, as detailed in
+    the :ref:`parallelization <parallelization_warpx>` section, ``coarsening_ratio`` should be an integer
+    divisor of ``blocking_factor``. If ``warpx.numprocs`` is used instead, the total number of cells in a given
+    dimension must be a multiple of the ``coarsening_ratio`` multiplied by ``numprocs`` in that dimension.
 
 * ``<diag_name>.file_prefix`` (`string`) optional (default `diags/plotfiles/plt`)
     Root for output file names. Supports sub-directories.
@@ -1661,6 +1675,9 @@ Reduced Diagnostics
         the maximum value of the norm :math:`|B|` of the magnetic field,
         at mesh refinement levels from  0 to :math:`n`.
 
+        Note that the fields are averaged on the cell centers before their maximum values are
+        computed.
+
     * ``ParticleNumber``
         This type computes the total number of macroparticles in the simulation (for each species
         and summed over all species). It can be useful in particular for simulations with creation