Skip to content

Commit 2ac1993

Browse files
committed
Modernize idcpu Treatment
- faster: fewer emitted operations, no jumps - cheaper: fewer registers used - safer: no read-before-write warnings - cooler: no explanation needed
1 parent 82aec47 commit 2ac1993

File tree

12 files changed

+42
-50
lines changed

12 files changed

+42
-50
lines changed

Source/Diagnostics/ReducedDiags/FieldProbeParticleContainer.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ FieldProbeParticleContainer::AddNParticles (int lev,
9797
for (int i = 0; i < np; i++)
9898
{
9999
auto & idcpu_data = pinned_tile.GetStructOfArrays().GetIdCPUData();
100-
idcpu_data.push_back(0);
101-
amrex::ParticleIDWrapper{idcpu_data.back()} = ParticleType::NextID();
102-
amrex::ParticleCPUWrapper(idcpu_data.back()) = ParallelDescriptor::MyProc();
100+
idcpu_data.push_back(amrex::SetParticleIDandCPU(ParticleType::NextID(), ParallelDescriptor::MyProc()));
103101
}
104102

105103
// write Real attributes (SoA) to particle initialized zero

Source/EmbeddedBoundary/ParticleBoundaryProcess.H

+3-1
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77
#ifndef PARTICLEBOUNDARYPROCESS_H_
88
#define PARTICLEBOUNDARYPROCESS_H_
99

10+
#include <AMReX_Particle.H>
1011
#include <AMReX_REAL.H>
1112
#include <AMReX_RealVect.H>
1213
#include <AMReX_Random.H>
1314

15+
1416
namespace ParticleBoundaryProcess {
1517

1618
struct NoOp {
@@ -29,7 +31,7 @@ struct Absorb {
2931
const amrex::RealVect& /*pos*/, const amrex::RealVect& /*normal*/,
3032
amrex::RandomEngine const& /*engine*/) const noexcept
3133
{
32-
ptd.id(i) = -ptd.id(i);
34+
amrex::ParticleIDWrapper{ptd.m_idcpu[i]}.make_invalid();
3335
}
3436
};
3537
}

Source/EmbeddedBoundary/ParticleScraper.H

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
* passed in to this function as an argument. This function can access the
3939
* position at which the particle hit the boundary, and also the associated
4040
* normal vector. Particles can be `absorbed` by setting their ids to negative
41-
* to flag them for removal. Likewise, the can be reflected back into the domain
41+
* to flag them for removal. Likewise, they can be reflected back into the domain
4242
* by modifying their data appropriately and leaving their ids alone.
4343
*
4444
* This version operates only at the specified level.
@@ -82,7 +82,7 @@ scrapeParticles (PC& pc, const amrex::Vector<const amrex::MultiFab*>& distance_t
8282
* passed in to this function as an argument. This function can access the
8383
* position at which the particle hit the boundary, and also the associated
8484
* normal vector. Particles can be `absorbed` by setting their ids to negative
85-
* to flag them for removal. Likewise, the can be reflected back into the domain
85+
* to flag them for removal. Likewise, they can be reflected back into the domain
8686
* by modifying their data appropriately and leaving their ids alone.
8787
*
8888
* This version operates over all the levels in the pc.
@@ -175,7 +175,7 @@ scrapeParticles (PC& pc, const amrex::Vector<const amrex::MultiFab*>& distance_t
175175
[=] AMREX_GPU_DEVICE (const int ip, amrex::RandomEngine const& engine) noexcept
176176
{
177177
// skip particles that are already flagged for removal
178-
if (ptd.id(ip) < 0) return;
178+
if (!amrex::ParticleIDWrapper{ptd.m_idcpu[ip]}.is_valid()) return;
179179

180180
amrex::ParticleReal xp, yp, zp;
181181
getPosition(ip, xp, yp, zp);

Source/Particles/Collision/BinaryCollision/DSMC/SplitAndScatterFunc.H

+5-7
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,14 @@ int splitScatteringParticles (
7878
// to replace the following lambda.
7979
auto const atomicSetIdMinus = [] AMREX_GPU_DEVICE (uint64_t & idcpu)
8080
{
81-
constexpr amrex::Long minus_one_long = -1;
82-
uint64_t tmp = 0;
83-
amrex::ParticleIDWrapper wrapper(tmp);
84-
wrapper = minus_one_long;
8581
#if defined(AMREX_USE_OMP)
8682
#pragma omp atomic write
87-
idcpu = wrapper.m_idata;
83+
idcpu = amrex::ParticleIdCpus::Invalid;
8884
#else
89-
auto *old_ptr = reinterpret_cast<unsigned long long*>(&idcpu);
90-
amrex::Gpu::Atomic::Exch(old_ptr, (unsigned long long) wrapper.m_idata);
85+
// The first argument must be a pointer to the 64-bit word being exchanged
// (as in the previous revision, which passed an unsigned long long*), not the
// address converted to an integer value.
amrex::Gpu::Atomic::Exch(
86+
reinterpret_cast<unsigned long long*>(&idcpu),
87+
static_cast<unsigned long long>(amrex::ParticleIdCpus::Invalid)
88+
);
9189
#endif
9290
};
9391

Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H

+6-7
Original file line numberDiff line numberDiff line change
@@ -206,16 +206,14 @@ public:
206206
// to replace the following lambda.
207207
auto const atomicSetIdMinus = [] AMREX_GPU_DEVICE (uint64_t & idcpu)
208208
{
209-
constexpr amrex::Long minus_one_long = -1;
210-
uint64_t tmp = 0;
211-
amrex::ParticleIDWrapper wrapper(tmp);
212-
wrapper = minus_one_long;
213209
#if defined(AMREX_USE_OMP)
214210
#pragma omp atomic write
215-
idcpu = wrapper.m_idata;
211+
idcpu = amrex::ParticleIdCpus::Invalid;
216212
#else
217-
auto *old_ptr = reinterpret_cast<unsigned long long*>(&idcpu);
218-
amrex::Gpu::Atomic::Exch(old_ptr, (unsigned long long) wrapper.m_idata);
213+
// The first argument must be a pointer to the 64-bit word being exchanged
// (as in the previous revision, which passed an unsigned long long*), not the
// address converted to an integer value.
amrex::Gpu::Atomic::Exch(
214+
reinterpret_cast<unsigned long long*>(&idcpu),
215+
static_cast<unsigned long long>(amrex::ParticleIdCpus::Invalid)
216+
);
219217
#endif
220218
};
221219

@@ -224,6 +222,7 @@ public:
224222
if (w1[p_pair_indices_1[i]] <= amrex::ParticleReal(0.))
225223
{
226224
atomicSetIdMinus(idcpu1[p_pair_indices_1[i]]);
225+
227226
}
228227
if (w2[p_pair_indices_2[i]] <= amrex::ParticleReal(0.))
229228
{

Source/Particles/ElementaryProcess/QEDPairGeneration.H

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ public:
167167
p_ux, p_uy, p_uz,
168168
engine);
169169

170-
amrex::ParticleIDWrapper{src.m_idcpu[i_src]} = -1; // destroy photon after pair generation
170+
src.m_idcpu[i_src] = amrex::ParticleIdCpus::Invalid; // destroy photon after pair generation
171171
}
172172

173173
private:

Source/Particles/ElementaryProcess/QEDPhotonEmission.H

+1-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ void cleanLowEnergyPhotons(
261261
const auto phot_energy2 = (ux*ux + uy*uy + uz*uz)*me_c*me_c;
262262

263263
if (phot_energy2 < energy_threshold2) {
264-
amrex::ParticleIDWrapper{p_idcpu[ip]} = -1;
264+
p_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
265265
}
266266
});
267267
}

Source/Particles/ParticleCreation/SmartCreate.H

+1-2
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,7 @@ struct SmartCreate
6161
amrex::ignore_unused(x,y);
6262
#endif
6363

64-
amrex::ParticleIDWrapper{prt.m_idcpu[i_prt]} = id;
65-
amrex::ParticleCPUWrapper{prt.m_idcpu[i_prt]} = cpu;
64+
prt.m_idcpu[i_prt] = amrex::SetParticleIDandCPU(id, cpu);
6665

6766
// initialize the real components after position
6867
for (int j = AMREX_SPACEDIM; j < PartData::NAR; ++j) {

Source/Particles/ParticleCreation/SmartUtils.H

+1-2
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ void setNewParticleIDs (PTile& ptile, int old_size, int num_added)
6565
amrex::ParallelFor(num_added, [=] AMREX_GPU_DEVICE (int ip) noexcept
6666
{
6767
auto const new_id = ip + old_size;
68-
amrex::ParticleIDWrapper{ptd.m_idcpu[new_id]} = pid+ip;
69-
amrex::ParticleCPUWrapper{ptd.m_idcpu[new_id]} = cpuid;
68+
ptd.m_idcpu[new_id] = amrex::SetParticleIDandCPU(pid+ip, cpuid);
7069
});
7170
}
7271

Source/Particles/PhysicalParticleContainer.cpp

+11-13
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ namespace
239239
if (has_breit_wheeler) {p_optical_depth_BW[ip] = 0._rt;}
240240
#endif
241241

242-
amrex::ParticleIDWrapper{idcpu[ip]} = -1;
242+
idcpu[ip] = amrex::ParticleIdCpus::Invalid;
243243
}
244244
}
245245

@@ -1225,8 +1225,7 @@ PhysicalParticleContainer::AddPlasma (PlasmaInjector const& plasma_injector, int
12251225
for (int i_part = 0; i_part < pcounts[index]; ++i_part)
12261226
{
12271227
long ip = poffset[index] + i_part;
1228-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = pid+ip;
1229-
amrex::ParticleCPUWrapper{pa_idcpu[ip]} = cpuid;
1228+
pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid);
12301229
const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ?
12311230
// In the refined injection region: use refinement ratio `lrrfac`
12321231
inj_pos->getPositionUnitBox(i_part, lrrfac, engine) :
@@ -1766,8 +1765,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
17661765
for (int i_part = 0; i_part < pcounts[index]; ++i_part)
17671766
{
17681767
const long ip = poffset[index] + i_part;
1769-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = pid+ip;
1770-
amrex::ParticleCPUWrapper{pa_idcpu[ip]} = cpuid;
1768+
pa_idcpu[ip] = amrex::SetParticleIDandCPU(pid+ip, cpuid);
17711769

17721770
// This assumes the flux_pos is of type InjectorPositionRandomPlane
17731771
const XDim3 r = (fine_overlap_box.ok() && fine_overlap_box.contains(iv)) ?
@@ -1792,27 +1790,27 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
17921790
// the particles will be within the domain.
17931791
#if defined(WARPX_DIM_3D)
17941792
if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.x,ppos.y,ppos.z})) {
1795-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
1793+
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
17961794
continue;
17971795
}
17981796
#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
17991797
amrex::ignore_unused(k);
18001798
if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.x,ppos.z,0.0_prt})) {
1801-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
1799+
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
18021800
continue;
18031801
}
18041802
#else
18051803
amrex::ignore_unused(j,k);
18061804
if (!ParticleUtils::containsInclusive(tile_realbox, XDim3{ppos.z,0.0_prt,0.0_prt})) {
1807-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
1805+
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
18081806
continue;
18091807
}
18101808
#endif
18111809
// Lab-frame simulation
18121810
// If the particle's initial position is not within or on the species's
18131811
// xmin, xmax, ymin, ymax, zmin, zmax, go to the next generated particle.
18141812
if (!flux_pos->insideBoundsInclusive(ppos.x, ppos.y, ppos.z)) {
1815-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
1813+
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
18161814
continue;
18171815
}
18181816

@@ -1845,8 +1843,8 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
18451843
#endif
18461844
Real flux = inj_flux->getFlux(ppos.x, ppos.y, ppos.z, t);
18471845
// Remove particle if flux is negative or 0
1848-
if ( flux <=0 ){
1849-
amrex::ParticleIDWrapper{pa_idcpu[ip]} = -1;
1846+
if (flux <= 0) {
1847+
pa_idcpu[ip] = amrex::ParticleIdCpus::Invalid;
18501848
continue;
18511849
}
18521850

@@ -1855,7 +1853,7 @@ PhysicalParticleContainer::AddPlasmaFlux (PlasmaInjector const& plasma_injector,
18551853
}
18561854

18571855
#ifdef WARPX_QED
1858-
if(loc_has_quantum_sync){
1856+
if (loc_has_quantum_sync) {
18591857
p_optical_depth_QSR[ip] = quantum_sync_get_opt(engine);
18601858
}
18611859

@@ -2459,7 +2457,7 @@ PhysicalParticleContainer::SplitParticles (int lev)
24592457
}
24602458
#endif
24612459
// invalidate the particle
2462-
amrex::ParticleIDWrapper{idcpu[i]} = -1;
2460+
idcpu[i] = amrex::ParticleIdCpus::Invalid;
24632461
}
24642462
}
24652463
}

Source/Particles/Resampling/LevelingThinning.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ void LevelingThinning::operator() (WarpXParIter& pti, const int lev,
113113
// Remove particle with probability 1 - particle_weight/level_weight
114114
if (random_number > w[indices[i]]/level_weight)
115115
{
116-
amrex::ParticleIDWrapper{idcpu[indices[i]]} = -1;
116+
idcpu[indices[i]] = amrex::ParticleIdCpus::Invalid;
117117
}
118118
// Set particle weight to level weight otherwise
119119
else

Source/Particles/WarpXParticleContainer.cpp

+8-9
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,12 @@ WarpXParticleContainer::AddNParticles (int /*lev*/, long n,
212212
for (auto i = ibegin; i < iend; ++i)
213213
{
214214
auto & idcpu_data = pinned_tile.GetStructOfArrays().GetIdCPUData();
215-
idcpu_data.push_back(0);
216-
if (id==-1) {
217-
amrex::ParticleIDWrapper{idcpu_data.back()} = ParticleType::NextID();
218-
} else {
219-
amrex::ParticleIDWrapper{idcpu_data.back()} = id;
215+
216+
amrex::Long current_id = id; // copy input
217+
if (id == -1) {
218+
current_id = ParticleType::NextID();
220219
}
221-
amrex::ParticleCPUWrapper(idcpu_data.back()) = ParallelDescriptor::MyProc();
220+
idcpu_data.push_back(amrex::SetParticleIDandCPU(current_id, ParallelDescriptor::MyProc()));
222221

223222
#ifdef WARPX_DIM_RZ
224223
r[i-ibegin] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
@@ -1480,8 +1479,8 @@ WarpXParticleContainer::ApplyBoundaryConditions (){
14801479
pti.numParticles(),
14811480
[=] AMREX_GPU_DEVICE (long i, amrex::RandomEngine const& engine) {
14821481
// skip particles that are already flagged for removal
1483-
auto const id = amrex::ParticleIDWrapper{idcpu[i]};
1484-
if (id < 0) { return; }
1482+
auto pidw = amrex::ParticleIDWrapper{idcpu[i]};
1483+
if (!pidw.is_valid()) { return; }
14851484

14861485
ParticleReal x, y, z;
14871486
GetPosition.AsStored(i, x, y, z);
@@ -1503,7 +1502,7 @@ WarpXParticleContainer::ApplyBoundaryConditions (){
15031502
boundary_conditions, engine);
15041503

15051504
if (particle_lost) {
1506-
amrex::ParticleIDWrapper{idcpu[i]} = -id;
1505+
pidw.make_invalid();
15071506
} else {
15081507
SetPosition.AsStored(i, x, y, z);
15091508
}

0 commit comments

Comments
 (0)