From b855fbbf004dffe9709b83265469dbf28fb1fecb Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Fri, 22 Mar 2024 00:12:25 -0400 Subject: [PATCH 01/10] add rough implementation of Lagrangian MC particle pusher --- src/bvals/bvals_part.cpp | 3 + src/particles/particles.cpp | 16 ++++ src/particles/particles.hpp | 2 +- src/particles/particles_pushers.cpp | 140 +++++++++++++++++++++++++++- 4 files changed, 156 insertions(+), 5 deletions(-) diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp index 9c79d0e8..0e080a10 100644 --- a/src/bvals/bvals_part.cpp +++ b/src/bvals/bvals_part.cpp @@ -64,6 +64,9 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { Kokkos::realloc(sendlist, static_cast(0.1*npart)); par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) { int m = pi(PGID,p) - gids; + + // TODO(GNW): This won't work if we don't store particle velocities because then + // IPX, IPY, IPZ will resolve to 0, 2, 4 rather than 0, 1, 2 Real x1 = pr(IPX,p); Real x2 = pr(IPY,p); Real x3 = pr(IPZ,p); diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp index 0c26350f..77d92bda 100644 --- a/src/particles/particles.cpp +++ b/src/particles/particles.cpp @@ -45,6 +45,10 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : std::string ptype = pin->GetString("particles","particle_type"); if (ptype.compare("cosmic_ray") == 0) { particle_type = ParticleType::cosmic_ray; + } else if (ptype.compare("lagrangian_mc") == 0) { + // TODO(GNW): Support initialization + // TODO(GNW): Restrict which pusher is alowed based on particle type + particle_type = ParticleType::lagrangian_mc; } else { std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ << std::endl << "Particle type = '" << ptype << "' not recognized" @@ -58,6 +62,8 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : std::string ppush = pin->GetString("particles","pusher"); if (ppush.compare("drift") == 0) { pusher = ParticlesPusher::drift; + } else if (ppush.compare("lagrangian_mc") == 0) { + pusher = ParticlesPusher::lagrangian_mc; } else { std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ << std::endl << "Particle pusher must be specified in block" @@ -81,6 +87,16 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : nidata = 2; break; } + case ParticleType::lagrangian_mc: + { + // TODO(GNW): This implementation may have issues for IPX, IPY, IPZ + // without IVX, IVY, IVZ... + int ndim=2; + if (pmy_pack->pmesh->three_d) {ndim+=1;} + nrdata = ndim; + nidata = 2; + break; + } default: break; } diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp index 3ab43781..4fc22e95 100644 --- a/src/particles/particles.hpp +++ b/src/particles/particles.hpp @@ -23,7 +23,7 @@ enum class ParticlesPusher {drift, leap_frog, lagrangian_tracer, lagrangian_mc}; // constants that enumerate ParticleTypes -enum class ParticleType {cosmic_ray}; +enum class ParticleType {cosmic_ray, lagrangian_mc}; //---------------------------------------------------------------------------------------- //! \struct ParticlesTaskIDs diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp index 46b3ebb2..c071478b 100644 --- a/src/particles/particles_pushers.cpp +++ b/src/particles/particles_pushers.cpp @@ -8,9 +8,13 @@ #include "athena.hpp" #include "mesh/mesh.hpp" +#include "hydro/hydro.hpp" +#include "mhd/mhd.hpp" #include "driver/driver.hpp" #include "particles.hpp" +#include + namespace particles { //---------------------------------------------------------------------------------------- //! \fn void Particles::ParticlesPush @@ -26,9 +30,38 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) { auto &mbsize = pmy_pack->pmb->mb_size; auto &pi = prtcl_idata; auto &pr = prtcl_rdata; - auto dt_ = (pmy_pack->pmesh->dt); + auto dt_ = pmy_pack->pmesh->dt; auto gids = pmy_pack->gids; + // Used in lagrangian_mc pusher + auto dtold_ = pmy_pack->pmesh->dtold; + auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1; + auto &uflx = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflx:pmy_pack->pmhd->uflx; + auto &flx1_ = uflx.x1f; + auto &flx2_ = uflx.x2f; + auto &flx3_ = uflx.x3f; + + // TODO(GNW): Maybe move this outside and pass it in as needed? What is the + // overhead associated with recreating the pool each call? + + Kokkos::Random_XorShift64_Pool<> rand_pool64(pmy_pack->gids); + + // TODO(GNW): Note that for MC tracers we need to handle mesh refinement (in AMR) + // more carefully. + + // TODO(GNW): Is it better to do an if statement here and capture the variables + // for the lambda or keep the switch? + + // TODO(GNW): Are these fluxes are the correct ones to use (RK question)? + + // TODO(GNW): The particle pusher gets called before the time integrator, but + // for the MC pusher we need to know the fluxes from the previous + // time step, which means we need to read from u1 and fluxes. + // One way to deal with this is just to have the MC pusher update + // particle positions "one step out of phase" and use dtold. An + // alternative is to have the MC pusher use a separate task that + // gets called after the time integrator. + switch (pusher) { case ParticlesPusher::drift: @@ -49,9 +82,108 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) { } }); - break; - default: - break; + break; + + case ParticlesPusher::lagrangian_mc: + + par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), + KOKKOS_LAMBDA(const int p) { + // get random number state this thread + auto rand_gen = rand_pool64.get_state(); + + int m = pi(PGID,p) - gids; + + int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; + int jp = 0; + int kp = 0; + + if (multi_d) { + jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; + } + + if (three_d) { + kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; + } + + bool particle_has_moved = false; + Real reduced_mass = u1_(m,IDN,kp,jp,ip); + + // by convention, these values will be negative when there is outflow + // with respect to the current particle's cell + Real flx1_left = flx1_(m,IDN,kp,jp,ip); + Real flx1_right = -flx1_(m,IDN,kp,jp,ip+1); + Real flx2_left = (multi_d) ? flx2_(m,IDN,kp,jp,jp) : 0.; + Real flx2_right = (multi_d) ? -flx2_(m,IDN,kp,jp+1,jp) : 0.; + Real flx3_left = (three_d) ? flx3_(m,IDN,kp,kp,jp) : 0.; + Real flx3_right = (three_d) ? -flx3_(m,IDN,kp+1,kp,jp) : 0.; + + flx1_left *= mbsize.d_view(m).dx1 * dtold_; + flx1_right *= mbsize.d_view(m).dx1 * dtold_; + + flx2_left *= mbsize.d_view(m).dx2 * dtold_; + flx2_right *= mbsize.d_view(m).dx2 * dtold_; + + flx3_left *= mbsize.d_view(m).dx3 * dtold_; + flx3_right *= mbsize.d_view(m).dx3 * dtold_; + + if (flx1_left < 0) { + if (rand_gen.frand() < fabs(flx1_left)/reduced_mass) { + pr(IPX,p) = (ip - 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min; + particle_has_moved = true; + } + reduced_mass += flx1_left; + } + + if (!particle_has_moved && flx1_right < 0) { + if (rand_gen.frand() < fabs(flx1_right)/reduced_mass) { + pr(IPX,p) = (ip + 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min; + particle_has_moved = true; + } + reduced_mass += flx1_right; + } + + if (multi_d) { + if (!particle_has_moved && flx2_left < 0) { + if (rand_gen.frand() < fabs(flx2_left)/reduced_mass) { + pr(IPY,p) = (jp - 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min; + particle_has_moved = true; + } + reduced_mass += flx2_left; + } + + if (!particle_has_moved && flx2_right < 0) { + if (rand_gen.frand() < fabs(flx2_right)/reduced_mass) { + pr(IPY,p) = (jp + 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min; + particle_has_moved = true; + } + reduced_mass += flx2_right; + } + } + + if (three_d) { + if (!particle_has_moved && flx3_left < 0) { + if (rand_gen.frand() < fabs(flx3_left)/reduced_mass) { + pr(IPZ,p) = (kp - 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min; + particle_has_moved = true; + } + reduced_mass += flx3_left; + } + + if (!particle_has_moved && flx3_right < 0) { + if (rand_gen.frand() < fabs(flx3_right)/reduced_mass) { + pr(IPZ,p) = (kp + 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min; + particle_has_moved = true; + } + reduced_mass += flx3_right; + } + } + rand_pool64.free_state(rand_gen); // free state for use by other threads + }); + + break; + + default: + break; } return TaskStatus::complete; From b8a35ef8a1cd370b8652e96eca9f957a6b40ed9e Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Tue, 2 Apr 2024 01:52:03 -0400 Subject: [PATCH 02/10] use corrected full-step fluxes --- src/athena.hpp | 2 +- src/bvals/bvals_part.cpp | 3 +- src/hydro/hydro.cpp | 19 ++ src/hydro/hydro.hpp | 7 + src/hydro/hydro_fluxes.cpp | 61 +++++++ src/hydro/hydro_tasks.cpp | 3 +- src/mhd/mhd.cpp | 19 ++ src/mhd/mhd.hpp | 7 + src/mhd/mhd_fluxes.cpp | 61 +++++++ src/mhd/mhd_tasks.cpp | 3 +- src/particles/particles.cpp | 34 ++-- src/particles/particles.hpp | 7 + src/particles/particles_pushers.cpp | 272 ++++++++++++---------------- src/particles/particles_tasks.cpp | 41 ++++- src/pgen/kh.cpp | 61 +++++++ 15 files changed, 423 insertions(+), 177 deletions(-) diff --git a/src/athena.hpp b/src/athena.hpp index 7aae1916..02a1ee00 100644 --- a/src/athena.hpp +++ b/src/athena.hpp @@ -52,7 +52,7 @@ enum VariableIndex {IDN=0, IM1=1, IVX=1, IM2=2, IVY=2, IM3=3, IVZ=3, IEN=4, ITM= // array indices for components of magnetic field enum BFieldIndex {IBX=0, IBY=1, IBZ=2}; // array indices for particle arrays -enum ParticlesIndex {PGID=0, PTAG=1, IPX=0, IPVX=1, IPY=2, IPVY=3, IPZ=4, IPVZ=5}; +enum ParticlesIndex {PGID=0, PTAG=1, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5}; // integer constants to specify spatial reconstruction methods enum ReconstructionMethod {dc, plm, ppm4, ppmx, wenoz}; diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp index 0e080a10..76482ae1 100644 --- a/src/bvals/bvals_part.cpp +++ b/src/bvals/bvals_part.cpp @@ -65,8 +65,6 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) { int m = pi(PGID,p) - gids; - // TODO(GNW): This won't work if we don't store particle velocities because then - // IPX, IPY, IPZ will resolve to 0, 2, 4 rather than 0, 1, 2 Real x1 = pr(IPX,p); Real x2 = pr(IPY,p); Real x3 = pr(IPZ,p); @@ -194,6 +192,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min); } }); + nprtcl_send = counter; Kokkos::resize(sendlist, nprtcl_send); // sync sendlist device array with host diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp index c1dec57f..8c362f95 100644 --- a/src/hydro/hydro.cpp +++ b/src/hydro/hydro.cpp @@ -32,6 +32,7 @@ Hydro::Hydro(MeshBlockPack *ppack, ParameterInput *pin) : coarse_w0("cprim",1,1,1,1,1), u1("cons1",1,1,1,1,1), uflx("uflx",1,1,1,1,1), + uflxidnsaved("uflxidn",1,1,1,1), utest("utest",1,1,1,1,1), fofc("fofc",1,1,1,1) { // Total number of MeshBlocks on this rank to be used in array dimensioning @@ -287,4 +288,22 @@ Hydro::~Hydro() { if (psrc != nullptr) {delete psrc;} } +//---------------------------------------------------------------------------------------- +// SetSaveUFlxIdn: set flag to save density flux over entire step + +void Hydro::SetSaveUFlxIdn() { + int nmb = std::max((pmy_pack->nmb_thispack), (pmy_pack->pmesh->nmb_maxperrank)); + auto &indcs = pmy_pack->pmesh->mb_indcs; + int ncells1 = indcs.nx1 + 2*(indcs.ng); + int ncells2 = (indcs.nx2 > 1)? (indcs.nx2 + 2*(indcs.ng)) : 1; + int ncells3 = (indcs.nx3 > 1)? (indcs.nx3 + 2*(indcs.ng)) : 1; + + // allocated saved arrays for density flux + Kokkos::realloc(uflxidnsaved.x1f, nmb, ncells3, ncells2, ncells1+1); + Kokkos::realloc(uflxidnsaved.x2f, nmb, ncells3, ncells2+1, ncells1); + Kokkos::realloc(uflxidnsaved.x3f, nmb, ncells3+1, ncells2, ncells1); + + uflxidn_saved = true; +} + } // namespace hydro diff --git a/src/hydro/hydro.hpp b/src/hydro/hydro.hpp index fb7e72f9..ce3701bd 100644 --- a/src/hydro/hydro.hpp +++ b/src/hydro/hydro.hpp @@ -41,6 +41,7 @@ struct HydroTaskIDs { TaskID sendf; TaskID recvf; TaskID expl; + TaskID savef; TaskID restu; TaskID sendu; TaskID recvu; @@ -88,6 +89,10 @@ class Hydro { DvceFaceFld5D uflx; // fluxes of conserved quantities on cell faces Real dtnew; + // following used to save step-to-step flux values + bool uflxidn_saved = false; + DvceFaceFld4D uflxidnsaved; + // following used for FOFC DvceArray4D fofc; // flag for each cell to indicate if FOFC is needed bool use_fofc = false; // flag to enable FOFC @@ -96,6 +101,7 @@ class Hydro { HydroTaskIDs id; // functions... + void SetSaveUFlxIdn(); void AssembleHydroTasks(std::map> tl); // ...in "before_stagen_tl" list TaskStatus InitRecv(Driver *d, int stage); @@ -105,6 +111,7 @@ class Hydro { TaskStatus SendFlux(Driver *d, int stage); TaskStatus RecvFlux(Driver *d, int stage); TaskStatus ExpRKUpdate(Driver *d, int stage); + TaskStatus SaveFlux(Driver *d, int stage); TaskStatus RestrictU(Driver *d, int stage); TaskStatus SendU(Driver *d, int stage); TaskStatus RecvU(Driver *d, int stage); diff --git a/src/hydro/hydro_fluxes.cpp b/src/hydro/hydro_fluxes.cpp index 2078de39..108ab946 100644 --- a/src/hydro/hydro_fluxes.cpp +++ b/src/hydro/hydro_fluxes.cpp @@ -356,4 +356,65 @@ template void Hydro::CalculateFluxes(Driver *pdriver, in template void Hydro::CalculateFluxes(Driver *pdriver, int stage); template void Hydro::CalculateFluxes(Driver *pdriver, int stage); +//---------------------------------------------------------------------------------------- +//! \fn TaskStatus Hydro::SaveFlux +//! \brief Save IDN fluxes across all stages for full time step + +TaskStatus Hydro::SaveFlux(Driver *pdrive, int stage) { + if (uflxidn_saved) { + auto &indcs = pmy_pack->pmesh->mb_indcs; + int is = indcs.is, ie = indcs.ie; + int js = indcs.js, je = indcs.je; + int ks = indcs.ks, ke = indcs.ke; + bool &multi_d = pmy_pack->pmesh->multi_d; + bool &three_d = pmy_pack->pmesh->three_d; + + Real dt = pmy_pack->pmesh->dt; + int nmb1 = pmy_pack->nmb_thispack - 1; + auto flx1 = uflx.x1f; + auto flx2 = uflx.x2f; + auto flx3 = uflx.x3f; + auto flxidn1 = uflxidnsaved.x1f; + auto flxidn2 = uflxidnsaved.x2f; + auto flxidn3 = uflxidnsaved.x3f; + auto &mbsize = pmy_pack->pmb->mb_size; + + Real dtfactor = dt / 2.0; + + par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1, + KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) { + + // TODO(GNW): need to be careful with different time integrators + + // save dF1/dx1 + if (j <= je && k <= ke) { + if (stage == 1) { + flxidn1(m,k,j,i) = flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor; + } else { + flxidn1(m,k,j,i) += flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor; + } + } + + // save dF2/dx2 + if (multi_d && i <= ie && k <= ke) { + if (stage == 1) { + flxidn2(m,k,j,i) = flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor; + } else { + flxidn2(m,k,j,i) += flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor; + } + } + + // save dF3/dx3 + if (three_d && i <= ie && j <= je) { + if (stage == 1) { + flxidn3(m,k,j,i) = flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor; + } else { + flxidn3(m,k,j,i) += flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor; + } + } + }); + } + return TaskStatus::complete; +} + } // namespace hydro diff --git a/src/hydro/hydro_tasks.cpp b/src/hydro/hydro_tasks.cpp index 411df62e..ede546e2 100644 --- a/src/hydro/hydro_tasks.cpp +++ b/src/hydro/hydro_tasks.cpp @@ -53,7 +53,8 @@ void Hydro::AssembleHydroTasks(std::map> id.sendf = tl["stagen"]->AddTask(&Hydro::SendFlux, this, id.flux); id.recvf = tl["stagen"]->AddTask(&Hydro::RecvFlux, this, id.sendf); id.expl = tl["stagen"]->AddTask(&Hydro::ExpRKUpdate, this, id.recvf); - id.restu = tl["stagen"]->AddTask(&Hydro::RestrictU, this, id.expl); + id.savef = tl["stagen"]->AddTask(&Hydro::SaveFlux, this, id.expl); + id.restu = tl["stagen"]->AddTask(&Hydro::RestrictU, this, id.savef); id.sendu = tl["stagen"]->AddTask(&Hydro::SendU, this, id.restu); id.recvu = tl["stagen"]->AddTask(&Hydro::RecvU, this, id.sendu); id.bcs = tl["stagen"]->AddTask(&Hydro::ApplyPhysicalBCs, this, id.recvu); diff --git a/src/mhd/mhd.cpp b/src/mhd/mhd.cpp index 097865dd..1f5c178c 100644 --- a/src/mhd/mhd.cpp +++ b/src/mhd/mhd.cpp @@ -40,6 +40,7 @@ MHD::MHD(MeshBlockPack *ppack, ParameterInput *pin) : efld("efld",1,1,1,1), wsaved("wsaved",1,1,1,1,1), bccsaved("bccsaved",1,1,1,1,1), + uflxidnsaved("uflxidn",1,1,1,1), e3x1("e3x1",1,1,1,1), e2x1("e2x1",1,1,1,1), e1x2("e1x2",1,1,1,1), @@ -357,4 +358,22 @@ void MHD::SetSaveWBcc() { wbcc_saved = true; } +//---------------------------------------------------------------------------------------- +// SetSaveUFlxIdn: set flag to save density flux over entire step + +void MHD::SetSaveUFlxIdn() { + int nmb = std::max((pmy_pack->nmb_thispack), (pmy_pack->pmesh->nmb_maxperrank)); + auto &indcs = pmy_pack->pmesh->mb_indcs; + int ncells1 = indcs.nx1 + 2*(indcs.ng); + int ncells2 = (indcs.nx2 > 1)? (indcs.nx2 + 2*(indcs.ng)) : 1; + int ncells3 = (indcs.nx3 > 1)? (indcs.nx3 + 2*(indcs.ng)) : 1; + + // allocated saved arrays for density flux + Kokkos::realloc(uflxidnsaved.x1f, nmb, ncells3, ncells2, ncells1+1); + Kokkos::realloc(uflxidnsaved.x2f, nmb, ncells3, ncells2+1, ncells1); + Kokkos::realloc(uflxidnsaved.x3f, nmb, ncells3+1, ncells2, ncells1); + + uflxidn_saved = true; +} + } // namespace mhd diff --git a/src/mhd/mhd.hpp b/src/mhd/mhd.hpp index 360fc9b5..b505abf4 100644 --- a/src/mhd/mhd.hpp +++ b/src/mhd/mhd.hpp @@ -48,6 +48,7 @@ struct MHDTaskIDs { TaskID sendf; TaskID recvf; TaskID expl; + TaskID savef; TaskID restu; TaskID sendu; TaskID recvu; @@ -115,6 +116,10 @@ class MHD { DvceArray5D wsaved; DvceArray5D bccsaved; + // following used to save step-to-step flux values + bool uflxidn_saved = false; + DvceFaceFld4D uflxidnsaved; + // following used for FOFC algorithm DvceArray4D fofc; // flag for each cell to indicate if FOFC is needed bool use_fofc = false; // flag to enable FOFC @@ -124,6 +129,7 @@ class MHD { // functions... void SetSaveWBcc(); + void SetSaveUFlxIdn(); void AssembleMHDTasks(std::map> tl); // ...in "before_timeintegrator" task list TaskStatus SaveMHDState(Driver *d, int stage); @@ -135,6 +141,7 @@ class MHD { TaskStatus SendFlux(Driver *d, int stage); TaskStatus RecvFlux(Driver *d, int stage); TaskStatus ExpRKUpdate(Driver *d, int stage); + TaskStatus SaveFlux(Driver *d, int stage); TaskStatus RestrictU(Driver *d, int stage); TaskStatus SendU(Driver *d, int stage); TaskStatus RecvU(Driver *d, int stage); diff --git a/src/mhd/mhd_fluxes.cpp b/src/mhd/mhd_fluxes.cpp index a8408a27..64daeb3b 100644 --- a/src/mhd/mhd_fluxes.cpp +++ b/src/mhd/mhd_fluxes.cpp @@ -417,4 +417,65 @@ template void MHD::CalculateFluxes(Driver *pdriver, int st template void MHD::CalculateFluxes(Driver *pdriver, int stage); template void MHD::CalculateFluxes(Driver *pdriver, int stage); +//---------------------------------------------------------------------------------------- +//! \fn TaskStatus MHD::SaveFlux +//! \brief Save IDN fluxes across all stages for full time step + +TaskStatus MHD::SaveFlux(Driver *pdrive, int stage) { + if (uflxidn_saved) { + auto &indcs = pmy_pack->pmesh->mb_indcs; + int is = indcs.is, ie = indcs.ie; + int js = indcs.js, je = indcs.je; + int ks = indcs.ks, ke = indcs.ke; + bool &multi_d = pmy_pack->pmesh->multi_d; + bool &three_d = pmy_pack->pmesh->three_d; + + Real dt = pmy_pack->pmesh->dt; + int nmb1 = pmy_pack->nmb_thispack - 1; + auto flx1 = uflx.x1f; + auto flx2 = uflx.x2f; + auto flx3 = uflx.x3f; + auto flxidn1 = uflxidnsaved.x1f; + auto flxidn2 = uflxidnsaved.x2f; + auto flxidn3 = uflxidnsaved.x3f; + auto &mbsize = pmy_pack->pmb->mb_size; + + Real dtfactor = dt / 2.0; + + par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1, + KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) { + + // TODO(GNW): need to be careful with different time integrators + + // save dF1/dx1 + if (j <= je && k <= ke) { + if (stage == 1) { + flxidn1(m,k,j,i) = flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor; + } else { + flxidn1(m,k,j,i) += flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor; + } + } + + // save dF2/dx2 + if (multi_d && i <= ie && k <= ke) { + if (stage == 1) { + flxidn2(m,k,j,i) = flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor; + } else { + flxidn2(m,k,j,i) += flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor; + } + } + + // save dF3/dx3 + if (three_d && i <= ie && j <= je) { + if (stage == 1) { + flxidn3(m,k,j,i) = flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor; + } else { + flxidn3(m,k,j,i) += flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor; + } + } + }); + } + return TaskStatus::complete; +} + } // namespace mhd diff --git a/src/mhd/mhd_tasks.cpp b/src/mhd/mhd_tasks.cpp index 2f3f4c2d..f8c059c9 100644 --- a/src/mhd/mhd_tasks.cpp +++ b/src/mhd/mhd_tasks.cpp @@ -45,7 +45,8 @@ void MHD::AssembleMHDTasks(std::map> tl) id.sendf = tl["stagen"]->AddTask(&MHD::SendFlux, this, id.flux); id.recvf = tl["stagen"]->AddTask(&MHD::RecvFlux, this, id.sendf); id.expl = tl["stagen"]->AddTask(&MHD::ExpRKUpdate, this, id.recvf); - id.restu = tl["stagen"]->AddTask(&MHD::RestrictU, this, id.expl); + id.savef = tl["stagen"]->AddTask(&MHD::SaveFlux, this, id.expl); + id.restu = tl["stagen"]->AddTask(&MHD::RestrictU, this, id.savef); id.sendu = tl["stagen"]->AddTask(&MHD::SendU, this, id.restu); id.recvu = tl["stagen"]->AddTask(&MHD::RecvU, this, id.sendu); id.efld = tl["stagen"]->AddTask(&MHD::CornerE, this, id.recvu); diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp index 77d92bda..9284ddc7 100644 --- a/src/particles/particles.cpp +++ b/src/particles/particles.cpp @@ -14,6 +14,8 @@ #include "globals.hpp" #include "parameter_input.hpp" #include "mesh/mesh.hpp" +#include "hydro/hydro.hpp" +#include "mhd/mhd.hpp" #include "bvals/bvals.hpp" #include "particles.hpp" @@ -22,7 +24,8 @@ namespace particles { // constructor, initializes data structures and parameters Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : - pmy_pack(ppack) { + pmy_pack(ppack), + rand_pool64(pmy_pack->gids) { // check this is at least a 2D problem if (pmy_pack->pmesh->one_d) { std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ << std::endl @@ -46,7 +49,6 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : if (ptype.compare("cosmic_ray") == 0) { particle_type = ParticleType::cosmic_ray; } else if (ptype.compare("lagrangian_mc") == 0) { - // TODO(GNW): Support initialization // TODO(GNW): Restrict which pusher is alowed based on particle type particle_type = ParticleType::lagrangian_mc; } else { @@ -62,8 +64,22 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : std::string ppush = pin->GetString("particles","pusher"); if (ppush.compare("drift") == 0) { pusher = ParticlesPusher::drift; + if (particle_type == ParticleType::lagrangian_mc) { + std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ + << std::endl << "Particle pusher 'drift' not allowed for lagrangian_mc" + << std::endl; + std::exit(EXIT_FAILURE); + } } else if (ppush.compare("lagrangian_mc") == 0) { + // TODO(GNW): is this the right place to set timestep? + // force driver to inherit timestep from fluid + dtnew = std::numeric_limits::max(); pusher = ParticlesPusher::lagrangian_mc; + if (ppack->pmhd != nullptr) { + ppack->pmhd->SetSaveUFlxIdn(); + } else if (ppack->phydro != nullptr) { + ppack->phydro->SetSaveUFlxIdn(); + } } else { std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ << std::endl << "Particle pusher must be specified in block" @@ -81,19 +97,15 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : switch (particle_type) { case ParticleType::cosmic_ray: { - int ndim=4; - if (pmy_pack->pmesh->three_d) {ndim+=2;} - nrdata = ndim; + // save particle position then velocity for compiler optimizations even though + // 2d runs will not require all six real entries + nrdata = 6; nidata = 2; break; } case ParticleType::lagrangian_mc: { - // TODO(GNW): This implementation may have issues for IPX, IPY, IPZ - // without IVX, IVY, IVZ... - int ndim=2; - if (pmy_pack->pmesh->three_d) {ndim+=1;} - nrdata = ndim; + nrdata = 3; nidata = 2; break; } @@ -104,7 +116,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack); // allocate boundary object - pbval_part = new ParticlesBoundaryValues(this, pin); + pbval_part = new ParticlesBoundaryValues(this, pin); // TODO(GNW): do I need to check this? } //---------------------------------------------------------------------------------------- diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp index 4fc22e95..d1de50a1 100644 --- a/src/particles/particles.hpp +++ b/src/particles/particles.hpp @@ -17,6 +17,8 @@ #include "tasklist/task_list.hpp" #include "bvals/bvals.hpp" +#include + // forward declarations // constants that enumerate ParticlesPusher options @@ -82,8 +84,13 @@ class Particles { TaskStatus ClearSend(Driver *pdriver, int stage); TaskStatus ClearRecv(Driver *pdriver, int stage); + // ... for particle pushers + void PushDrift(); + void PushLagrangianMC(); + private: MeshBlockPack* pmy_pack; // ptr to MeshBlockPack containing this Particles + Kokkos::Random_XorShift64_Pool<> rand_pool64; }; } // namespace particles diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp index c071478b..eb704e70 100644 --- a/src/particles/particles_pushers.cpp +++ b/src/particles/particles_pushers.cpp @@ -18,9 +18,31 @@ namespace particles { //---------------------------------------------------------------------------------------- //! \fn void Particles::ParticlesPush -// \brief +// \brief TODO(GNW): write here TaskStatus Particles::Push(Driver *pdriver, int stage) { + + switch (pusher) { + case ParticlesPusher::drift: + PushDrift(); + break; + + case ParticlesPusher::lagrangian_mc: + PushLagrangianMC(); + break; + + default: + break; + } + + return TaskStatus::complete; +} + +//---------------------------------------------------------------------------------------- +//! \fn void Particles::PushDrift +//! \brief TODO(GNW): write here + +void Particles::PushDrift() { auto &indcs = pmy_pack->pmesh->mb_indcs; int is = indcs.is; int js = indcs.js; @@ -33,159 +55,105 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) { auto dt_ = pmy_pack->pmesh->dt; auto gids = pmy_pack->gids; - // Used in lagrangian_mc pusher - auto dtold_ = pmy_pack->pmesh->dtold; - auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1; - auto &uflx = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflx:pmy_pack->pmhd->uflx; - auto &flx1_ = uflx.x1f; - auto &flx2_ = uflx.x2f; - auto &flx3_ = uflx.x3f; - - // TODO(GNW): Maybe move this outside and pass it in as needed? What is the - // overhead associated with recreating the pool each call? - - Kokkos::Random_XorShift64_Pool<> rand_pool64(pmy_pack->gids); - - // TODO(GNW): Note that for MC tracers we need to handle mesh refinement (in AMR) - // more carefully. - - // TODO(GNW): Is it better to do an if statement here and capture the variables - // for the lambda or keep the switch? - - // TODO(GNW): Are these fluxes are the correct ones to use (RK question)? - - // TODO(GNW): The particle pusher gets called before the time integrator, but - // for the MC pusher we need to know the fluxes from the previous - // time step, which means we need to read from u1 and fluxes. - // One way to deal with this is just to have the MC pusher update - // particle positions "one step out of phase" and use dtold. An - // alternative is to have the MC pusher use a separate task that - // gets called after the time integrator. - - switch (pusher) { - case ParticlesPusher::drift: - - par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), - KOKKOS_LAMBDA(const int p) { - int m = pi(PGID,p) - gids; - int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; - pr(IPX,p) += 0.5*dt_*pr(IPVX,p); - - if (multi_d) { - int jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; - pr(IPY,p) += 0.5*dt_*pr(IPVY,p); - } - - if (three_d) { - int kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; - pr(IPZ,p) += 0.5*dt_*pr(IPVZ,p); - } - }); - - break; - - case ParticlesPusher::lagrangian_mc: - - par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), - KOKKOS_LAMBDA(const int p) { - // get random number state this thread - auto rand_gen = rand_pool64.get_state(); - - int m = pi(PGID,p) - gids; - - int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; - int jp = 0; - int kp = 0; - - if (multi_d) { - jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; - } - - if (three_d) { - kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; - } - - bool particle_has_moved = false; - Real reduced_mass = u1_(m,IDN,kp,jp,ip); - - // by convention, these values will be negative when there is outflow - // with respect to the current particle's cell - Real flx1_left = flx1_(m,IDN,kp,jp,ip); - Real flx1_right = -flx1_(m,IDN,kp,jp,ip+1); - Real flx2_left = (multi_d) ? flx2_(m,IDN,kp,jp,jp) : 0.; - Real flx2_right = (multi_d) ? -flx2_(m,IDN,kp,jp+1,jp) : 0.; - Real flx3_left = (three_d) ? flx3_(m,IDN,kp,kp,jp) : 0.; - Real flx3_right = (three_d) ? -flx3_(m,IDN,kp+1,kp,jp) : 0.; - - flx1_left *= mbsize.d_view(m).dx1 * dtold_; - flx1_right *= mbsize.d_view(m).dx1 * dtold_; - - flx2_left *= mbsize.d_view(m).dx2 * dtold_; - flx2_right *= mbsize.d_view(m).dx2 * dtold_; - - flx3_left *= mbsize.d_view(m).dx3 * dtold_; - flx3_right *= mbsize.d_view(m).dx3 * dtold_; - - if (flx1_left < 0) { - if (rand_gen.frand() < fabs(flx1_left)/reduced_mass) { - pr(IPX,p) = (ip - 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min; - particle_has_moved = true; - } - reduced_mass += flx1_left; - } - - if (!particle_has_moved && flx1_right < 0) { - if (rand_gen.frand() < fabs(flx1_right)/reduced_mass) { - pr(IPX,p) = (ip + 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min; - particle_has_moved = true; - } - reduced_mass += flx1_right; - } - - if (multi_d) { - if (!particle_has_moved && flx2_left < 0) { - if (rand_gen.frand() < fabs(flx2_left)/reduced_mass) { - pr(IPY,p) = (jp - 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min; - particle_has_moved = true; - } - reduced_mass += flx2_left; - } - - if (!particle_has_moved && flx2_right < 0) { - if (rand_gen.frand() < fabs(flx2_right)/reduced_mass) { - pr(IPY,p) = (jp + 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min; - particle_has_moved = true; - } - reduced_mass += flx2_right; - } - } - - if (three_d) { - if (!particle_has_moved && flx3_left < 0) { - if (rand_gen.frand() < fabs(flx3_left)/reduced_mass) { - pr(IPZ,p) = (kp - 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min; - particle_has_moved = true; - } - reduced_mass += flx3_left; - } - - if (!particle_has_moved && flx3_right < 0) { - if (rand_gen.frand() < fabs(flx3_right)/reduced_mass) { - pr(IPZ,p) = (kp + 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min; - particle_has_moved = true; - } - reduced_mass += flx3_right; - } - } - rand_pool64.free_state(rand_gen); // free state for use by other threads - }); + par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), + KOKKOS_LAMBDA(const int p) { + int m = pi(PGID,p) - gids; + int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; + pr(IPX,p) += 0.5*dt_*pr(IPVX,p); + + if (multi_d) { + int jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; + pr(IPY,p) += 0.5*dt_*pr(IPVY,p); + } + + if (three_d) { + int kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; + pr(IPZ,p) += 0.5*dt_*pr(IPVZ,p); + } + }); +} - break; +//---------------------------------------------------------------------------------------- +//! \fn void Particles::PushLagrangianMC +//! \brief TODO(GNW): write here - default: - break; - } +void Particles::PushLagrangianMC() { + auto &indcs = pmy_pack->pmesh->mb_indcs; + int is = indcs.is; + int js = indcs.js; + int ks = indcs.ks; + bool &multi_d = pmy_pack->pmesh->multi_d; + bool &three_d = pmy_pack->pmesh->three_d; + auto &mbsize = pmy_pack->pmb->mb_size; + auto &pi = prtcl_idata; + auto &pr = prtcl_rdata; + auto gids = pmy_pack->gids; - return TaskStatus::complete; + auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1; + auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; + auto &flx1_ = uflxidn_.x1f; + auto &flx2_ = uflxidn_.x2f; + auto &flx3_ = uflxidn_.x3f; + + // TODO(GNW): be careful with AMR + // TODO(GNW): this does not work with SMR yet + + par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), + KOKKOS_LAMBDA(const int p) { + + auto rand_gen = rand_pool64.get_state(); + + int m = pi(PGID,p) - gids; + + int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; + int jp = js; + int kp = ks; + + if (multi_d) { + jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; + } + + if (three_d) { + kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; + } + + // get normalized fluxes based on local density + Real mass = u1_(m,IDN,kp,jp,ip); + + // by convention, these values will be positive when there is outflow + // with respect to the current particle's cell + Real flx1_left = -flx1_(m,kp,jp,ip) / mass; + Real flx1_right = flx1_(m,kp,jp,ip+1) / mass; + Real flx2_left = (multi_d) ? -flx2_(m,kp,jp,ip) / mass : 0.; + Real flx2_right = (multi_d) ? flx2_(m,kp,jp+1,ip) / mass : 0.; + Real flx3_left = (three_d) ? -flx3_(m,kp,jp,ip) / mass : 0.; + Real flx3_right = (three_d) ? flx3_(m,kp+1,jp,ip) / mass : 0.; + + flx1_left = flx1_left < 0 ? 0 : flx1_left; + flx1_right = flx1_right < 0 ? 0 : flx1_right; + flx2_left = flx2_left < 0 ? 0 : flx2_left; + flx2_right = flx2_right < 0 ? 0 : flx2_right; + flx3_left = flx3_left < 0 ? 0 : flx3_left; + flx3_right = flx3_right < 0 ? 0 : flx3_right; + + Real rand = rand_gen.frand(); + + if (rand < flx1_left) { + pr(IPX,p) -= mbsize.d_view(m).dx1; + } else if (rand < flx1_left + flx1_right) { + pr(IPX,p) += mbsize.d_view(m).dx1; + } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) { + pr(IPY,p) -= mbsize.d_view(m).dx2; + } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) { + pr(IPY,p) += mbsize.d_view(m).dx2; + } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left) { + pr(IPZ,p) -= mbsize.d_view(m).dx3; + } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left + flx3_right) { + pr(IPZ,p) += mbsize.d_view(m).dx3; + } + + rand_pool64.free_state(rand_gen); // free state for use by other threads + }); } + } // namespace particles diff --git a/src/particles/particles_tasks.cpp b/src/particles/particles_tasks.cpp index 8210a978..2bc3137b 100644 --- a/src/particles/particles_tasks.cpp +++ b/src/particles/particles_tasks.cpp @@ -28,15 +28,38 @@ namespace particles { void Particles::AssembleTasks(std::map> tl) { TaskID none(0); - // particle integration done in "before_timeintegrator" task list - id.push = tl["before_timeintegrator"]->AddTask(&Particles::Push, this, none); - id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push); - id.count = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid); - id.irecv = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count); - id.sendp = tl["before_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv); - id.recvp = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp); - id.crecv = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp); - id.csend = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv); + switch (particle_type) { + case ParticleType::cosmic_ray: + { + // particle integration done in "before_timeintegrator" task list + id.push = tl["before_timeintegrator"]->AddTask(&Particles::Push, this, none); + id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push); + id.count = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid); + id.irecv = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count); + id.sendp = tl["before_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv); + id.recvp = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp); + id.crecv = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp); + id.csend = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv); + break; + } + + case ParticleType::lagrangian_mc: + { + // particle integration done in "after_timeintegrator" task list + id.push = tl["after_timeintegrator"]->AddTask(&Particles::Push, this, none); + id.newgid = tl["after_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push); + id.count = tl["after_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid); + id.irecv = tl["after_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count); + id.sendp = tl["after_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv); + id.recvp = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp); + id.crecv = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp); + id.csend = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv); + break; + } + + default: + break; + } return; } diff --git a/src/pgen/kh.cpp b/src/pgen/kh.cpp index c47d9793..e5bbfea1 100644 --- a/src/pgen/kh.cpp +++ b/src/pgen/kh.cpp @@ -20,6 +20,9 @@ #include "hydro/hydro.hpp" #include "mhd/mhd.hpp" #include "pgen.hpp" +#include "particles/particles.hpp" + +#include //---------------------------------------------------------------------------------------- //! \fn @@ -186,5 +189,63 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { pmbp->pmhd->peos->PrimToCons(w0_, bcc0_, u0_, is, ie, js, je, ks, ke); } + // TODO(GNW): support 2d-only particles + // Initialize particles + if (pmbp->ppart != nullptr) { + + // captures for the kernel + auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0; + auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata; + auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata; + auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size; + auto &npart = pmy_mesh_->pmb_pack->ppart->nprtcl_thispack; + auto gids = pmy_mesh_->pmb_pack->gids; + auto gide = pmy_mesh_->pmb_pack->gide; + + auto &indcs = pmy_mesh_->mb_indcs; + int &is = indcs.is; + int &js = indcs.js; + int &ks = indcs.ks; + int &nx1 = indcs.nx1; + int &nx2 = indcs.nx2; + int &nx3 = indcs.nx3; + + // initialize particles + Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids); + par_for("part_update",DevExeSpace(),0,(npart-1), + KOKKOS_LAMBDA(const int p) { + auto rand_gen = rand_pool64.get_state(); // get random number state this thread + // choose parent MeshBlock randomly + int m = static_cast(rand_gen.frand()*(gide - gids + 1.0)); + pi(PGID,p) = gids + m; + + int ip = 0; + int jp = 0; + int kp = 0; + + // choose random cell based on density + while (true) { + ip = static_cast(rand_gen.frand()*nx1) + is; + jp = static_cast(rand_gen.frand()*nx2) + js; + kp = static_cast(rand_gen.frand()*nx3) + ks; + if (u0_(m,IDN,kp,jp,ip) < 1.5) { + if (rand_gen.frand() < 0.5) break; + } else { + break; + } + } + + // set particle position to cell center + random offset + pr(IPX,p) = CellCenterX(ip-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max) + + mbsize.d_view(m).dx1*(rand_gen.frand()-0.5); + pr(IPY,p) = CellCenterX(jp-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max) + + mbsize.d_view(m).dx2*(rand_gen.frand()-0.5); + pr(IPZ,p) = CellCenterX(kp-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) + + mbsize.d_view(m).dx3*(rand_gen.frand()-0.5); + + rand_pool64.free_state(rand_gen); // free state for use by other threads + }); + } + return; } From f01ab9acf60d52219d3202ac65b4492a5b7ebb77 Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Tue, 9 Jul 2024 03:21:21 -0400 Subject: [PATCH 03/10] update with fixes of mesh refined code for particle indexing. still need to be careful with rank_order. --- src/athena.hpp | 2 +- src/bvals/bvals_part.cpp | 5 + src/hydro/hydro_fluxes.cpp | 5 +- src/main.cpp | 1 + src/mesh/mesh.cpp | 5 + src/mesh/mesh.hpp | 1 + src/mhd/mhd_fluxes.cpp | 3 +- src/particles/particles.cpp | 23 ++- src/particles/particles.hpp | 3 + src/particles/particles_pushers.cpp | 285 +++++++++++++++++++++++++++- src/particles/particles_tasks.cpp | 2 +- src/pgen/kh.cpp | 137 +++++++++---- 12 files changed, 415 insertions(+), 57 deletions(-) diff --git a/src/athena.hpp b/src/athena.hpp index 02a1ee00..de790bc6 100644 --- a/src/athena.hpp +++ b/src/athena.hpp @@ -52,7 +52,7 @@ enum VariableIndex {IDN=0, IM1=1, IVX=1, IM2=2, IVY=2, IM3=3, IVZ=3, IEN=4, ITM= // array indices for components of magnetic field enum BFieldIndex {IBX=0, IBY=1, IBZ=2}; // array indices for particle arrays -enum ParticlesIndex {PGID=0, PTAG=1, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5}; +enum ParticlesIndex {PGID=0, PTAG=1, PLASTMOVE=2, PLASTLEVEL=3, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5}; // integer constants to specify spatial reconstruction methods enum ReconstructionMethod {dc, plm, ppm4, ppmx, wenoz}; diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp index e0c29116..ed137438 100644 --- a/src/bvals/bvals_part.cpp +++ b/src/bvals/bvals_part.cpp @@ -62,6 +62,8 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { auto &psendl = sendlist; int counter=0; int *pcounter = &counter; + bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d; + bool &three_d = pmy_part->pmy_pack->pmesh->three_d; Kokkos::realloc(sendlist, static_cast(0.1*npart)); par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) { @@ -87,6 +89,9 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1; int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1; + fy = multi_d ? fy : 0; + fz = three_d ? fz : 0; + // only update particle GID if it has crossed MeshBlock boundary if ((abs(ix) + abs(iy) + abs(iz)) != 0) { if (iz == 0) { diff --git a/src/hydro/hydro_fluxes.cpp b/src/hydro/hydro_fluxes.cpp index 108ab946..f20bb9ae 100644 --- a/src/hydro/hydro_fluxes.cpp +++ b/src/hydro/hydro_fluxes.cpp @@ -381,11 +381,10 @@ TaskStatus Hydro::SaveFlux(Driver *pdrive, int stage) { Real dtfactor = dt / 2.0; + // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1, KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) { - - // TODO(GNW): need to be careful with different time integrators - + // save dF1/dx1 if (j <= je && k <= ke) { if (stage == 1) { diff --git a/src/main.cpp b/src/main.cpp index ecb5d2de..ca153219 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -295,6 +295,7 @@ int main(int argc, char *argv[]) { pmesh->pgen = std::make_unique(pinput, pmesh, restartfile); restartfile.Close(); } + pmesh->FinalizeParticleDataStructures(pinput); //--- Step 6. -------------------------------------------------------------------------- // Construct Driver and Outputs. Actual outputs (including initial conditions) are made diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 26b9280b..b35ffab1 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -613,7 +613,12 @@ void Mesh::AddCoordinatesAndPhysics(ParameterInput *pinput) { pmb_pack->AddCoordinates(pinput); pmb_pack->AddPhysics(pinput); } +} + +//---------------------------------------------------------------------------------------- +// \fn Mesh::FinalizeParticleDataStructures +void Mesh::FinalizeParticleDataStructures(ParameterInput *pinput) { // Determine total number of particles across all ranks particles::Particles *ppart = pmb_pack->ppart; if (ppart != nullptr) { diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp index b64ea23e..285e2cac 100644 --- a/src/mesh/mesh.hpp +++ b/src/mesh/mesh.hpp @@ -151,6 +151,7 @@ class Mesh { void WriteMeshStructure(); void NewTimeStep(const Real tlim); void AddCoordinatesAndPhysics(ParameterInput *pinput); + void FinalizeParticleDataStructures(ParameterInput *pinput); BoundaryFlag GetBoundaryFlag(const std::string& input_string); std::string GetBoundaryString(BoundaryFlag input_flag); diff --git a/src/mhd/mhd_fluxes.cpp b/src/mhd/mhd_fluxes.cpp index 64daeb3b..3ee97769 100644 --- a/src/mhd/mhd_fluxes.cpp +++ b/src/mhd/mhd_fluxes.cpp @@ -442,11 +442,10 @@ TaskStatus MHD::SaveFlux(Driver *pdrive, int stage) { Real dtfactor = dt / 2.0; + // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1, KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) { - // TODO(GNW): need to be careful with different time integrators - // save dF1/dx1 if (j <= je && k <= ke) { if (stage == 1) { diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp index 9284ddc7..07c6e24a 100644 --- a/src/particles/particles.cpp +++ b/src/particles/particles.cpp @@ -49,7 +49,6 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : if (ptype.compare("cosmic_ray") == 0) { particle_type = ParticleType::cosmic_ray; } else if (ptype.compare("lagrangian_mc") == 0) { - // TODO(GNW): Restrict which pusher is alowed based on particle type particle_type = ParticleType::lagrangian_mc; } else { std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ @@ -71,8 +70,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : std::exit(EXIT_FAILURE); } } else if (ppush.compare("lagrangian_mc") == 0) { - // TODO(GNW): is this the right place to set timestep? - // force driver to inherit timestep from fluid + // force driver to inherit timestep from fluid by setting desired particle dt to max value dtnew = std::numeric_limits::max(); pusher = ParticlesPusher::lagrangian_mc; if (ppack->pmhd != nullptr) { @@ -106,7 +104,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : case ParticleType::lagrangian_mc: { nrdata = 3; - nidata = 2; + nidata = 4; break; } default: @@ -116,7 +114,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack); // allocate boundary object - pbval_part = new ParticlesBoundaryValues(this, pin); // TODO(GNW): do I need to check this? + pbval_part = new ParticlesBoundaryValues(this, pin); } //---------------------------------------------------------------------------------------- @@ -125,6 +123,21 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : Particles::~Particles() { } +//---------------------------------------------------------------------------------------- +// ReallocateParticles() +// Update particle arrays and update internal particle count data for new number of +// particles in this pack. This method does not preserve any existing particle data + +void Particles::ReallocateParticles(int new_nprtcl_thispack) { + + // TODO(GNW): maybe check that ntrack is set correctly. also check about whether + // the rank_order below works given that it might have empty ids + + nprtcl_thispack = new_nprtcl_thispack; + Kokkos::realloc(prtcl_rdata, nrdata, nprtcl_thispack); + Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack); +} + //---------------------------------------------------------------------------------------- // CreatePaticleTags() // Assigns tags to particles (unique integer). Note that tracked particles are always diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp index d1de50a1..6f37dd8c 100644 --- a/src/particles/particles.hpp +++ b/src/particles/particles.hpp @@ -40,6 +40,7 @@ struct ParticlesTaskIDs { TaskID recvp; TaskID csend; TaskID crecv; + TaskID mradj; }; namespace particles { @@ -73,6 +74,7 @@ class Particles { ParticlesTaskIDs id; // functions... + void ReallocateParticles(int new_nprtcl_thispack); void CreateParticleTags(ParameterInput *pin); void AssembleTasks(std::map> tl); TaskStatus Push(Driver *pdriver, int stage); @@ -83,6 +85,7 @@ class Particles { TaskStatus RecvP(Driver *pdriver, int stage); TaskStatus ClearSend(Driver *pdriver, int stage); TaskStatus ClearRecv(Driver *pdriver, int stage); + TaskStatus AdjustMeshRefinement(Driver *pdriver, int stage); // ... for particle pushers void PushDrift(); diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp index eb704e70..5cd42bfc 100644 --- a/src/particles/particles_pushers.cpp +++ b/src/particles/particles_pushers.cpp @@ -18,7 +18,7 @@ namespace particles { //---------------------------------------------------------------------------------------- //! \fn void Particles::ParticlesPush -// \brief TODO(GNW): write here +// \brief wrapper with switch to access different particle pushers TaskStatus Particles::Push(Driver *pdriver, int stage) { @@ -40,7 +40,7 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) { //---------------------------------------------------------------------------------------- //! \fn void Particles::PushDrift -//! \brief TODO(GNW): write here +//! \brief push particles based on stored particle internal velocity void Particles::PushDrift() { auto &indcs = pmy_pack->pmesh->mb_indcs; @@ -75,9 +75,11 @@ void Particles::PushDrift() { //---------------------------------------------------------------------------------------- //! \fn void Particles::PushLagrangianMC -//! \brief TODO(GNW): write here +//! \brief push particles using Lagrangian Monte Carlo method (Genel+ 2013, MNRAS.435.1426G) +// WARNING: this implementation may not work well with AMR void Particles::PushLagrangianMC() { + auto &indcs = pmy_pack->pmesh->mb_indcs; int is = indcs.is; int js = indcs.js; @@ -87,7 +89,8 @@ void Particles::PushLagrangianMC() { auto &mbsize = pmy_pack->pmb->mb_size; auto &pi = prtcl_idata; auto &pr = prtcl_rdata; - auto gids = pmy_pack->gids; + auto &gids = pmy_pack->gids; + auto &mblev = pmy_pack->pmb->mb_lev; auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1; auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; @@ -95,14 +98,12 @@ void Particles::PushLagrangianMC() { auto &flx2_ = uflxidn_.x2f; auto &flx3_ = uflxidn_.x3f; - // TODO(GNW): be careful with AMR - // TODO(GNW): this does not work with SMR yet + auto &rand_pool64_ = rand_pool64; + // GNW 2024-JUL-5: Warning, this may not play well with AMR par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), KOKKOS_LAMBDA(const int p) { - auto rand_gen = rand_pool64.get_state(); - int m = pi(PGID,p) - gids; int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; @@ -136,24 +137,290 @@ void Particles::PushLagrangianMC() { flx3_left = flx3_left < 0 ? 0 : flx3_left; flx3_right = flx3_right < 0 ? 0 : flx3_right; + auto rand_gen = rand_pool64_.get_state(); Real rand = rand_gen.frand(); + rand_pool64_.free_state(rand_gen); // free state for use by other threads + + // save refinement level of current zone + pi(PLASTLEVEL,p) = mblev.d_view(m); + + // save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8 + pi(PLASTMOVE,p) = 32 * (ip % 2) + 16 * (jp % 2) + 8 * (kp % 2); if (rand < flx1_left) { pr(IPX,p) -= mbsize.d_view(m).dx1; + pi(PLASTMOVE,p) += 1; } else if (rand < flx1_left + flx1_right) { pr(IPX,p) += mbsize.d_view(m).dx1; + pi(PLASTMOVE,p) += 2; } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) { pr(IPY,p) -= mbsize.d_view(m).dx2; + pi(PLASTMOVE,p) += 3; } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) { pr(IPY,p) += mbsize.d_view(m).dx2; + pi(PLASTMOVE,p) += 4; } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left) { pr(IPZ,p) -= mbsize.d_view(m).dx3; + pi(PLASTMOVE,p) += 5; } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left + flx3_right) { pr(IPZ,p) += mbsize.d_view(m).dx3; + pi(PLASTMOVE,p) += 6; } + }); +} + +//---------------------------------------------------------------------------------------- +//! \fn TaskStatus Particles::AdjustMeshRefinement +//! \brief update locations of particles that enter meshblocks with new refinement levels + +TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) { + + auto &indcs = pmy_pack->pmesh->mb_indcs; + int is = indcs.is; + int js = indcs.js; + int ks = indcs.ks; + auto &pi = prtcl_idata; + auto &pr = prtcl_rdata; + auto &gids = pmy_pack->gids; + auto &mblev = pmy_pack->pmb->mb_lev; + bool &multi_d = pmy_pack->pmesh->multi_d; + bool &three_d = pmy_pack->pmesh->three_d; + auto &mbsize = pmy_pack->pmb->mb_size; + + auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; + auto &flx1_ = uflxidn_.x1f; + auto &flx2_ = uflxidn_.x2f; + auto &flx3_ = uflxidn_.x3f; - rand_pool64.free_state(rand_gen); // free state for use by other threads + auto &rand_pool64_ = rand_pool64; + + par_for("particle_meshshift",DevExeSpace(),0,(nprtcl_thispack-1), + KOKKOS_LAMBDA(const int p) { + + int m = pi(PGID,p) - gids; + int level = mblev.d_view(m); + + int lastlevel = pi(PLASTLEVEL,p); + int lastmove = pi(PLASTMOVE,p); + + // oddness of the last cell that the particle lived in + int i_parity = lastmove / 32; + int j_parity = (lastmove % 32) / 16; + int k_parity = (lastmove % 16) / 8; + + // direction of last move: + // 1 -> "left" x1 face was chosen + // 2 -> "right" x1 face was chosen + // 3 -> "left" x2 face was chosen + // 4 -> "right" x2 face was chosen + // 5 -> "left" x3 face was chosen + // 6 -> "right" x3 face was chosen + lastmove = lastmove % 8; + + Real dx1 = mbsize.d_view(m).dx1; + Real dx2 = multi_d ? mbsize.d_view(m).dx2 : 0.; + Real dx3 = three_d ? mbsize.d_view(m).dx3 : 0.; + + if (level > lastlevel) { + // this is a higher refinement level, i.e., the zones are smaller now + + if (lastmove == 1) { + // came from zone to right (dx--) + pr(IPX,p) += dx1/2; + + pr(IPY,p) -= dx2/2; + pr(IPZ,p) -= dx3/2; + } else if (lastmove == 2) { + // came from zone to left (dx++) + pr(IPX,p) -= dx1/2; + + pr(IPY,p) -= dx2/2; + pr(IPZ,p) -= dx3/2; + } else if (multi_d && lastmove == 3) { + // came from zone above (dy--) + pr(IPY,p) += dx2/2; + + pr(IPX,p) -= dx1/2; + pr(IPZ,p) -= dx3/2; + } else if (multi_d && lastmove == 4) { + // came from zone below (dy++) + pr(IPY,p) -= dx2/2; + + pr(IPX,p) -= dx1/2; + pr(IPZ,p) -= dx3/2; + } else if (three_d && lastmove == 5) { + // came from zone in front (dz--) + pr(IPZ,p) += dx3/2; + + pr(IPX,p) -= dx1/2; + pr(IPY,p) -= dx2/2; + } else if (three_d && lastmove == 6) { + // came from zone behind (dz++) + pr(IPZ,p) -= dx3/2; + + pr(IPX,p) -= dx1/2; + pr(IPY,p) -= dx2/2; + } + + int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; + int jp = js; + int kp = ks; + + if (multi_d) { + jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; + } + + if (three_d) { + kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; + } + + // get fluxes into the four zones that the particle could have ended up in + Real flx1 = 0.; + Real flx2 = 0.; + Real flx3 = 0.; + Real flx4 = 0.; + + if (lastmove == 1) { + // came from zone to the right + flx1 = -flx1_(m,kp,jp,ip+1); + flx2 = (multi_d) ? -flx1_(m,kp,jp+1,ip+1) : 0.; + flx3 = (three_d) ? -flx1_(m,kp+1,jp,ip+1) : 0.; + flx4 = (multi_d && three_d) ? -flx1_(m,kp+1,jp+1,ip+1) : 0.; + } else if (lastmove == 2) { + // came from zone to the left + flx1 = flx1_(m,kp,jp,ip); + flx2 = (multi_d) ? flx1_(m,kp,jp+1,ip) : 0.; + flx3 = (three_d) ? flx1_(m,kp+1,jp,ip) : 0.; + flx4 = (multi_d && three_d) ? flx1_(m,kp+1,jp+1,ip) : 0.; + } else if (lastmove == 3) { + // came from zone above. is at least multi_d + flx1 = -flx2_(m,kp,jp+1,ip); + flx2 = -flx2_(m,kp,jp+1,ip+1); + flx3 = (three_d) ? -flx2_(m,kp+1,jp+1,ip) : 0.; + flx4 = (three_d) ? -flx2_(m,kp+1,jp+1,ip+1) : 0.; + } else if (lastmove == 4) { + // came from zone below. is at least multi_d + flx1 = flx2_(m,kp,jp,ip); + flx2 = flx2_(m,kp,jp,ip+1); + flx3 = (three_d) ? flx2_(m,kp+1,jp,ip) : 0.; + flx4 = (three_d) ? flx2_(m,kp+1,jp,ip+1) : 0.; + } else if (lastmove == 5) { + // came from zone in front. is three_d + flx1 = -flx3_(m,kp+1,jp,ip); + flx2 = -flx3_(m,kp+1,jp,ip+1); + flx3 = -flx3_(m,kp+1,jp+1,ip); + flx4 = -flx3_(m,kp+1,jp+1,ip+1); + } else if (lastmove == 6) { + // came from zone behind. is three_d + flx1 = flx3_(m,kp,jp,ip); + flx2 = flx3_(m,kp,jp,ip+1); + flx3 = flx3_(m,kp,jp+1,ip); + flx4 = flx3_(m,kp,jp+1,ip+1); + } + + flx1 = (flx1 < 0) ? 0. : flx1; + flx2 = (flx2 < 0) ? 0. : flx2; + flx3 = (flx3 < 0) ? 0. : flx3; + flx4 = (flx4 < 0) ? 0. : flx4; + + Real flx_total = flx1 + flx2 + flx3 + flx4; + flx_total = (flx_total > 0) ? flx_total : 1.e-10; + + flx1 /= flx_total; + flx2 /= flx_total; + flx3 /= flx_total; + flx4 /= flx_total; + + auto rand_gen = rand_pool64_.get_state(); + Real rand = rand_gen.frand(); + rand_pool64_.free_state(rand_gen); // free state for use by other threads + + int target_zone = 4; + if (rand < flx1) { + target_zone = 1; + } else if (rand < flx1 + flx2) { + target_zone = 2; + } else if (rand < flx1 + flx2 + flx3) { + target_zone = 3; + } + + if (lastmove == 1 || lastmove == 2) { + if (target_zone == 2) { + pr(IPY,p) += mbsize.d_view(m).dx2; + } else if (target_zone == 3) { + pr(IPZ,p) += mbsize.d_view(m).dx3; + } else if (target_zone == 4) { + pr(IPY,p) += mbsize.d_view(m).dx2; + pr(IPZ,p) += mbsize.d_view(m).dx3; + } + } else if (lastmove == 3 || lastmove == 4) { + if (target_zone == 2) { + pr(IPX,p) += mbsize.d_view(m).dx1; + } else if (target_zone == 3) { + pr(IPZ,p) += mbsize.d_view(m).dx3; + } else if (target_zone == 4) { + pr(IPX,p) += mbsize.d_view(m).dx1; + pr(IPZ,p) += mbsize.d_view(m).dx3; + } + } else if (lastmove == 5 || lastmove == 6) { + if (target_zone == 2) { + pr(IPX,p) += mbsize.d_view(m).dx1; + } else if (target_zone == 3) { + pr(IPY,p) += mbsize.d_view(m).dx2; + } else if (target_zone == 4) { + pr(IPX,p) += mbsize.d_view(m).dx1; + pr(IPY,p) += mbsize.d_view(m).dx2; + } + } + + } else if (level < lastlevel) { + // this is a lower refinement level, i.e., the zones are larger now, + // there's nothing special to do other than to move the particle to + // the center of the new zone + + if (i_parity) { + pr(IPX,p) -= mbsize.d_view(m).dx1/4; + } else { + pr(IPX,p) += mbsize.d_view(m).dx1/4; + } + if (multi_d) { + if (j_parity) { + pr(IPY,p) -= mbsize.d_view(m).dx2/4; + } else { + pr(IPY,p) += mbsize.d_view(m).dx2/4; + } + } + if (three_d) { + if (k_parity) { + pr(IPZ,p) -= mbsize.d_view(m).dx3/4; + } else { + pr(IPZ,p) += mbsize.d_view(m).dx3/4; + } + } + + if (lastmove == 1) { + // came from zone to right (dx--) + pr(IPX,p) -= mbsize.d_view(m).dx1/2; + } else if (lastmove == 2) { + // came from zone to left (dx++) + pr(IPX,p) += mbsize.d_view(m).dx1/2; + } else if (lastmove == 3) { + // came from zone above (dy--) + pr(IPY,p) -= mbsize.d_view(m).dx2/2; + } else if (lastmove == 4) { + // came from zone below (dy++) + pr(IPY,p) += mbsize.d_view(m).dx2/2; + } else if (lastmove == 5) { + // came from zone in front (dz--) + pr(IPZ,p) -= mbsize.d_view(m).dx3/2; + } else if (lastmove == 6) { + // came from zone behind (dz++) + pr(IPZ,p) += mbsize.d_view(m).dx3/2; + } + } }); + + return TaskStatus::complete; } } // namespace particles diff --git a/src/particles/particles_tasks.cpp b/src/particles/particles_tasks.cpp index 2bc3137b..e3215a2d 100644 --- a/src/particles/particles_tasks.cpp +++ b/src/particles/particles_tasks.cpp @@ -54,6 +54,7 @@ void Particles::AssembleTasks(std::map> t id.recvp = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp); id.crecv = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp); id.csend = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv); + id.mradj = tl["after_timeintegrator"]->AddTask(&Particles::AdjustMeshRefinement, this, id.csend); break; } @@ -112,7 +113,6 @@ TaskStatus Particles::RecvP(Driver *pdrive, int stage) { return tstat; } - //---------------------------------------------------------------------------------------- //! \fn TaskList Particles::ClearSend //! \brief Wrapper task list function that checks all MPI sends have completed. diff --git a/src/pgen/kh.cpp b/src/pgen/kh.cpp index e5bbfea1..98c5e308 100644 --- a/src/pgen/kh.cpp +++ b/src/pgen/kh.cpp @@ -49,7 +49,7 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { // Select either Hydro or MHD DvceArray5D w0_; - Real gm1, p0; + Real gm1; int nfluid, nscalars; if (pmbp->phydro != nullptr) { w0_ = pmbp->phydro->w0; @@ -189,18 +189,14 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { pmbp->pmhd->peos->PrimToCons(w0_, bcc0_, u0_, is, ie, js, je, ks, ke); } - // TODO(GNW): support 2d-only particles // Initialize particles if (pmbp->ppart != nullptr) { // captures for the kernel auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0; - auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata; - auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata; auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size; - auto &npart = pmy_mesh_->pmb_pack->ppart->nprtcl_thispack; + auto &mblev = pmy_mesh_->pmb_pack->pmb->mb_lev; auto gids = pmy_mesh_->pmb_pack->gids; - auto gide = pmy_mesh_->pmb_pack->gide; auto &indcs = pmy_mesh_->mb_indcs; int &is = indcs.is; @@ -210,40 +206,109 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { int &nx2 = indcs.nx2; int &nx3 = indcs.nx3; - // initialize particles + // init RNG Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids); - par_for("part_update",DevExeSpace(),0,(npart-1), - KOKKOS_LAMBDA(const int p) { - auto rand_gen = rand_pool64.get_state(); // get random number state this thread - // choose parent MeshBlock randomly - int m = static_cast(rand_gen.frand()*(gide - gids + 1.0)); - pi(PGID,p) = gids + m; - - int ip = 0; - int jp = 0; - int kp = 0; - - // choose random cell based on density - while (true) { - ip = static_cast(rand_gen.frand()*nx1) + is; - jp = static_cast(rand_gen.frand()*nx2) + js; - kp = static_cast(rand_gen.frand()*nx3) + ks; - if (u0_(m,IDN,kp,jp,ip) < 1.5) { - if (rand_gen.frand() < 0.5) break; - } else { - break; - } + + // count total mass across the domain + Real total_mass = 0.0; + const int nmkji = (pmbp->nmb_thispack)*indcs.nx3*indcs.nx2*indcs.nx1; + const int nkji = indcs.nx3*indcs.nx2*indcs.nx1; + const int nji = indcs.nx2*indcs.nx1; + + Kokkos::parallel_reduce("pgen_mass", Kokkos::RangePolicy<>(DevExeSpace(), 0, nmkji), + KOKKOS_LAMBDA(const int &idx, Real &total_mass) { + // compute m,k,j,i indices of thread and evaluate + int m = (idx)/nkji; + int k = (idx - m*nkji)/nji; + int j = (idx - m*nkji - k*nji)/indcs.nx1; + int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is; + k += ks; + j += js; + + Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3; + total_mass += u0_(m,IDN,k,j,i) * vol; + }, total_mass); + + Real total_mass_thispack = total_mass; + +#if MPI_PARALLEL_ENABLED + // get total mass over all MPI ranks + MPI_Allreduce(MPI_IN_PLACE, &total_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif + + // get number of particles for this mbpack using MC to deal with fractional particles + Real target_nparticles = pin->GetOrAddReal("particles","target_count",100000.0); + Real mass_per_particle = total_mass / target_nparticles; + + // create shared array to hold number of particles per zone + DualArray2D nparticles_per_zone("partperzone", nmkji,2); + par_for("particle_count", DevExeSpace(), 0,nmkji-1, + KOKKOS_LAMBDA(int idx) { + int m = (idx)/nkji; + int k = (idx - m*nkji)/nji; + int j = (idx - m*nkji - k*nji)/indcs.nx1; + int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is; + k += ks; + j += js; + Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3; + Real nppc = u0_(m,IDN,k,j,i) * vol / mass_per_particle; + int nparticles = static_cast(nppc); + nppc = fabs(fmod(nppc, 1.0)); + auto rand_gen = rand_pool64.get_state(); + if (rand_gen.frand() < nppc) { + nparticles += 1; } + nparticles_per_zone.d_view(idx,0) = nparticles; + rand_pool64.free_state(rand_gen); + }); - // set particle position to cell center + random offset - pr(IPX,p) = CellCenterX(ip-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max) + - mbsize.d_view(m).dx1*(rand_gen.frand()-0.5); - pr(IPY,p) = CellCenterX(jp-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max) + - mbsize.d_view(m).dx2*(rand_gen.frand()-0.5); - pr(IPZ,p) = CellCenterX(kp-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) + - mbsize.d_view(m).dx3*(rand_gen.frand()-0.5); + // count total number of particles in this pack + nparticles_per_zone.template modify(); + nparticles_per_zone.template sync(); + int nparticles_thispack = 0; + for (int i=0; i(); + nparticles_per_zone.template sync(); + + // helpful debug statement + std::cout << "total mass across domain: " << total_mass << ", total mass in pack: " << total_mass_thispack + << ", target nparticles: " << target_nparticles << ", nparticles in pack: " << nparticles_thispack + << std::endl; + + // reallocate space for particles and get relevant pointers + pmy_mesh_->pmb_pack->ppart->ReallocateParticles(nparticles_thispack); + + auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata; + auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata; - rand_pool64.free_state(rand_gen); // free state for use by other threads + // initialize particles. only intended for Lagrangian-type particles + par_for("part_init", DevExeSpace(), 0,nmkji-1, + KOKKOS_LAMBDA(int idx) { + int m = (idx)/nkji; + int k = (idx - m*nkji)/nji; + int j = (idx - m*nkji - k*nji)/indcs.nx1; + int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is; + k += ks; + j += js; + + int nparticles_in_zone = nparticles_per_zone.d_view(idx,0); + int starting_index = nparticles_per_zone.d_view(idx,1); + + for (int p=0; p Date: Wed, 31 Jul 2024 11:30:20 -0400 Subject: [PATCH 04/10] lint code for c++ --- src/athena.hpp | 3 +- src/hydro/hydro_fluxes.cpp | 1 - src/mhd/mhd_fluxes.cpp | 1 - src/particles/particles.cpp | 6 +- src/particles/particles_pushers.cpp | 27 ++- src/particles/particles_tasks.cpp | 51 +++-- src/pgen/kh.cpp | 18 +- tst/scripts/style/check_athena_cpp_style.sh | 2 +- tst/scripts/style/clang_format_v1.cfg | 196 ++++++++++++++++++++ tst/scripts/style/clang_format_v2.cfg | 196 ++++++++++++++++++++ 10 files changed, 456 insertions(+), 45 deletions(-) create mode 100644 tst/scripts/style/clang_format_v1.cfg create mode 100644 tst/scripts/style/clang_format_v2.cfg diff --git a/src/athena.hpp b/src/athena.hpp index de790bc6..c97bb73c 100644 --- a/src/athena.hpp +++ b/src/athena.hpp @@ -52,7 +52,8 @@ enum VariableIndex {IDN=0, IM1=1, IVX=1, IM2=2, IVY=2, IM3=3, IVZ=3, IEN=4, ITM= // array indices for components of magnetic field enum BFieldIndex {IBX=0, IBY=1, IBZ=2}; // array indices for particle arrays -enum ParticlesIndex {PGID=0, PTAG=1, PLASTMOVE=2, PLASTLEVEL=3, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5}; +enum ParticlesIndex {PGID=0, PTAG=1, PLASTMOVE=2, PLASTLEVEL=3, + IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5}; // integer constants to specify spatial reconstruction methods enum ReconstructionMethod {dc, plm, ppm4, ppmx, wenoz}; diff --git a/src/hydro/hydro_fluxes.cpp b/src/hydro/hydro_fluxes.cpp index f20bb9ae..f88832bb 100644 --- a/src/hydro/hydro_fluxes.cpp +++ b/src/hydro/hydro_fluxes.cpp @@ -384,7 +384,6 @@ TaskStatus Hydro::SaveFlux(Driver *pdrive, int stage) { // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1, KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) { - // save dF1/dx1 if (j <= je && k <= ke) { if (stage == 1) { diff --git a/src/mhd/mhd_fluxes.cpp b/src/mhd/mhd_fluxes.cpp index 3ee97769..e234065f 100644 --- a/src/mhd/mhd_fluxes.cpp +++ b/src/mhd/mhd_fluxes.cpp @@ -445,7 +445,6 @@ TaskStatus MHD::SaveFlux(Driver *pdrive, int stage) { // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1, KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) { - // save dF1/dx1 if (j <= je && k <= ke) { if (stage == 1) { diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp index 07c6e24a..a2130f5e 100644 --- a/src/particles/particles.cpp +++ b/src/particles/particles.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "athena.hpp" #include "globals.hpp" @@ -70,7 +71,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : std::exit(EXIT_FAILURE); } } else if (ppush.compare("lagrangian_mc") == 0) { - // force driver to inherit timestep from fluid by setting desired particle dt to max value + // force driver to inherit timestep from fluid by setting particle dt to max value dtnew = std::numeric_limits::max(); pusher = ParticlesPusher::lagrangian_mc; if (ppack->pmhd != nullptr) { @@ -97,7 +98,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : { // save particle position then velocity for compiler optimizations even though // 2d runs will not require all six real entries - nrdata = 6; + nrdata = 6; nidata = 2; break; } @@ -129,7 +130,6 @@ Particles::~Particles() { // particles in this pack. This method does not preserve any existing particle data void Particles::ReallocateParticles(int new_nprtcl_thispack) { - // TODO(GNW): maybe check that ntrack is set correctly. also check about whether // the rank_order below works given that it might have empty ids diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp index 5cd42bfc..80f29841 100644 --- a/src/particles/particles_pushers.cpp +++ b/src/particles/particles_pushers.cpp @@ -21,7 +21,6 @@ namespace particles { // \brief wrapper with switch to access different particle pushers TaskStatus Particles::Push(Driver *pdriver, int stage) { - switch (pusher) { case ParticlesPusher::drift: PushDrift(); @@ -42,7 +41,7 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) { //! \fn void Particles::PushDrift //! \brief push particles based on stored particle internal velocity -void Particles::PushDrift() { +void Particles::PushDrift() { auto &indcs = pmy_pack->pmesh->mb_indcs; int is = indcs.is; int js = indcs.js; @@ -75,11 +74,10 @@ void Particles::PushDrift() { //---------------------------------------------------------------------------------------- //! \fn void Particles::PushLagrangianMC -//! \brief push particles using Lagrangian Monte Carlo method (Genel+ 2013, MNRAS.435.1426G) +//! \brief push with Lagrangian Monte Carlo method (Genel+ 2013, MNRAS.435.1426G) // WARNING: this implementation may not work well with AMR void Particles::PushLagrangianMC() { - auto &indcs = pmy_pack->pmesh->mb_indcs; int is = indcs.is; int js = indcs.js; @@ -93,7 +91,8 @@ void Particles::PushLagrangianMC() { auto &mblev = pmy_pack->pmb->mb_lev; auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1; - auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; + auto &uflxidn_ = (pmy_pack->phydro != nullptr)? + pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; auto &flx1_ = uflxidn_.x1f; auto &flx2_ = uflxidn_.x2f; auto &flx3_ = uflxidn_.x3f; @@ -103,7 +102,6 @@ void Particles::PushLagrangianMC() { // GNW 2024-JUL-5: Warning, this may not play well with AMR par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), KOKKOS_LAMBDA(const int p) { - int m = pi(PGID,p) - gids; int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; @@ -159,10 +157,12 @@ void Particles::PushLagrangianMC() { } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) { pr(IPY,p) += mbsize.d_view(m).dx2; pi(PLASTMOVE,p) += 4; - } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left) { + } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + + flx3_left) { pr(IPZ,p) -= mbsize.d_view(m).dx3; pi(PLASTMOVE,p) += 5; - } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left + flx3_right) { + } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + + flx3_left + flx3_right) { pr(IPZ,p) += mbsize.d_view(m).dx3; pi(PLASTMOVE,p) += 6; } @@ -174,7 +174,6 @@ void Particles::PushLagrangianMC() { //! \brief update locations of particles that enter meshblocks with new refinement levels TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) { - auto &indcs = pmy_pack->pmesh->mb_indcs; int is = indcs.is; int js = indcs.js; @@ -187,16 +186,16 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) { bool &three_d = pmy_pack->pmesh->three_d; auto &mbsize = pmy_pack->pmb->mb_size; - auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; + auto &uflxidn_ = (pmy_pack->phydro != nullptr)? + pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved; auto &flx1_ = uflxidn_.x1f; auto &flx2_ = uflxidn_.x2f; auto &flx3_ = uflxidn_.x3f; auto &rand_pool64_ = rand_pool64; - + par_for("particle_meshshift",DevExeSpace(),0,(nprtcl_thispack-1), KOKKOS_LAMBDA(const int p) { - int m = pi(PGID,p) - gids; int level = mblev.d_view(m); @@ -223,7 +222,7 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) { if (level > lastlevel) { // this is a higher refinement level, i.e., the zones are smaller now - + if (lastmove == 1) { // came from zone to right (dx--) pr(IPX,p) += dx1/2; @@ -334,7 +333,7 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) { auto rand_gen = rand_pool64_.get_state(); Real rand = rand_gen.frand(); rand_pool64_.free_state(rand_gen); // free state for use by other threads - + int target_zone = 4; if (rand < flx1) { target_zone = 1; diff --git a/src/particles/particles_tasks.cpp b/src/particles/particles_tasks.cpp index e3215a2d..5f5cfb4b 100644 --- a/src/particles/particles_tasks.cpp +++ b/src/particles/particles_tasks.cpp @@ -32,29 +32,46 @@ void Particles::AssembleTasks(std::map> t case ParticleType::cosmic_ray: { // particle integration done in "before_timeintegrator" task list - id.push = tl["before_timeintegrator"]->AddTask(&Particles::Push, this, none); - id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push); - id.count = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid); - id.irecv = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count); - id.sendp = tl["before_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv); - id.recvp = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp); - id.crecv = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp); - id.csend = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv); + id.push = tl["before_timeintegrator"]->AddTask(&Particles::Push, + this, none); + id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, + this, id.push); + id.count = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, + this, id.newgid); + id.irecv = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, + this, id.count); + id.sendp = tl["before_timeintegrator"]->AddTask(&Particles::SendP, + this, id.irecv); + id.recvp = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, + this, id.sendp); + id.crecv = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, + this, id.recvp); + id.csend = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, + this, id.crecv); break; } case ParticleType::lagrangian_mc: { // particle integration done in "after_timeintegrator" task list - id.push = tl["after_timeintegrator"]->AddTask(&Particles::Push, this, none); - id.newgid = tl["after_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push); - id.count = tl["after_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid); - id.irecv = tl["after_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count); - id.sendp = tl["after_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv); - id.recvp = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp); - id.crecv = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp); - id.csend = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv); - id.mradj = tl["after_timeintegrator"]->AddTask(&Particles::AdjustMeshRefinement, this, id.csend); + id.push = tl["after_timeintegrator"]->AddTask(&Particles::Push, + this, none); + id.newgid = tl["after_timeintegrator"]->AddTask(&Particles::NewGID, + this, id.push); + id.count = tl["after_timeintegrator"]->AddTask(&Particles::SendCnt, + this, id.newgid); + id.irecv = tl["after_timeintegrator"]->AddTask(&Particles::InitRecv, + this, id.count); + id.sendp = tl["after_timeintegrator"]->AddTask(&Particles::SendP, + this, id.irecv); + id.recvp = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, + this, id.sendp); + id.crecv = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, + this, id.recvp); + id.csend = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, + this, id.crecv); + id.mradj = tl["after_timeintegrator"]->AddTask(&Particles::AdjustMeshRefinement, + this, id.csend); break; } diff --git a/src/pgen/kh.cpp b/src/pgen/kh.cpp index 98c5e308..af1206ba 100644 --- a/src/pgen/kh.cpp +++ b/src/pgen/kh.cpp @@ -191,7 +191,6 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { // Initialize particles if (pmbp->ppart != nullptr) { - // captures for the kernel auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0; auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size; @@ -259,7 +258,7 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { nparticles += 1; } nparticles_per_zone.d_view(idx,0) = nparticles; - rand_pool64.free_state(rand_gen); + rand_pool64.free_state(rand_gen); }); // count total number of particles in this pack @@ -274,8 +273,10 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { nparticles_per_zone.template sync(); // helpful debug statement - std::cout << "total mass across domain: " << total_mass << ", total mass in pack: " << total_mass_thispack - << ", target nparticles: " << target_nparticles << ", nparticles in pack: " << nparticles_thispack + std::cout << "total mass across domain: " << total_mass + << ", total mass in pack: " << total_mass_thispack + << ", target nparticles: " << target_nparticles + << ", nparticles in pack: " << nparticles_thispack << std::endl; // reallocate space for particles and get relevant pointers @@ -304,9 +305,12 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { pi(PLASTLEVEL,pidx) = mblev.d_view(m); // set particle to zone center - pr(IPX,pidx) = CellCenterX(i-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max); - pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max); - pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) - + pr(IPX,pidx) = CellCenterX(i-is, nx1, mbsize.d_view(m).x1min, + mbsize.d_view(m).x1max); + pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min, + mbsize.d_view(m).x2max); + pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min, + mbsize.d_view(m).x3max) - mbsize.d_view(m).dx3/2; } }); diff --git a/tst/scripts/style/check_athena_cpp_style.sh b/tst/scripts/style/check_athena_cpp_style.sh index 76309bb2..8895fa48 100755 --- a/tst/scripts/style/check_athena_cpp_style.sh +++ b/tst/scripts/style/check_athena_cpp_style.sh @@ -17,7 +17,7 @@ # Obtain Google C++ Style Linter: echo "Obtaining Google C++ Style cpplint.py test" -curl https://raw.githubusercontent.com/cpplint/cpplint/develop/cpplint.py \ +curl https://raw.githubusercontent.com/cpplint/cpplint/master/cpplint.py \ --output cpplint.py --silent # Apply Google C++ Style Linter to all source code files at once: diff --git a/tst/scripts/style/clang_format_v1.cfg b/tst/scripts/style/clang_format_v1.cfg new file mode 100644 index 00000000..bbdb1688 --- /dev/null +++ b/tst/scripts/style/clang_format_v1.cfg @@ -0,0 +1,196 @@ +## see https://clang.llvm.org/docs/ClangFormatStyleOptions.html +## +## usage: +## clang-format -style=file:/path/to/clang_format_v1.cfg -i file.cpp +## +--- +AccessModifierOffset: 1 +AlignAfterOpenBracket: AlwaysBreak +AlignArrayOfStructures: Right +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveBitFields: Consecutive +AlignConsecutiveDeclarations: None +AlignConsecutiveMacros: Consecutive +AlignEscapedNewlines: Right +AlignOperands: DontAlign +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +AttributeMacros: +- __capability +BasedOnStyle: Chromium +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: true + AfterControlStatement: Never + AfterEnum: true + AfterExternBlock: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: true + SplitEmptyFunction: true + SplitEmptyNamespace: true + SplitEmptyRecord: true +BreakAfterJavaFieldAnnotations: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Attach +BreakBeforeConceptDeclarations: true +BreakBeforeInheritanceComma: true +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: AfterColon +BreakConstructorInitializersBeforeComma: false +BreakInheritanceList: AfterColon +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: true +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 14 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +## after public or private +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Never +ExperimentalAutoDetectBinPacking: true +FixNamespaceComments: true +ForEachMacros: +- foreach +- Q_FOREACH +- BOOST_FOREACH +IfMacros: +- KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: +- CaseSensitive: false + Priority: 2 + Regex: ^"(llvm|llvm-c|clang|clang-c)/ + SortPriority: 0 +- CaseSensitive: false + Priority: 3 + Regex: ^(<|"(gtest|gmock|isl|json)/) + SortPriority: 0 +- CaseSensitive: false + Priority: 1 + Regex: .* + SortPriority: 0 +IncludeIsMainRegex: (Test)?$ +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: true +IndentCaseBlocks: true +IndentCaseLabels: true +IndentExternBlock: Indent +IndentGotoLabels: false +IndentPPDirectives: None +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Single +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +Language: Cpp +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 0 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PPIndentWidth: -1 +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 0 +PenaltyBreakBeforeFirstCallParameter: 21 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 107 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1073 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1008223 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +QualifierAlignment: Leave ## don't change this +ReferenceAlignment: Pointer +ReflowComments: true ## set true to brack a long comment +RemoveBracesLLVM: false +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 0 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Both +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: false +SpaceBeforeInheritanceColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: false + AfterForeachMacros: true + AfterFunctionDeclarationName: false + AfterFunctionDefinitionName: false + AfterIfMacros: true + AfterOverloadedOperator: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Maximum: -1 + Minimum: 1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp03 +StatementAttributeLikeMacros: +- Q_EMIT +StatementMacros: +- Q_UNUSED +- QT_REQUIRE_VERSION +TabWidth: 0 +UseCRLF: true +UseTab: ForIndentation +WhitespaceSensitiveMacros: +- STRINGIZE +- PP_STRINGIZE +- BOOST_PP_STRINGIZE +- NS_SWIFT_NAME +- CF_SWIFT_NAME +... diff --git a/tst/scripts/style/clang_format_v2.cfg b/tst/scripts/style/clang_format_v2.cfg new file mode 100644 index 00000000..1d4c3a8a --- /dev/null +++ b/tst/scripts/style/clang_format_v2.cfg @@ -0,0 +1,196 @@ +## see https://clang.llvm.org/docs/ClangFormatStyleOptions.html +## +## usage: +## clang-format -style=file:/path/to/clang_format_v2.cfg -i file.hpp +## +--- +AccessModifierOffset: 1 +AlignAfterOpenBracket: AlwaysBreak +AlignArrayOfStructures: Right +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveBitFields: Consecutive +AlignConsecutiveDeclarations: None +AlignConsecutiveMacros: Consecutive +AlignEscapedNewlines: Right +AlignOperands: DontAlign +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +AttributeMacros: +- __capability +BasedOnStyle: Chromium +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: true + AfterControlStatement: Never + AfterEnum: true + AfterExternBlock: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: true + SplitEmptyFunction: true + SplitEmptyNamespace: true + SplitEmptyRecord: true +BreakAfterJavaFieldAnnotations: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Attach +BreakBeforeConceptDeclarations: true +BreakBeforeInheritanceComma: true +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: AfterColon +BreakConstructorInitializersBeforeComma: false +BreakInheritanceList: AfterColon +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: true +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 14 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +## after public or private +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Never +ExperimentalAutoDetectBinPacking: true +FixNamespaceComments: true +ForEachMacros: +- foreach +- Q_FOREACH +- BOOST_FOREACH +IfMacros: +- KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: +- CaseSensitive: false + Priority: 2 + Regex: ^"(llvm|llvm-c|clang|clang-c)/ + SortPriority: 0 +- CaseSensitive: false + Priority: 3 + Regex: ^(<|"(gtest|gmock|isl|json)/) + SortPriority: 0 +- CaseSensitive: false + Priority: 1 + Regex: .* + SortPriority: 0 +IncludeIsMainRegex: (Test)?$ +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: true +IndentCaseBlocks: true +IndentCaseLabels: true +IndentExternBlock: Indent +IndentGotoLabels: false +IndentPPDirectives: None +IndentRequires: false +IndentWidth: 1 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Single +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +Language: Cpp +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 0 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PPIndentWidth: -1 +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 0 +PenaltyBreakBeforeFirstCallParameter: 21 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 107 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1073 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1008223 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +QualifierAlignment: Leave ## don't change this +ReferenceAlignment: Pointer +ReflowComments: true ## set true to brack a long comment +RemoveBracesLLVM: false +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 0 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Both +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: false +SpaceBeforeInheritanceColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: false + AfterForeachMacros: true + AfterFunctionDeclarationName: false + AfterFunctionDefinitionName: false + AfterIfMacros: true + AfterOverloadedOperator: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: false +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Maximum: -1 + Minimum: 1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp03 +StatementAttributeLikeMacros: +- Q_EMIT +StatementMacros: +- Q_UNUSED +- QT_REQUIRE_VERSION +TabWidth: 0 +UseCRLF: true +UseTab: ForIndentation +WhitespaceSensitiveMacros: +- STRINGIZE +- PP_STRINGIZE +- BOOST_PP_STRINGIZE +- NS_SWIFT_NAME +- CF_SWIFT_NAME +... From 0c1c858a5477d5da24b69c12a56bdaa16356b98a Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Thu, 1 Aug 2024 17:19:31 -0400 Subject: [PATCH 05/10] add particle IC for gr torus to remote --- src/pgen/gr_torus.cpp | 128 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/src/pgen/gr_torus.cpp b/src/pgen/gr_torus.cpp index 1220fb47..b3a16401 100644 --- a/src/pgen/gr_torus.cpp +++ b/src/pgen/gr_torus.cpp @@ -42,6 +42,7 @@ #include "hydro/hydro.hpp" #include "mhd/mhd.hpp" #include "radiation/radiation.hpp" +#include "particles/particles.hpp" #include @@ -791,6 +792,133 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { pmbp->pmhd->peos->PrimToCons(w0_, bcc0_, u0_, is, ie, js, je, ks, ke); } + // Initialize particles + if (pmbp->ppart != nullptr) { + // captures for the kernel + auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0; + auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size; + auto &mblev = pmy_mesh_->pmb_pack->pmb->mb_lev; + auto gids = pmy_mesh_->pmb_pack->gids; + + auto &indcs = pmy_mesh_->mb_indcs; + int &is = indcs.is; + int &js = indcs.js; + int &ks = indcs.ks; + int &nx1 = indcs.nx1; + int &nx2 = indcs.nx2; + int &nx3 = indcs.nx3; + + // init RNG + Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids); + + // count total mass across the domain + Real total_mass = 0.0; + const int nmkji = (pmbp->nmb_thispack)*indcs.nx3*indcs.nx2*indcs.nx1; + const int nkji = indcs.nx3*indcs.nx2*indcs.nx1; + const int nji = indcs.nx2*indcs.nx1; + + Kokkos::parallel_reduce("pgen_mass", Kokkos::RangePolicy<>(DevExeSpace(), 0, nmkji), + KOKKOS_LAMBDA(const int &idx, Real &total_mass) { + // compute m,k,j,i indices of thread and evaluate + int m = (idx)/nkji; + int k = (idx - m*nkji)/nji; + int j = (idx - m*nkji - k*nji)/indcs.nx1; + int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is; + k += ks; + j += js; + + Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3; + total_mass += u0_(m,IDN,k,j,i) * vol; + }, total_mass); + + Real total_mass_thispack = total_mass; + +#if MPI_PARALLEL_ENABLED + // get total mass over all MPI ranks + MPI_Allreduce(MPI_IN_PLACE, &total_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); +#endif + + // get number of particles for this mbpack using MC to deal with fractional particles + Real target_nparticles = pin->GetOrAddReal("particles","target_count",100000.0); + Real mass_per_particle = total_mass / target_nparticles; + + // create shared array to hold number of particles per zone + DualArray2D nparticles_per_zone("partperzone", nmkji,2); + par_for("particle_count", DevExeSpace(), 0,nmkji-1, + KOKKOS_LAMBDA(int idx) { + int m = (idx)/nkji; + int k = (idx - m*nkji)/nji; + int j = (idx - m*nkji - k*nji)/indcs.nx1; + int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is; + k += ks; + j += js; + Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3; + Real nppc = u0_(m,IDN,k,j,i) * vol / mass_per_particle; + int nparticles = static_cast(nppc); + nppc = fabs(fmod(nppc, 1.0)); + auto rand_gen = rand_pool64.get_state(); + if (rand_gen.frand() < nppc) { + nparticles += 1; + } + nparticles_per_zone.d_view(idx,0) = nparticles; + rand_pool64.free_state(rand_gen); + }); + + // count total number of particles in this pack + nparticles_per_zone.template modify(); + nparticles_per_zone.template sync(); + int nparticles_thispack = 0; + for (int i=0; i(); + nparticles_per_zone.template sync(); + + // helpful debug statement + std::cout << "total mass across domain: " << total_mass + << ", total mass in pack: " << total_mass_thispack + << ", target nparticles: " << target_nparticles + << ", nparticles in pack: " << nparticles_thispack + << std::endl; + + // reallocate space for particles and get relevant pointers + pmy_mesh_->pmb_pack->ppart->ReallocateParticles(nparticles_thispack); + + auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata; + auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata; + + // initialize particles. only intended for Lagrangian-type particles + par_for("part_init", DevExeSpace(), 0,nmkji-1, + KOKKOS_LAMBDA(int idx) { + int m = (idx)/nkji; + int k = (idx - m*nkji)/nji; + int j = (idx - m*nkji - k*nji)/indcs.nx1; + int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is; + k += ks; + j += js; + + int nparticles_in_zone = nparticles_per_zone.d_view(idx,0); + int starting_index = nparticles_per_zone.d_view(idx,1); + + for (int p=0; p Date: Sat, 3 Aug 2024 12:19:52 -0400 Subject: [PATCH 06/10] revert boundary deletion from dev code. copy in fix for lev-1 from @DVilla95. --- src/bvals/bvals_part.cpp | 475 ++++++++++++++++++++++++++------------- 1 file changed, 319 insertions(+), 156 deletions(-) diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp index ed137438..e7e27688 100644 --- a/src/bvals/bvals_part.cpp +++ b/src/bvals/bvals_part.cpp @@ -44,6 +44,23 @@ void UpdateGID(int &newgid, NeighborBlock nghbr, int myrank, int *pcounter, return; } +//---------------------------------------------------------------------------------------- +//! \fn void ParticlesBoundaryValues::MarkForDestruction() +//! \brief Adds particles that cross the user-defined mesh boundariesto a list used then +//! to destroy them. The list uses the same structure as the sendlist for convenience +//! in the following functions. +//! This opeartion should also be performed for single process runs + +KOKKOS_INLINE_FUNCTION +void MarkForDestruction(int *pcounter, DualArray1D dlist, int p) { + int index = Kokkos::atomic_fetch_add(pcounter,1); + dlist.d_view(index).prtcl_indx = p; + // These particles don't actually get sent, thus following information is not needed + dlist.d_view(index).dest_gid = 0; + dlist.d_view(index).dest_rank = 0; + return; +} + //---------------------------------------------------------------------------------------- //! \fn void ParticlesBoundaryValues::SetNewGID() //! \brief @@ -61,14 +78,14 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { auto &nghbr = pmy_part->pmy_pack->pmb->nghbr; auto &psendl = sendlist; int counter=0; - int *pcounter = &counter; + Kokkos::View atom_count("atom_count"); + Kokkos::deep_copy(atom_count, counter); bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d; bool &three_d = pmy_part->pmy_pack->pmesh->three_d; - Kokkos::realloc(sendlist, static_cast(0.1*npart)); + Kokkos::realloc(sendlist, static_cast(npart)); par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) { int m = pi(PGID,p) - gids; - int mylevel = mblev.d_view(m); Real x1 = pr(IPX,p); Real x2 = pr(IPY,p); @@ -88,94 +105,215 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { int fx = (x1 < 0.5*(mbsize.d_view(m).x1min + mbsize.d_view(m).x1max))? 0 : 1; int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1; int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1; - fy = multi_d ? fy : 0; fz = three_d ? fz : 0; + bool check_boundary = false; + // only update particle GID if it has crossed MeshBlock boundary - if ((abs(ix) + abs(iy) + abs(iz)) != 0) { - if (iz == 0) { - if (iy == 0) { - // x1 face - int indx = NeighborIndex(ix,0,0,0,0); // neighbor at same level - if (nghbr.d_view(m,indx).lev > mylevel) { // neighbor at finer level - indx = NeighborIndex(ix,0,0,fy,fz); + if ((abs(ix) + abs(iy) + abs(iz)) != 0 && !check_boundary) { + // The way GIDs are determined currently is not reliable in SMR cases + // due to nighbours across edges and corners being uninitialized. + // Need extra check + bool send_to_coarser = false; + if (ix < 0) { + // level might be initizialized to -1 on some neighbours + for (int iop = 0; iop <= 3; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; } - while (nghbr.d_view(m,indx).gid < 0) {indx++;} // neighbor at coarser level - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); - } else if (ix == 0) { - // x2 face - int indx = NeighborIndex(0,iy,0,0,0); - if (nghbr.d_view(m,indx).lev > mylevel) { - indx = NeighborIndex(0,iy,0,fx,fz); + } + } else if (ix > 0) { + for (int iop = 4; iop <= 7; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; } - while (nghbr.d_view(m,indx).gid < 0) {indx++;} - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); - } else { - // x1x2 edge - int indx = NeighborIndex(ix,iy,0,0,0); - if (nghbr.d_view(m,indx).lev > mylevel) { - indx = NeighborIndex(ix,iy,0,fz,0); + } + } + if (iy < 0) { + for (int iop = 8; iop <= 11; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; } - while (nghbr.d_view(m,indx).gid < 0) {indx++;} - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); } - } else if (iy == 0) { - if (ix == 0) { - // x3 face - int indx = NeighborIndex(0,0,iz,0,0); - if (nghbr.d_view(m,indx).lev > mylevel) { - indx = NeighborIndex(0,0,iz,fx,fy); + } else if (iy > 0) { + for (int iop = 12; iop <= 15; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; } - while (nghbr.d_view(m,indx).gid < 0) {indx++;} - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); - } else { - // x3x1 edge - int indx = NeighborIndex(ix,0,iz,0,0); - if (nghbr.d_view(m,indx).lev > mylevel) { - indx = NeighborIndex(ix,0,iz,fy,0); + } + } + if (iz < 0) { + for (int iop = 24; iop <= 27; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; } - while (nghbr.d_view(m,indx).gid < 0) {indx++;} - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); } - } else { - if (ix == 0) { - // x2x3 edge - int indx = NeighborIndex(0,iy,iz,0,0); - if (nghbr.d_view(m,indx).lev > mylevel) { - indx = NeighborIndex(0,iy,iz,fx,0); + } else if (iz > 0) { + for (int iop = 28; iop <= 31; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; + } + } + } + int indx = 0; + if (iz == 0) { + if (iy == 0) { + // x1 face + indx = NeighborIndex(ix,0,0,0,0); // neighbor at same level + if (nghbr.d_view(m,indx).lev > mylevel) { // neighbor at finer level + indx = NeighborIndex(ix,0,0,fy,fz); + } + while (nghbr.d_view(m,indx).gid < 0) {indx++;} // neighbor at coarser level + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); + } else if (ix == 0) { + // x2 face + indx = NeighborIndex(0,iy,0,0,0); + if (nghbr.d_view(m,indx).lev > mylevel) { + indx = NeighborIndex(0,iy,0,fx,fz); + } + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); + } else { + // x1x2 edge + indx = NeighborIndex(ix,iy,0,0,0); + if (nghbr.d_view(m,indx).lev > mylevel) { + indx = NeighborIndex(ix,iy,0,fz,0); + } + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + // Using SMR some edge and corner neighbours are uninitialized, + // thus check if the index has increased over the appropriate range + // and try to communicate through faces to a coarser meshblock + // Communication to coarser meshblocks should always go through faces + if (indx > 23 || send_to_coarser) { + bool found_coarser = false; + // First try through x face + indx = NeighborIndex(ix,0,0,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + // If all faces in the x direction are on a finer level this should + // have already been covered by previous logic, thus check y + if ( !found_coarser ) { + indx = NeighborIndex(0,iy,0,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + } + } + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); + } + } else if (iy == 0) { + if (ix == 0) { + // x3 face + indx = NeighborIndex(0,0,iz,0,0); + if (nghbr.d_view(m,indx).lev > mylevel) { + indx = NeighborIndex(0,0,iz,fx,fy); + } + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); + } else { + // x3x1 edge + indx = NeighborIndex(ix,0,iz,0,0); + if (nghbr.d_view(m,indx).lev > mylevel) { + indx = NeighborIndex(ix,0,iz,fy,0); + } + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (indx > 39 || send_to_coarser) { + bool found_coarser = false; + indx = NeighborIndex(ix,0,0,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + if ( !found_coarser ) { + indx = NeighborIndex(0,0,iz,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + } + } + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); } - while (nghbr.d_view(m,indx).gid < 0) {indx++;} - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); } else { - // corners - int indx = NeighborIndex(ix,iy,iz,0,0); - UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p); + if (ix == 0) { + // x2x3 edge + indx = NeighborIndex(0,iy,iz,0,0); + if (nghbr.d_view(m,indx).lev > mylevel) { + indx = NeighborIndex(0,iy,iz,fx,0); + } + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (indx > 47 || send_to_coarser) { + bool found_coarser = false; + indx = NeighborIndex(0,iy,0,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + if ( !found_coarser ) { + indx = NeighborIndex(0,0,iz,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + } + } + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); + } else { + // corners + indx = NeighborIndex(ix,iy,iz,0,0); + if (nghbr.d_view(m,indx).gid < 0 || send_to_coarser) { + bool found_coarser = false; + indx = NeighborIndex(ix,0,0,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + if ( !found_coarser ) { + indx = NeighborIndex(0,iy,0,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + } + if ( !found_coarser ) { + indx = NeighborIndex(0,0,iz,0,0); + while (nghbr.d_view(m,indx).gid < 0) {indx++;} + if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { + found_coarser = true; + } + } + } + UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p); + } } - } - // reset x,y,z positions if particle crosses Mesh boundary using periodic BCs - if (x1 < meshsize.x1min) { - pr(IPX,p) += (meshsize.x1max - meshsize.x1min); - } else if (x1 > meshsize.x1max) { - pr(IPX,p) -= (meshsize.x1max - meshsize.x1min); - } - if (x2 < meshsize.x2min) { - pr(IPY,p) += (meshsize.x2max - meshsize.x2min); - } else if (x2 > meshsize.x2max) { - pr(IPY,p) -= (meshsize.x2max - meshsize.x2min); - } - if (x3 < meshsize.x3min) { - pr(IPZ,p) += (meshsize.x3max - meshsize.x3min); - } else if (x3 > meshsize.x3max) { - pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min); + // reset x,y,z positions if particle crosses Mesh boundary using periodic BCs + if (x1 < meshsize.x1min) { + pr(IPX,p) += (meshsize.x1max - meshsize.x1min); + } else if (x1 > meshsize.x1max) { + pr(IPX,p) -= (meshsize.x1max - meshsize.x1min); + } + if (x2 < meshsize.x2min) { + pr(IPY,p) += (meshsize.x2max - meshsize.x2min); + } else if (x2 > meshsize.x2max) { + pr(IPY,p) -= (meshsize.x2max - meshsize.x2min); + } + if (x3 < meshsize.x3min) { + pr(IPZ,p) += (meshsize.x3max - meshsize.x3min); + } else if (x3 > meshsize.x3max) { + pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min); + } } - } }); + Kokkos::deep_copy(counter, atom_count); nprtcl_send = counter; Kokkos::resize(sendlist, nprtcl_send); - // sync sendlist device array with host + sendlist.template modify(); sendlist.template sync(); @@ -251,6 +389,8 @@ TaskStatus ParticlesBoundaryValues::CountSendsAndRecvs() { //! \brief TaskStatus ParticlesBoundaryValues::InitPrtclRecv() { + // Moved assignment here for better compatibility with destruction logic + nprtcl_recv=0; #if MPI_PARALLEL_ENABLED // load STL::vector of ParticleMessageData with for // receives // on this rank. Length will be nrecvs, initially this length is unknown @@ -265,55 +405,58 @@ TaskStatus ParticlesBoundaryValues::InitPrtclRecv() { nrecvs = recvs_thisrank.size(); // Figure out how many particles will be received from all ranks - nprtcl_recv=0; for (int n=0; nnrdata)*nprtcl_recv); - Kokkos::realloc(prtcl_irecvbuf, (pmy_part->nidata)*nprtcl_recv); - - // Post non-blocking receives bool no_errors=true; - rrecv_req.clear(); - irecv_req.clear(); - for (int n=0; n 0) { + // Allocate receive buffer + Kokkos::realloc(prtcl_rrecvbuf, (pmy_part->nrdata)*nprtcl_recv); + Kokkos::realloc(prtcl_irecvbuf, (pmy_part->nidata)*nprtcl_recv); + + // Post non-blocking receives + rrecv_req.clear(); + irecv_req.clear(); + for (int n=0; nnrdata)*(recvs_thisrank[n].nprtcls); - int data_end = data_start + (pmy_part->nrdata)*(recvs_thisrank[n].nprtcls - 1); - auto recv_ptr = Kokkos::subview(prtcl_rrecvbuf, std::make_pair(data_start, data_end)); - int drank = recvs_thisrank[n].sendrank; - int tag = 0; // 0 for Reals, 1 for ints - - // Post non-blocking receive - int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_ATHENA_REAL, drank, tag, - mpi_comm_part, &(rrecv_req[n])); - if (ierr != MPI_SUCCESS) {no_errors=false;} - data_start += data_size; - } - // Init receives for ints - data_start=0; - for (int n=0; nnidata)*(recvs_thisrank[n].nprtcls); - int data_end = data_start + (pmy_part->nidata)*(recvs_thisrank[n].nprtcls - 1); - auto recv_ptr = Kokkos::subview(prtcl_irecvbuf, std::make_pair(data_start, data_end)); - int drank = recvs_thisrank[n].sendrank; - int tag = 1; // 0 for Reals, 1 for ints - - // Post non-blocking receive - int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_INT, drank, tag, - mpi_comm_part, &(irecv_req[n])); - if (ierr != MPI_SUCCESS) {no_errors=false;} - data_start += data_size; + // Init receives for Reals + int data_start=0; + for (int n=0; nnrdata)*(recvs_thisrank[n].nprtcls); + int data_end = data_start + (pmy_part->nrdata)*(recvs_thisrank[n].nprtcls - 1); + auto recv_ptr = Kokkos::subview(prtcl_rrecvbuf, + std::make_pair(data_start, data_end)); + int drank = recvs_thisrank[n].sendrank; + int tag = 0; // 0 for Reals, 1 for ints + + // Post non-blocking receive + int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_ATHENA_REAL, drank, tag, + mpi_comm_part, &(rrecv_req[n])); + if (ierr != MPI_SUCCESS) {no_errors=false;} + data_start += data_size; + } + // Init receives for ints + data_start=0; + for (int n=0; nnidata)*(recvs_thisrank[n].nprtcls); + int data_end = data_start + (pmy_part->nidata)*(recvs_thisrank[n].nprtcls - 1); + auto recv_ptr = Kokkos::subview(prtcl_irecvbuf, + std::make_pair(data_start, data_end)); + int drank = recvs_thisrank[n].sendrank; + int tag = 1; // 0 for Reals, 1 for ints + + // Post non-blocking receive + int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_INT, drank, tag, + mpi_comm_part, &(irecv_req[n])); + if (ierr != MPI_SUCCESS) {no_errors=false;} + data_start += data_size; + } } // Quit if MPI error detected @@ -352,8 +495,9 @@ TaskStatus ParticlesBoundaryValues::PackAndSendPrtcls() { auto &pi = pmy_part->prtcl_idata; auto &rsendbuf = prtcl_rsendbuf; auto &isendbuf = prtcl_isendbuf; + auto &slist = sendlist; par_for("ppack",DevExeSpace(),0,(nprtcl_send-1), KOKKOS_LAMBDA(const int n) { - int p = sendlist.d_view(n).prtcl_indx; + int p = slist.d_view(n).prtcl_indx; for (int i=0; i(); - sendlist.template sync(); + //In single process runs, size of particle arrays can only be reduced + int &npart = pmy_part->nprtcl_thispack; + + int new_npart = npart + (nprtcl_recv - nprtcl_send); + + // nremain defined outside compiler instructions for end logic + int nremain = 0; + int nremain_d = nprtcl_send - nprtcl_recv; + +#if MPI_PARALLEL_ENABLED // increase size of particle arrays if needed - int new_npart = pmy_part->nprtcl_thispack + (nprtcl_recv - nprtcl_send); if (nprtcl_recv > nprtcl_send) { Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart); Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart); @@ -460,8 +607,18 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() { } // exit if particle communications have not completed if (bflag) {return TaskStatus::incomplete;} +#endif + +#if MPI_PARALLEL_ENABLED + if (nprtcl_send > 0) { + // Sort sendlist on host by index in particle array + std::sort(KE::begin(sendlist.h_view), KE::end(sendlist.h_view), SortByIndex); + // sync sendlist host array with device. This results in sorted array on device + sendlist.template modify(); + sendlist.template sync(); + } - // unpack particles into positions of sent particles + // unpack particles into positions of sent or destroyed particles if (nprtcl_recv > 0) { int nrdata = pmy_part->nrdata; int nidata = pmy_part->nidata; @@ -469,13 +626,16 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() { auto &pi = pmy_part->prtcl_idata; auto &rrecvbuf = prtcl_rrecvbuf; auto &irecvbuf = prtcl_irecvbuf; - int npart = pmy_part->nprtcl_thispack; + auto &slist = sendlist; + int nps = nprtcl_send; + int npa = pmy_part->nprtcl_thispack; par_for("punpack",DevExeSpace(),0,(nprtcl_recv-1), KOKKOS_LAMBDA(const int n) { int p; - if (n < nprtcl_send) { - p = sendlist.d_view(n).prtcl_indx; // place particles in holes created by sends + if (n < nps ) { + p = slist.d_view(n).prtcl_indx; // place particles in holes created by send } else { - p = npart + (n - nprtcl_send); // place particle at end of arrays + // place particle at end of arrays + p = npa + (n - nps); } for (int i=0; i 0) { - int i_last_hole = nprtcl_send-1; - int i_next_hole = nprtcl_recv; - for (int n=1; n<=nremain; ++n) { - int nend = npart-n; - if (nend > sendlist.h_view(i_last_hole).prtcl_indx) { - // copy particle from end into hole - int next_hole = sendlist.h_view(i_next_hole).prtcl_indx; - auto rdest = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, next_hole); - auto rsrc = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, nend); - Kokkos::deep_copy(rdest, rsrc); - auto idest = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, next_hole); - auto isrc = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, nend); - Kokkos::deep_copy(idest, isrc); - i_next_hole += 1; - } else { - // this index contains a hole, so do nothing except find new index of last hole - i_last_hole -= 1; - } + // At this point have filled npart_recv holes in particle arrays from sends + // If (nprtcl_recv < nprtcl_send), have to move particles from end of arrays to fill + // remaining holes + nremain = nprtcl_send - nprtcl_recv; + if (nremain > 0) { + int i_last_hole = nprtcl_send-1; + int i_next_hole = nprtcl_recv; + for (int n=1; n<=nremain; ++n) { + int nend = npart-n; + --nremain_d; + if (nend > sendlist.h_view(i_last_hole).prtcl_indx) { + // copy particle from end into hole + int next_hole = sendlist.h_view(i_next_hole).prtcl_indx; + auto rdest = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, next_hole); + auto rsrc = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, nend); + Kokkos::deep_copy(rdest, rsrc); + auto idest = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, next_hole); + auto isrc = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, nend); + Kokkos::deep_copy(idest, isrc); + i_next_hole += 1; + } else { + // this index contains a hole, so do nothing except find new index of last hole + i_last_hole -= 1; } - - // shrink size of particle data arrays - Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart); - Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart); } } // Update nparticles_thisrank. Update cost array (use npart_thismb[nmb]?) - pmy_part->nprtcl_thispack = new_npart; - pmy_part->pmy_pack->pmesh->nprtcl_thisrank = new_npart; MPI_Allgather(&new_npart,1,MPI_INT,(pmy_part->pmy_pack->pmesh->nprtcl_eachrank),1, MPI_INT,MPI_COMM_WORLD); #endif + + if (nprtcl_send - nprtcl_recv > 0) { + // shrink size of particle data arrays + Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart); + Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart); + } + pmy_part->nprtcl_thispack = new_npart; + pmy_part->pmy_pack->pmesh->nprtcl_thisrank = new_npart; return TaskStatus::complete; } From 9c0286c9cd94e7d97e2b6a4542b600ab61226a10 Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Sat, 3 Aug 2024 19:19:47 -0400 Subject: [PATCH 07/10] fix vtk output to respect only one POINT_DATA header --- src/outputs/vtk_prtcl.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/outputs/vtk_prtcl.cpp b/src/outputs/vtk_prtcl.cpp index eb8371c6..dcac23b2 100644 --- a/src/outputs/vtk_prtcl.cpp +++ b/src/outputs/vtk_prtcl.cpp @@ -182,15 +182,24 @@ void ParticleVTKOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin) { } // Write Part 6: scalar particle data - // Write gid of points + bool have_written_pointdata_header = false; + for (int n=0; n<(pm->pmb_pack->ppart->nidata); ++n) { std::stringstream msg; + + if (!have_written_pointdata_header) { + have_written_pointdata_header = true; + msg << std::endl << std::endl << "POINT_DATA " << npout_total << std::endl; + } + if (n == static_cast(PGID)) { - msg << std::endl << std::endl << "POINT_DATA " << npout_total << std::endl - << "SCALARS gid float" << std::endl << "LOOKUP_TABLE default" << std::endl; + msg << std::endl << "SCALARS gid float" << std::endl + << "LOOKUP_TABLE default" << std::endl; } else if (n == static_cast(PTAG)) { - msg << std::endl << std::endl << "POINT_DATA " << npout_total << std::endl - << "SCALARS ptag float" << std::endl << "LOOKUP_TABLE default" << std::endl; + msg << std::endl << "SCALARS ptag float" << std::endl + << "LOOKUP_TABLE default" << std::endl; + } else if (n == static_cast(PLASTMOVE) || n == static_cast(PLASTLEVEL)) { + continue; } if (global_variable::my_rank == 0) { partfile.Write_any_type_at(msg.str().c_str(),msg.str().size(),header_offset,"byte"); From bad094ee8c1d3da80b4b379ed16e070cc151095e Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Mon, 5 Aug 2024 21:14:36 -0400 Subject: [PATCH 08/10] code to freeze particles but reverted to not remove them --- src/bvals/bvals.hpp | 3 +- src/bvals/bvals_part.cpp | 164 +++++---- src/particles/particles.cpp | 6 + src/particles/particles.hpp | 2 + src/particles/particles_pushers.cpp | 538 ++++++++++++++-------------- src/pgen/gr_torus.cpp | 3 + 6 files changed, 382 insertions(+), 334 deletions(-) diff --git a/src/bvals/bvals.hpp b/src/bvals/bvals.hpp index 134638a8..d39c99e2 100644 --- a/src/bvals/bvals.hpp +++ b/src/bvals/bvals.hpp @@ -247,8 +247,9 @@ class ParticlesBoundaryValues { ParticlesBoundaryValues(particles::Particles *ppart, ParameterInput *pin); ~ParticlesBoundaryValues(); - int nprtcl_send, nprtcl_recv; + int nprtcl_send, nprtcl_recv, nprtcl_destroy; DualArray1D sendlist; + DualArray1D destroylist; // Data needed to count number of messages and particles to send between ranks int nsends; // number of MPI sends to neighboring ranks on this rank diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp index e7e27688..1e9b5eda 100644 --- a/src/bvals/bvals_part.cpp +++ b/src/bvals/bvals_part.cpp @@ -46,7 +46,7 @@ void UpdateGID(int &newgid, NeighborBlock nghbr, int myrank, int *pcounter, //---------------------------------------------------------------------------------------- //! \fn void ParticlesBoundaryValues::MarkForDestruction() -//! \brief Adds particles that cross the user-defined mesh boundariesto a list used then +//! \brief Adds particles that cross the user-defined mesh boundaries to a list used //! to destroy them. The list uses the same structure as the sendlist for convenience //! in the following functions. //! This opeartion should also be performed for single process runs @@ -83,80 +83,105 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d; bool &three_d = pmy_part->pmy_pack->pmesh->three_d; + // support for user boundary conditions like for gr torus + auto &mb_bcs = pmy_part->pmy_pack->pmb->mb_bcs; + const Real &min_rad = pmy_part->min_radius; + Kokkos::realloc(sendlist, static_cast(npart)); par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) { - int m = pi(PGID,p) - gids; - int mylevel = mblev.d_view(m); - Real x1 = pr(IPX,p); - Real x2 = pr(IPY,p); - Real x3 = pr(IPZ,p); - - // length of MeshBlock in each direction - Real lx = (mbsize.d_view(m).x1max - mbsize.d_view(m).x1min); - Real ly = (mbsize.d_view(m).x2max - mbsize.d_view(m).x2min); - Real lz = (mbsize.d_view(m).x3max - mbsize.d_view(m).x3min); - - // integer offset of particle relative to center of MeshBlock (-1,0,+1) - int ix = static_cast((x1 - mbsize.d_view(m).x1min + lx)/lx) - 1; - int iy = static_cast((x2 - mbsize.d_view(m).x2min + ly)/ly) - 1; - int iz = static_cast((x3 - mbsize.d_view(m).x3min + lz)/lz) - 1; - - // sublock indices for faces and edges with S/AMR - int fx = (x1 < 0.5*(mbsize.d_view(m).x1min + mbsize.d_view(m).x1max))? 0 : 1; - int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1; - int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1; - fy = multi_d ? fy : 0; - fz = three_d ? fz : 0; - - bool check_boundary = false; - - // only update particle GID if it has crossed MeshBlock boundary - if ((abs(ix) + abs(iy) + abs(iz)) != 0 && !check_boundary) { - // The way GIDs are determined currently is not reliable in SMR cases - // due to nighbours across edges and corners being uninitialized. - // Need extra check - bool send_to_coarser = false; - if (ix < 0) { - // level might be initizialized to -1 on some neighbours - for (int iop = 0; iop <= 3; ++iop) { - if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { - send_to_coarser = true; - } - } - } else if (ix > 0) { - for (int iop = 4; iop <= 7; ++iop) { - if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { - send_to_coarser = true; - } - } + if (pi(PLASTMOVE,p) == -1) { + // this particle is frozen so skip update + } else if (pi(PLASTMOVE,p) == -2) { + // TODO, mark for deletion + } else { + // do regular boundary search and update + int m = pi(PGID,p) - gids; + int mylevel = mblev.d_view(m); + Real x1 = pr(IPX,p); + Real x2 = pr(IPY,p); + Real x3 = pr(IPZ,p); + + // length of MeshBlock in each direction + Real lx = (mbsize.d_view(m).x1max - mbsize.d_view(m).x1min); + Real ly = (mbsize.d_view(m).x2max - mbsize.d_view(m).x2min); + Real lz = (mbsize.d_view(m).x3max - mbsize.d_view(m).x3min); + + // integer offset of particle relative to center of MeshBlock (-1,0,+1) + int ix = static_cast((x1 - mbsize.d_view(m).x1min + lx)/lx) - 1; + int iy = static_cast((x2 - mbsize.d_view(m).x2min + ly)/ly) - 1; + int iz = static_cast((x3 - mbsize.d_view(m).x3min + lz)/lz) - 1; + + // sublock indices for faces and edges with S/AMR + int fx = (x1 < 0.5*(mbsize.d_view(m).x1min + mbsize.d_view(m).x1max))? 0 : 1; + int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1; + int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1; + fy = multi_d ? fy : 0; + fz = three_d ? fz : 0; + + // if the particle is outside of the "user-defined" boundaries, + // mark it for no more updates + bool check_boundary = ( + mb_bcs.d_view(m, BoundaryFace::inner_x3) == BoundaryFlag::user && iz < 0) + || ( mb_bcs.d_view(m, BoundaryFace::outer_x3) == BoundaryFlag::user && iz > 0) + || ( mb_bcs.d_view(m, BoundaryFace::inner_x2) == BoundaryFlag::user && iy < 0) + || ( mb_bcs.d_view(m, BoundaryFace::outer_x2) == BoundaryFlag::user && iy > 0) + || ( mb_bcs.d_view(m, BoundaryFace::inner_x1) == BoundaryFlag::user && ix < 0) + || ( mb_bcs.d_view(m, BoundaryFace::outer_x1) == BoundaryFlag::user && ix > 0) + || (SQR(x1) + SQR(x2) + SQR(x3) < SQR(min_rad) + ); + + if (check_boundary) { + pi(PLASTMOVE,p) = -1; } - if (iy < 0) { - for (int iop = 8; iop <= 11; ++iop) { - if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { - send_to_coarser = true; + + // only update particle GID if it has crossed MeshBlock boundary + if ((abs(ix) + abs(iy) + abs(iz)) != 0 && !check_boundary) { + // The way GIDs are determined currently is not reliable in SMR cases + // due to nighbours across edges and corners being uninitialized. + // Need extra check + bool send_to_coarser = false; + if (ix < 0) { + // level might be initizialized to -1 on some neighbours + for (int iop = 0; iop <= 3; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; + } } - } - } else if (iy > 0) { - for (int iop = 12; iop <= 15; ++iop) { - if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { - send_to_coarser = true; + } else if (ix > 0) { + for (int iop = 4; iop <= 7; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; + } } } - } - if (iz < 0) { - for (int iop = 24; iop <= 27; ++iop) { - if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { - send_to_coarser = true; + if (iy < 0) { + for (int iop = 8; iop <= 11; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; + } + } + } else if (iy > 0) { + for (int iop = 12; iop <= 15; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; + } } } - } else if (iz > 0) { - for (int iop = 28; iop <= 31; ++iop) { + if (iz < 0) { + for (int iop = 24; iop <= 27; ++iop) { if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { send_to_coarser = true; } + } + } else if (iz > 0) { + for (int iop = 28; iop <= 31; ++iop) { + if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) { + send_to_coarser = true; + } + } } - } - int indx = 0; + + int indx = 0; if (iz == 0) { if (iy == 0) { // x1 face @@ -195,7 +220,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { } // If all faces in the x direction are on a finer level this should // have already been covered by previous logic, thus check y - if ( !found_coarser ) { + if (!found_coarser) { indx = NeighborIndex(0,iy,0,0,0); while (nghbr.d_view(m,indx).gid < 0) {indx++;} if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { @@ -228,7 +253,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { found_coarser = true; } - if ( !found_coarser ) { + if (!found_coarser) { indx = NeighborIndex(0,0,iz,0,0); while (nghbr.d_view(m,indx).gid < 0) {indx++;} if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { @@ -253,7 +278,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { found_coarser = true; } - if ( !found_coarser ) { + if (!found_coarser) { indx = NeighborIndex(0,0,iz,0,0); while (nghbr.d_view(m,indx).gid < 0) {indx++;} if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { @@ -272,14 +297,14 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { found_coarser = true; } - if ( !found_coarser ) { + if (!found_coarser) { indx = NeighborIndex(0,iy,0,0,0); while (nghbr.d_view(m,indx).gid < 0) {indx++;} if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { found_coarser = true; } } - if ( !found_coarser ) { + if (!found_coarser) { indx = NeighborIndex(0,0,iz,0,0); while (nghbr.d_view(m,indx).gid < 0) {indx++;} if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) { @@ -297,17 +322,20 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { } else if (x1 > meshsize.x1max) { pr(IPX,p) -= (meshsize.x1max - meshsize.x1min); } + if (x2 < meshsize.x2min) { pr(IPY,p) += (meshsize.x2max - meshsize.x2min); } else if (x2 > meshsize.x2max) { pr(IPY,p) -= (meshsize.x2max - meshsize.x2min); } + if (x3 < meshsize.x3min) { pr(IPZ,p) += (meshsize.x3max - meshsize.x3min); } else if (x3 > meshsize.x3max) { pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min); } } + } }); Kokkos::deep_copy(counter, atom_count); diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp index a2130f5e..252d3e1d 100644 --- a/src/particles/particles.cpp +++ b/src/particles/particles.cpp @@ -106,6 +106,11 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : { nrdata = 3; nidata = 4; + // gid, ptag, lastmove, lastlevel + // lastmove: + // if >= 0 => save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8 + // if -1 => freeze particle and perform no updates or position checks + // if -2 => remove from domain at next chance (TODO NOT IMPLEMENTED) break; } default: @@ -115,6 +120,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) : Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack); // allocate boundary object + min_radius = -1; pbval_part = new ParticlesBoundaryValues(this, pin); } diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp index 6f37dd8c..3382c797 100644 --- a/src/particles/particles.hpp +++ b/src/particles/particles.hpp @@ -65,6 +65,8 @@ class Particles { DvceArray2D prtcl_idata; // integer properties each particle (gid, tag, etc.) Real dtnew; + Real min_radius; // particles that enter within this radius will not be updated + ParticlesPusher pusher; // Boundary communication buffers and functions for particles diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp index 80f29841..e616c9d9 100644 --- a/src/particles/particles_pushers.cpp +++ b/src/particles/particles_pushers.cpp @@ -102,69 +102,73 @@ void Particles::PushLagrangianMC() { // GNW 2024-JUL-5: Warning, this may not play well with AMR par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1), KOKKOS_LAMBDA(const int p) { - int m = pi(PGID,p) - gids; + if (pi(PLASTMOVE,p) >= 0) { + // only update particles that are not frozen or marked for deletion - int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; - int jp = js; - int kp = ks; + int m = pi(PGID,p) - gids; - if (multi_d) { - jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; - } + int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; + int jp = js; + int kp = ks; - if (three_d) { - kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; - } + if (multi_d) { + jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; + } - // get normalized fluxes based on local density - Real mass = u1_(m,IDN,kp,jp,ip); - - // by convention, these values will be positive when there is outflow - // with respect to the current particle's cell - Real flx1_left = -flx1_(m,kp,jp,ip) / mass; - Real flx1_right = flx1_(m,kp,jp,ip+1) / mass; - Real flx2_left = (multi_d) ? -flx2_(m,kp,jp,ip) / mass : 0.; - Real flx2_right = (multi_d) ? flx2_(m,kp,jp+1,ip) / mass : 0.; - Real flx3_left = (three_d) ? -flx3_(m,kp,jp,ip) / mass : 0.; - Real flx3_right = (three_d) ? flx3_(m,kp+1,jp,ip) / mass : 0.; - - flx1_left = flx1_left < 0 ? 0 : flx1_left; - flx1_right = flx1_right < 0 ? 0 : flx1_right; - flx2_left = flx2_left < 0 ? 0 : flx2_left; - flx2_right = flx2_right < 0 ? 0 : flx2_right; - flx3_left = flx3_left < 0 ? 0 : flx3_left; - flx3_right = flx3_right < 0 ? 0 : flx3_right; - - auto rand_gen = rand_pool64_.get_state(); - Real rand = rand_gen.frand(); - rand_pool64_.free_state(rand_gen); // free state for use by other threads - - // save refinement level of current zone - pi(PLASTLEVEL,p) = mblev.d_view(m); - - // save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8 - pi(PLASTMOVE,p) = 32 * (ip % 2) + 16 * (jp % 2) + 8 * (kp % 2); - - if (rand < flx1_left) { - pr(IPX,p) -= mbsize.d_view(m).dx1; - pi(PLASTMOVE,p) += 1; - } else if (rand < flx1_left + flx1_right) { - pr(IPX,p) += mbsize.d_view(m).dx1; - pi(PLASTMOVE,p) += 2; - } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) { - pr(IPY,p) -= mbsize.d_view(m).dx2; - pi(PLASTMOVE,p) += 3; - } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) { - pr(IPY,p) += mbsize.d_view(m).dx2; - pi(PLASTMOVE,p) += 4; - } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right - + flx3_left) { - pr(IPZ,p) -= mbsize.d_view(m).dx3; - pi(PLASTMOVE,p) += 5; - } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right - + flx3_left + flx3_right) { - pr(IPZ,p) += mbsize.d_view(m).dx3; - pi(PLASTMOVE,p) += 6; + if (three_d) { + kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; + } + + // get normalized fluxes based on local density + Real mass = u1_(m,IDN,kp,jp,ip); + + // by convention, these values will be positive when there is outflow + // with respect to the current particle's cell + Real flx1_left = -flx1_(m,kp,jp,ip) / mass; + Real flx1_right = flx1_(m,kp,jp,ip+1) / mass; + Real flx2_left = (multi_d) ? -flx2_(m,kp,jp,ip) / mass : 0.; + Real flx2_right = (multi_d) ? flx2_(m,kp,jp+1,ip) / mass : 0.; + Real flx3_left = (three_d) ? -flx3_(m,kp,jp,ip) / mass : 0.; + Real flx3_right = (three_d) ? flx3_(m,kp+1,jp,ip) / mass : 0.; + + flx1_left = flx1_left < 0 ? 0 : flx1_left; + flx1_right = flx1_right < 0 ? 0 : flx1_right; + flx2_left = flx2_left < 0 ? 0 : flx2_left; + flx2_right = flx2_right < 0 ? 0 : flx2_right; + flx3_left = flx3_left < 0 ? 0 : flx3_left; + flx3_right = flx3_right < 0 ? 0 : flx3_right; + + auto rand_gen = rand_pool64_.get_state(); + Real rand = rand_gen.frand(); + rand_pool64_.free_state(rand_gen); // free state for use by other threads + + // save refinement level of current zone + pi(PLASTLEVEL,p) = mblev.d_view(m); + + // save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8 + pi(PLASTMOVE,p) = 32 * (ip % 2) + 16 * (jp % 2) + 8 * (kp % 2); + + if (rand < flx1_left) { + pr(IPX,p) -= mbsize.d_view(m).dx1; + pi(PLASTMOVE,p) += 1; + } else if (rand < flx1_left + flx1_right) { + pr(IPX,p) += mbsize.d_view(m).dx1; + pi(PLASTMOVE,p) += 2; + } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) { + pr(IPY,p) -= mbsize.d_view(m).dx2; + pi(PLASTMOVE,p) += 3; + } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) { + pr(IPY,p) += mbsize.d_view(m).dx2; + pi(PLASTMOVE,p) += 4; + } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + + flx3_left) { + pr(IPZ,p) -= mbsize.d_view(m).dx3; + pi(PLASTMOVE,p) += 5; + } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + + flx3_left + flx3_right) { + pr(IPZ,p) += mbsize.d_view(m).dx3; + pi(PLASTMOVE,p) += 6; + } } }); } @@ -196,225 +200,229 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) { par_for("particle_meshshift",DevExeSpace(),0,(nprtcl_thispack-1), KOKKOS_LAMBDA(const int p) { - int m = pi(PGID,p) - gids; - int level = mblev.d_view(m); - - int lastlevel = pi(PLASTLEVEL,p); - int lastmove = pi(PLASTMOVE,p); - - // oddness of the last cell that the particle lived in - int i_parity = lastmove / 32; - int j_parity = (lastmove % 32) / 16; - int k_parity = (lastmove % 16) / 8; - - // direction of last move: - // 1 -> "left" x1 face was chosen - // 2 -> "right" x1 face was chosen - // 3 -> "left" x2 face was chosen - // 4 -> "right" x2 face was chosen - // 5 -> "left" x3 face was chosen - // 6 -> "right" x3 face was chosen - lastmove = lastmove % 8; - - Real dx1 = mbsize.d_view(m).dx1; - Real dx2 = multi_d ? mbsize.d_view(m).dx2 : 0.; - Real dx3 = three_d ? mbsize.d_view(m).dx3 : 0.; - - if (level > lastlevel) { - // this is a higher refinement level, i.e., the zones are smaller now - - if (lastmove == 1) { - // came from zone to right (dx--) - pr(IPX,p) += dx1/2; - - pr(IPY,p) -= dx2/2; - pr(IPZ,p) -= dx3/2; - } else if (lastmove == 2) { - // came from zone to left (dx++) - pr(IPX,p) -= dx1/2; - - pr(IPY,p) -= dx2/2; - pr(IPZ,p) -= dx3/2; - } else if (multi_d && lastmove == 3) { - // came from zone above (dy--) - pr(IPY,p) += dx2/2; - - pr(IPX,p) -= dx1/2; - pr(IPZ,p) -= dx3/2; - } else if (multi_d && lastmove == 4) { - // came from zone below (dy++) - pr(IPY,p) -= dx2/2; - - pr(IPX,p) -= dx1/2; - pr(IPZ,p) -= dx3/2; - } else if (three_d && lastmove == 5) { - // came from zone in front (dz--) - pr(IPZ,p) += dx3/2; - - pr(IPX,p) -= dx1/2; - pr(IPY,p) -= dx2/2; - } else if (three_d && lastmove == 6) { - // came from zone behind (dz++) - pr(IPZ,p) -= dx3/2; - - pr(IPX,p) -= dx1/2; - pr(IPY,p) -= dx2/2; - } - - int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; - int jp = js; - int kp = ks; - - if (multi_d) { - jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; - } - - if (three_d) { - kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; - } - - // get fluxes into the four zones that the particle could have ended up in - Real flx1 = 0.; - Real flx2 = 0.; - Real flx3 = 0.; - Real flx4 = 0.; - - if (lastmove == 1) { - // came from zone to the right - flx1 = -flx1_(m,kp,jp,ip+1); - flx2 = (multi_d) ? -flx1_(m,kp,jp+1,ip+1) : 0.; - flx3 = (three_d) ? -flx1_(m,kp+1,jp,ip+1) : 0.; - flx4 = (multi_d && three_d) ? -flx1_(m,kp+1,jp+1,ip+1) : 0.; - } else if (lastmove == 2) { - // came from zone to the left - flx1 = flx1_(m,kp,jp,ip); - flx2 = (multi_d) ? flx1_(m,kp,jp+1,ip) : 0.; - flx3 = (three_d) ? flx1_(m,kp+1,jp,ip) : 0.; - flx4 = (multi_d && three_d) ? flx1_(m,kp+1,jp+1,ip) : 0.; - } else if (lastmove == 3) { - // came from zone above. is at least multi_d - flx1 = -flx2_(m,kp,jp+1,ip); - flx2 = -flx2_(m,kp,jp+1,ip+1); - flx3 = (three_d) ? -flx2_(m,kp+1,jp+1,ip) : 0.; - flx4 = (three_d) ? -flx2_(m,kp+1,jp+1,ip+1) : 0.; - } else if (lastmove == 4) { - // came from zone below. is at least multi_d - flx1 = flx2_(m,kp,jp,ip); - flx2 = flx2_(m,kp,jp,ip+1); - flx3 = (three_d) ? flx2_(m,kp+1,jp,ip) : 0.; - flx4 = (three_d) ? flx2_(m,kp+1,jp,ip+1) : 0.; - } else if (lastmove == 5) { - // came from zone in front. is three_d - flx1 = -flx3_(m,kp+1,jp,ip); - flx2 = -flx3_(m,kp+1,jp,ip+1); - flx3 = -flx3_(m,kp+1,jp+1,ip); - flx4 = -flx3_(m,kp+1,jp+1,ip+1); - } else if (lastmove == 6) { - // came from zone behind. is three_d - flx1 = flx3_(m,kp,jp,ip); - flx2 = flx3_(m,kp,jp,ip+1); - flx3 = flx3_(m,kp,jp+1,ip); - flx4 = flx3_(m,kp,jp+1,ip+1); - } - - flx1 = (flx1 < 0) ? 0. : flx1; - flx2 = (flx2 < 0) ? 0. : flx2; - flx3 = (flx3 < 0) ? 0. : flx3; - flx4 = (flx4 < 0) ? 0. : flx4; - - Real flx_total = flx1 + flx2 + flx3 + flx4; - flx_total = (flx_total > 0) ? flx_total : 1.e-10; + if (pi(PLASTMOVE,p) >= 0) { + // only update particles that are not frozen or marked for deletion + + int m = pi(PGID,p) - gids; + int level = mblev.d_view(m); + + int lastlevel = pi(PLASTLEVEL,p); + int lastmove = pi(PLASTMOVE,p); + + // oddness of the last cell that the particle lived in + int i_parity = lastmove / 32; + int j_parity = (lastmove % 32) / 16; + int k_parity = (lastmove % 16) / 8; + + // direction of last move: + // 1 -> "left" x1 face was chosen + // 2 -> "right" x1 face was chosen + // 3 -> "left" x2 face was chosen + // 4 -> "right" x2 face was chosen + // 5 -> "left" x3 face was chosen + // 6 -> "right" x3 face was chosen + lastmove = lastmove % 8; + + Real dx1 = mbsize.d_view(m).dx1; + Real dx2 = multi_d ? mbsize.d_view(m).dx2 : 0.; + Real dx3 = three_d ? mbsize.d_view(m).dx3 : 0.; + + if (level > lastlevel) { + // this is a higher refinement level, i.e., the zones are smaller now + + if (lastmove == 1) { + // came from zone to right (dx--) + pr(IPX,p) += dx1/2; + + pr(IPY,p) -= dx2/2; + pr(IPZ,p) -= dx3/2; + } else if (lastmove == 2) { + // came from zone to left (dx++) + pr(IPX,p) -= dx1/2; + + pr(IPY,p) -= dx2/2; + pr(IPZ,p) -= dx3/2; + } else if (multi_d && lastmove == 3) { + // came from zone above (dy--) + pr(IPY,p) += dx2/2; + + pr(IPX,p) -= dx1/2; + pr(IPZ,p) -= dx3/2; + } else if (multi_d && lastmove == 4) { + // came from zone below (dy++) + pr(IPY,p) -= dx2/2; + + pr(IPX,p) -= dx1/2; + pr(IPZ,p) -= dx3/2; + } else if (three_d && lastmove == 5) { + // came from zone in front (dz--) + pr(IPZ,p) += dx3/2; + + pr(IPX,p) -= dx1/2; + pr(IPY,p) -= dx2/2; + } else if (three_d && lastmove == 6) { + // came from zone behind (dz++) + pr(IPZ,p) -= dx3/2; + + pr(IPX,p) -= dx1/2; + pr(IPY,p) -= dx2/2; + } - flx1 /= flx_total; - flx2 /= flx_total; - flx3 /= flx_total; - flx4 /= flx_total; + int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is; + int jp = js; + int kp = ks; - auto rand_gen = rand_pool64_.get_state(); - Real rand = rand_gen.frand(); - rand_pool64_.free_state(rand_gen); // free state for use by other threads + if (multi_d) { + jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js; + } - int target_zone = 4; - if (rand < flx1) { - target_zone = 1; - } else if (rand < flx1 + flx2) { - target_zone = 2; - } else if (rand < flx1 + flx2 + flx3) { - target_zone = 3; - } + if (three_d) { + kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks; + } - if (lastmove == 1 || lastmove == 2) { - if (target_zone == 2) { - pr(IPY,p) += mbsize.d_view(m).dx2; - } else if (target_zone == 3) { - pr(IPZ,p) += mbsize.d_view(m).dx3; - } else if (target_zone == 4) { - pr(IPY,p) += mbsize.d_view(m).dx2; - pr(IPZ,p) += mbsize.d_view(m).dx3; + // get fluxes into the four zones that the particle could have ended up in + Real flx1 = 0.; + Real flx2 = 0.; + Real flx3 = 0.; + Real flx4 = 0.; + + if (lastmove == 1) { + // came from zone to the right + flx1 = -flx1_(m,kp,jp,ip+1); + flx2 = (multi_d) ? -flx1_(m,kp,jp+1,ip+1) : 0.; + flx3 = (three_d) ? -flx1_(m,kp+1,jp,ip+1) : 0.; + flx4 = (multi_d && three_d) ? -flx1_(m,kp+1,jp+1,ip+1) : 0.; + } else if (lastmove == 2) { + // came from zone to the left + flx1 = flx1_(m,kp,jp,ip); + flx2 = (multi_d) ? flx1_(m,kp,jp+1,ip) : 0.; + flx3 = (three_d) ? flx1_(m,kp+1,jp,ip) : 0.; + flx4 = (multi_d && three_d) ? flx1_(m,kp+1,jp+1,ip) : 0.; + } else if (lastmove == 3) { + // came from zone above. is at least multi_d + flx1 = -flx2_(m,kp,jp+1,ip); + flx2 = -flx2_(m,kp,jp+1,ip+1); + flx3 = (three_d) ? -flx2_(m,kp+1,jp+1,ip) : 0.; + flx4 = (three_d) ? -flx2_(m,kp+1,jp+1,ip+1) : 0.; + } else if (lastmove == 4) { + // came from zone below. is at least multi_d + flx1 = flx2_(m,kp,jp,ip); + flx2 = flx2_(m,kp,jp,ip+1); + flx3 = (three_d) ? flx2_(m,kp+1,jp,ip) : 0.; + flx4 = (three_d) ? flx2_(m,kp+1,jp,ip+1) : 0.; + } else if (lastmove == 5) { + // came from zone in front. is three_d + flx1 = -flx3_(m,kp+1,jp,ip); + flx2 = -flx3_(m,kp+1,jp,ip+1); + flx3 = -flx3_(m,kp+1,jp+1,ip); + flx4 = -flx3_(m,kp+1,jp+1,ip+1); + } else if (lastmove == 6) { + // came from zone behind. is three_d + flx1 = flx3_(m,kp,jp,ip); + flx2 = flx3_(m,kp,jp,ip+1); + flx3 = flx3_(m,kp,jp+1,ip); + flx4 = flx3_(m,kp,jp+1,ip+1); } - } else if (lastmove == 3 || lastmove == 4) { - if (target_zone == 2) { - pr(IPX,p) += mbsize.d_view(m).dx1; - } else if (target_zone == 3) { - pr(IPZ,p) += mbsize.d_view(m).dx3; - } else if (target_zone == 4) { - pr(IPX,p) += mbsize.d_view(m).dx1; - pr(IPZ,p) += mbsize.d_view(m).dx3; + + flx1 = (flx1 < 0) ? 0. : flx1; + flx2 = (flx2 < 0) ? 0. : flx2; + flx3 = (flx3 < 0) ? 0. : flx3; + flx4 = (flx4 < 0) ? 0. : flx4; + + Real flx_total = flx1 + flx2 + flx3 + flx4; + flx_total = (flx_total > 0) ? flx_total : 1.e-10; + + flx1 /= flx_total; + flx2 /= flx_total; + flx3 /= flx_total; + flx4 /= flx_total; + + auto rand_gen = rand_pool64_.get_state(); + Real rand = rand_gen.frand(); + rand_pool64_.free_state(rand_gen); // free state for use by other threads + + int target_zone = 4; + if (rand < flx1) { + target_zone = 1; + } else if (rand < flx1 + flx2) { + target_zone = 2; + } else if (rand < flx1 + flx2 + flx3) { + target_zone = 3; } - } else if (lastmove == 5 || lastmove == 6) { - if (target_zone == 2) { - pr(IPX,p) += mbsize.d_view(m).dx1; - } else if (target_zone == 3) { - pr(IPY,p) += mbsize.d_view(m).dx2; - } else if (target_zone == 4) { - pr(IPX,p) += mbsize.d_view(m).dx1; - pr(IPY,p) += mbsize.d_view(m).dx2; + + if (lastmove == 1 || lastmove == 2) { + if (target_zone == 2) { + pr(IPY,p) += mbsize.d_view(m).dx2; + } else if (target_zone == 3) { + pr(IPZ,p) += mbsize.d_view(m).dx3; + } else if (target_zone == 4) { + pr(IPY,p) += mbsize.d_view(m).dx2; + pr(IPZ,p) += mbsize.d_view(m).dx3; + } + } else if (lastmove == 3 || lastmove == 4) { + if (target_zone == 2) { + pr(IPX,p) += mbsize.d_view(m).dx1; + } else if (target_zone == 3) { + pr(IPZ,p) += mbsize.d_view(m).dx3; + } else if (target_zone == 4) { + pr(IPX,p) += mbsize.d_view(m).dx1; + pr(IPZ,p) += mbsize.d_view(m).dx3; + } + } else if (lastmove == 5 || lastmove == 6) { + if (target_zone == 2) { + pr(IPX,p) += mbsize.d_view(m).dx1; + } else if (target_zone == 3) { + pr(IPY,p) += mbsize.d_view(m).dx2; + } else if (target_zone == 4) { + pr(IPX,p) += mbsize.d_view(m).dx1; + pr(IPY,p) += mbsize.d_view(m).dx2; + } } - } - } else if (level < lastlevel) { - // this is a lower refinement level, i.e., the zones are larger now, - // there's nothing special to do other than to move the particle to - // the center of the new zone + } else if (level < lastlevel) { + // this is a lower refinement level, i.e., the zones are larger now, + // there's nothing special to do other than to move the particle to + // the center of the new zone - if (i_parity) { - pr(IPX,p) -= mbsize.d_view(m).dx1/4; - } else { - pr(IPX,p) += mbsize.d_view(m).dx1/4; - } - if (multi_d) { - if (j_parity) { - pr(IPY,p) -= mbsize.d_view(m).dx2/4; + if (i_parity) { + pr(IPX,p) -= mbsize.d_view(m).dx1/4; } else { - pr(IPY,p) += mbsize.d_view(m).dx2/4; + pr(IPX,p) += mbsize.d_view(m).dx1/4; } - } - if (three_d) { - if (k_parity) { - pr(IPZ,p) -= mbsize.d_view(m).dx3/4; - } else { - pr(IPZ,p) += mbsize.d_view(m).dx3/4; + if (multi_d) { + if (j_parity) { + pr(IPY,p) -= mbsize.d_view(m).dx2/4; + } else { + pr(IPY,p) += mbsize.d_view(m).dx2/4; + } + } + if (three_d) { + if (k_parity) { + pr(IPZ,p) -= mbsize.d_view(m).dx3/4; + } else { + pr(IPZ,p) += mbsize.d_view(m).dx3/4; + } } - } - if (lastmove == 1) { - // came from zone to right (dx--) - pr(IPX,p) -= mbsize.d_view(m).dx1/2; - } else if (lastmove == 2) { - // came from zone to left (dx++) - pr(IPX,p) += mbsize.d_view(m).dx1/2; - } else if (lastmove == 3) { - // came from zone above (dy--) - pr(IPY,p) -= mbsize.d_view(m).dx2/2; - } else if (lastmove == 4) { - // came from zone below (dy++) - pr(IPY,p) += mbsize.d_view(m).dx2/2; - } else if (lastmove == 5) { - // came from zone in front (dz--) - pr(IPZ,p) -= mbsize.d_view(m).dx3/2; - } else if (lastmove == 6) { - // came from zone behind (dz++) - pr(IPZ,p) += mbsize.d_view(m).dx3/2; + if (lastmove == 1) { + // came from zone to right (dx--) + pr(IPX,p) -= mbsize.d_view(m).dx1/2; + } else if (lastmove == 2) { + // came from zone to left (dx++) + pr(IPX,p) += mbsize.d_view(m).dx1/2; + } else if (lastmove == 3) { + // came from zone above (dy--) + pr(IPY,p) -= mbsize.d_view(m).dx2/2; + } else if (lastmove == 4) { + // came from zone below (dy++) + pr(IPY,p) += mbsize.d_view(m).dx2/2; + } else if (lastmove == 5) { + // came from zone in front (dz--) + pr(IPZ,p) -= mbsize.d_view(m).dx3/2; + } else if (lastmove == 6) { + // came from zone behind (dz++) + pr(IPZ,p) += mbsize.d_view(m).dx3/2; + } } } }); diff --git a/src/pgen/gr_torus.cpp b/src/pgen/gr_torus.cpp index b3a16401..0a991485 100644 --- a/src/pgen/gr_torus.cpp +++ b/src/pgen/gr_torus.cpp @@ -808,6 +808,9 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { int &nx2 = indcs.nx2; int &nx3 = indcs.nx3; + // set inner radius + pmy_mesh_->pmb_pack->ppart->min_radius = 1. + sqrt(1. - coord.bh_spin*coord.bh_spin); + // init RNG Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids); From d8039cf5d6f87c08ce7a638c61865772f7baf52f Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Mon, 5 Aug 2024 21:19:14 -0400 Subject: [PATCH 09/10] lint bvals_part.cpp --- src/bvals/bvals_part.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp index 1e9b5eda..7791bc68 100644 --- a/src/bvals/bvals_part.cpp +++ b/src/bvals/bvals_part.cpp @@ -92,7 +92,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() { if (pi(PLASTMOVE,p) == -1) { // this particle is frozen so skip update } else if (pi(PLASTMOVE,p) == -2) { - // TODO, mark for deletion + // TODO(@GNW) mark for deletion } else { // do regular boundary search and update int m = pi(PGID,p) - gids; From 03fefe3797794e929a598e67e3c0943353fb0f3d Mon Sep 17 00:00:00 2001 From: "George N. Wong" Date: Mon, 12 Aug 2024 13:52:43 -0400 Subject: [PATCH 10/10] reset particle z position on initialization --- src/pgen/gr_torus.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pgen/gr_torus.cpp b/src/pgen/gr_torus.cpp index 0a991485..07ca47b3 100644 --- a/src/pgen/gr_torus.cpp +++ b/src/pgen/gr_torus.cpp @@ -916,8 +916,7 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) { pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max); pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min, - mbsize.d_view(m).x3max) - - mbsize.d_view(m).dx3/2; + mbsize.d_view(m).x3max); } }); }