From b855fbbf004dffe9709b83265469dbf28fb1fecb Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Fri, 22 Mar 2024 00:12:25 -0400
Subject: [PATCH 01/10] add rough implementation of Lagrangian MC particle
 pusher

---
 src/bvals/bvals_part.cpp            |   3 +
 src/particles/particles.cpp         |  16 ++++
 src/particles/particles.hpp         |   2 +-
 src/particles/particles_pushers.cpp | 140 +++++++++++++++++++++++++++-
 4 files changed, 156 insertions(+), 5 deletions(-)
diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index 9c79d0e8..0e080a10 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -64,6 +64,9 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
   Kokkos::realloc(sendlist, static_cast<int>(0.1*npart));
   par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) {
     int m = pi(PGID,p) - gids;
+
+    // TODO(GNW): This won't work if we don't store particle velocities because then
+    //            IPX, IPY, IPZ will resolve to 0, 2, 4 rather than 0, 1, 2
     Real x1 = pr(IPX,p);
     Real x2 = pr(IPY,p);
     Real x3 = pr(IPZ,p);
diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp
index 0c26350f..77d92bda 100644
--- a/src/particles/particles.cpp
+++ b/src/particles/particles.cpp
@@ -45,6 +45,10 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
     std::string ptype = pin->GetString("particles","particle_type");
     if (ptype.compare("cosmic_ray") == 0) {
       particle_type = ParticleType::cosmic_ray;
+    } else if (ptype.compare("lagrangian_mc") == 0) {
+      // TODO(GNW): Support initialization
+      // TODO(GNW): Restrict which pusher is alowed based on particle type
+      particle_type = ParticleType::lagrangian_mc;
     } else {
       std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__
                 << std::endl << "Particle type = '" << ptype << "' not recognized"
@@ -58,6 +62,8 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
     std::string ppush = pin->GetString("particles","pusher");
     if (ppush.compare("drift") == 0) {
       pusher = ParticlesPusher::drift;
+    } else if (ppush.compare("lagrangian_mc") == 0) {
+      pusher = ParticlesPusher::lagrangian_mc;
     } else {
       std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__
                 << std::endl << "Particle pusher must be specified in <particles> block"
@@ -81,6 +87,16 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
         nidata = 2;
         break;
       }
+    case ParticleType::lagrangian_mc:
+      {
+        // TODO(GNW): This implementation may have issues for IPX, IPY, IPZ
+        //            without IVX, IVY, IVZ...
+        int ndim=2;
+        if (pmy_pack->pmesh->three_d) {ndim+=1;}
+        nrdata = ndim;
+        nidata = 2;
+        break;
+      }
     default:
       break;
   }
diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp
index 3ab43781..4fc22e95 100644
--- a/src/particles/particles.hpp
+++ b/src/particles/particles.hpp
@@ -23,7 +23,7 @@
 enum class ParticlesPusher {drift, leap_frog, lagrangian_tracer, lagrangian_mc};
 
 // constants that enumerate ParticleTypes
-enum class ParticleType {cosmic_ray};
+enum class ParticleType {cosmic_ray, lagrangian_mc};
 
 //----------------------------------------------------------------------------------------
 //! \struct ParticlesTaskIDs
diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp
index 46b3ebb2..c071478b 100644
--- a/src/particles/particles_pushers.cpp
+++ b/src/particles/particles_pushers.cpp
@@ -8,9 +8,13 @@
 
 #include "athena.hpp"
 #include "mesh/mesh.hpp"
+#include "hydro/hydro.hpp"
+#include "mhd/mhd.hpp"
 #include "driver/driver.hpp"
 #include "particles.hpp"
 
+#include <Kokkos_Random.hpp>
+
 namespace particles {
 //----------------------------------------------------------------------------------------
 //! \fn  void Particles::ParticlesPush
@@ -26,9 +30,38 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) {
   auto &mbsize = pmy_pack->pmb->mb_size;
   auto &pi = prtcl_idata;
   auto &pr = prtcl_rdata;
-  auto dt_ = (pmy_pack->pmesh->dt);
+  auto dt_ = pmy_pack->pmesh->dt;
   auto gids = pmy_pack->gids;
 
+  // Used in lagrangian_mc pusher
+  auto dtold_ = pmy_pack->pmesh->dtold;
+  auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1;
+  auto &uflx = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflx:pmy_pack->pmhd->uflx;
+  auto &flx1_ = uflx.x1f;
+  auto &flx2_ = uflx.x2f;
+  auto &flx3_ = uflx.x3f;
+
+  // TODO(GNW): Maybe move this outside and pass it in as needed? What is the
+  //            overhead associated with recreating the pool each call?
+
+  Kokkos::Random_XorShift64_Pool<> rand_pool64(pmy_pack->gids);
+
+  // TODO(GNW): Note that for MC tracers we need to handle mesh refinement (in AMR)
+  //            more carefully.
+
+  // TODO(GNW): Is it better to do an if statement here and capture the variables
+  //            for the lambda or keep the switch?
+
+  // TODO(GNW): Are these fluxes are the correct ones to use (RK question)?
+
+  // TODO(GNW): The particle pusher gets called before the time integrator, but
+  //            for the MC pusher we need to know the fluxes from the previous
+  //            time step, which means we need to read from u1 and fluxes.
+  //            One way to deal with this is just to have the MC pusher update
+  //            particle positions "one step out of phase" and use dtold. An
+  //            alternative is to have the MC pusher use a separate task that
+  //            gets called after the time integrator.
+
   switch (pusher) {
     case ParticlesPusher::drift:
 
@@ -49,9 +82,108 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) {
         }
       });
 
-    break;
-  default:
-    break;
+      break;
+
+    case ParticlesPusher::lagrangian_mc:
+
+      par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
+      KOKKOS_LAMBDA(const int p) {
+        // get random number state this thread
+        auto rand_gen = rand_pool64.get_state();
+
+        int m = pi(PGID,p) - gids;
+
+        int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
+        int jp = 0;
+        int kp = 0;
+
+        if (multi_d) {
+          jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
+        }
+
+        if (three_d) {
+          kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
+        }
+
+        bool particle_has_moved = false;
+        Real reduced_mass = u1_(m,IDN,kp,jp,ip);
+
+        // by convention, these values will be negative when there is outflow
+        // with respect to the current particle's cell
+        Real flx1_left = flx1_(m,IDN,kp,jp,ip);
+        Real flx1_right = -flx1_(m,IDN,kp,jp,ip+1);
+        Real flx2_left = (multi_d) ? flx2_(m,IDN,kp,jp,jp) : 0.;
+        Real flx2_right = (multi_d) ? -flx2_(m,IDN,kp,jp+1,jp) : 0.;
+        Real flx3_left = (three_d) ? flx3_(m,IDN,kp,kp,jp) : 0.;
+        Real flx3_right = (three_d) ? -flx3_(m,IDN,kp+1,kp,jp) : 0.;
+
+        flx1_left *= mbsize.d_view(m).dx1 * dtold_;
+        flx1_right *= mbsize.d_view(m).dx1 * dtold_;
+
+        flx2_left *= mbsize.d_view(m).dx2 * dtold_;
+        flx2_right *= mbsize.d_view(m).dx2 * dtold_;
+
+        flx3_left *= mbsize.d_view(m).dx3 * dtold_;
+        flx3_right *= mbsize.d_view(m).dx3 * dtold_;
+
+        if (flx1_left < 0) {
+          if (rand_gen.frand() < fabs(flx1_left)/reduced_mass) {
+            pr(IPX,p) = (ip - 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min;
+            particle_has_moved = true;
+          }
+          reduced_mass += flx1_left;
+        }
+
+        if (!particle_has_moved && flx1_right < 0) {
+          if (rand_gen.frand() < fabs(flx1_right)/reduced_mass) {
+            pr(IPX,p) = (ip + 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min;
+            particle_has_moved = true;
+          }
+          reduced_mass += flx1_right;
+        }
+
+        if (multi_d) {
+          if (!particle_has_moved && flx2_left < 0) {
+            if (rand_gen.frand() < fabs(flx2_left)/reduced_mass) {
+              pr(IPY,p) = (jp - 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min;
+              particle_has_moved = true;
+            }
+            reduced_mass += flx2_left;
+          }
+
+          if (!particle_has_moved && flx2_right < 0) {
+            if (rand_gen.frand() < fabs(flx2_right)/reduced_mass) {
+              pr(IPY,p) = (jp + 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min;
+              particle_has_moved = true;
+            }
+            reduced_mass += flx2_right;
+          }
+        }
+
+        if (three_d) {
+          if (!particle_has_moved && flx3_left < 0) {
+            if (rand_gen.frand() < fabs(flx3_left)/reduced_mass) {
+              pr(IPZ,p) = (kp - 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min;
+              particle_has_moved = true;
+            }
+            reduced_mass += flx3_left;
+          }
+
+          if (!particle_has_moved && flx3_right < 0) {
+            if (rand_gen.frand() < fabs(flx3_right)/reduced_mass) {
+              pr(IPZ,p) = (kp + 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min;
+              particle_has_moved = true;
+            }
+            reduced_mass += flx3_right;
+          }
+        }
+        rand_pool64.free_state(rand_gen);  // free state for use by other threads
+      });
+
+      break;
+
+    default:
+      break;
   }
 
   return TaskStatus::complete;

From b8a35ef8a1cd370b8652e96eca9f957a6b40ed9e Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Tue, 2 Apr 2024 01:52:03 -0400
Subject: [PATCH 02/10] use corrected full-step fluxes

---
 src/athena.hpp                      |   2 +-
 src/bvals/bvals_part.cpp            |   3 +-
 src/hydro/hydro.cpp                 |  19 ++
 src/hydro/hydro.hpp                 |   7 +
 src/hydro/hydro_fluxes.cpp          |  61 +++++++
 src/hydro/hydro_tasks.cpp           |   3 +-
 src/mhd/mhd.cpp                     |  19 ++
 src/mhd/mhd.hpp                     |   7 +
 src/mhd/mhd_fluxes.cpp              |  61 +++++++
 src/mhd/mhd_tasks.cpp               |   3 +-
 src/particles/particles.cpp         |  34 ++--
 src/particles/particles.hpp         |   7 +
 src/particles/particles_pushers.cpp | 272 ++++++++++++----------------
 src/particles/particles_tasks.cpp   |  41 ++++-
 src/pgen/kh.cpp                     |  61 +++++++
 15 files changed, 423 insertions(+), 177 deletions(-)

diff --git a/src/athena.hpp b/src/athena.hpp
index 7aae1916..02a1ee00 100644
--- a/src/athena.hpp
+++ b/src/athena.hpp
@@ -52,7 +52,7 @@ enum VariableIndex {IDN=0, IM1=1, IVX=1, IM2=2, IVY=2, IM3=3, IVZ=3, IEN=4, ITM=
 // array indices for components of magnetic field
 enum BFieldIndex {IBX=0, IBY=1, IBZ=2};
 // array indices for particle arrays
-enum ParticlesIndex {PGID=0, PTAG=1, IPX=0, IPVX=1, IPY=2, IPVY=3, IPZ=4, IPVZ=5};
+enum ParticlesIndex {PGID=0, PTAG=1, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5};
 
 // integer constants to specify spatial reconstruction methods
 enum ReconstructionMethod {dc, plm, ppm4, ppmx, wenoz};
diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index 0e080a10..76482ae1 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -65,8 +65,6 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
   par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) {
     int m = pi(PGID,p) - gids;
 
-    // TODO(GNW): This won't work if we don't store particle velocities because then
-    //            IPX, IPY, IPZ will resolve to 0, 2, 4 rather than 0, 1, 2
     Real x1 = pr(IPX,p);
     Real x2 = pr(IPY,p);
     Real x3 = pr(IPZ,p);
@@ -194,6 +192,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
       pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min);
     }
   });
+
   nprtcl_send = counter;
   Kokkos::resize(sendlist, nprtcl_send);
   // sync sendlist device array with host
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index c1dec57f..8c362f95 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -32,6 +32,7 @@ Hydro::Hydro(MeshBlockPack *ppack, ParameterInput *pin) :
     coarse_w0("cprim",1,1,1,1,1),
     u1("cons1",1,1,1,1,1),
     uflx("uflx",1,1,1,1,1),
+    uflxidnsaved("uflxidn",1,1,1,1),
     utest("utest",1,1,1,1,1),
     fofc("fofc",1,1,1,1) {
   // Total number of MeshBlocks on this rank to be used in array dimensioning
@@ -287,4 +288,22 @@ Hydro::~Hydro() {
   if (psrc != nullptr) {delete psrc;}
 }
 
+//----------------------------------------------------------------------------------------
+// SetSaveUFlxIdn:  set flag to save density flux over entire step
+
+void Hydro::SetSaveUFlxIdn() {
+  int nmb = std::max((pmy_pack->nmb_thispack), (pmy_pack->pmesh->nmb_maxperrank));
+  auto &indcs = pmy_pack->pmesh->mb_indcs;
+  int ncells1 = indcs.nx1 + 2*(indcs.ng);
+  int ncells2 = (indcs.nx2 > 1)? (indcs.nx2 + 2*(indcs.ng)) : 1;
+  int ncells3 = (indcs.nx3 > 1)? (indcs.nx3 + 2*(indcs.ng)) : 1;
+
+  // allocated saved arrays for density flux
+  Kokkos::realloc(uflxidnsaved.x1f, nmb, ncells3, ncells2, ncells1+1);
+  Kokkos::realloc(uflxidnsaved.x2f, nmb, ncells3, ncells2+1, ncells1);
+  Kokkos::realloc(uflxidnsaved.x3f, nmb, ncells3+1, ncells2, ncells1);
+
+  uflxidn_saved = true;
+}
+
 } // namespace hydro
diff --git a/src/hydro/hydro.hpp b/src/hydro/hydro.hpp
index fb7e72f9..ce3701bd 100644
--- a/src/hydro/hydro.hpp
+++ b/src/hydro/hydro.hpp
@@ -41,6 +41,7 @@ struct HydroTaskIDs {
   TaskID sendf;
   TaskID recvf;
   TaskID expl;
+  TaskID savef;
   TaskID restu;
   TaskID sendu;
   TaskID recvu;
@@ -88,6 +89,10 @@ class Hydro {
   DvceFaceFld5D<Real> uflx;   // fluxes of conserved quantities on cell faces
   Real dtnew;
 
+  // following used to save step-to-step flux values
+  bool uflxidn_saved = false;
+  DvceFaceFld4D<Real> uflxidnsaved;
+
   // following used for FOFC
   DvceArray4D<bool> fofc;  // flag for each cell to indicate if FOFC is needed
   bool use_fofc = false;   // flag to enable FOFC
@@ -96,6 +101,7 @@ class Hydro {
   HydroTaskIDs id;
 
   // functions...
+  void SetSaveUFlxIdn();
   void AssembleHydroTasks(std::map<std::string, std::shared_ptr<TaskList>> tl);
   // ...in "before_stagen_tl" list
   TaskStatus InitRecv(Driver *d, int stage);
@@ -105,6 +111,7 @@ class Hydro {
   TaskStatus SendFlux(Driver *d, int stage);
   TaskStatus RecvFlux(Driver *d, int stage);
   TaskStatus ExpRKUpdate(Driver *d, int stage);
+  TaskStatus SaveFlux(Driver *d, int stage);
   TaskStatus RestrictU(Driver *d, int stage);
   TaskStatus SendU(Driver *d, int stage);
   TaskStatus RecvU(Driver *d, int stage);
diff --git a/src/hydro/hydro_fluxes.cpp b/src/hydro/hydro_fluxes.cpp
index 2078de39..108ab946 100644
--- a/src/hydro/hydro_fluxes.cpp
+++ b/src/hydro/hydro_fluxes.cpp
@@ -356,4 +356,65 @@ template void Hydro::CalculateFluxes<Hydro_RSolver::hllc_sr>(Driver *pdriver, in
 template void Hydro::CalculateFluxes<Hydro_RSolver::llf_gr>(Driver *pdriver, int stage);
 template void Hydro::CalculateFluxes<Hydro_RSolver::hlle_gr>(Driver *pdriver, int stage);
 
+//----------------------------------------------------------------------------------------
+//! \fn TaskStatus Hydro::SaveFlux
+//! \brief Save IDN fluxes across all stages for full time step
+
+TaskStatus Hydro::SaveFlux(Driver *pdrive, int stage) {
+  if (uflxidn_saved) {
+    auto &indcs = pmy_pack->pmesh->mb_indcs;
+    int is = indcs.is, ie = indcs.ie;
+    int js = indcs.js, je = indcs.je;
+    int ks = indcs.ks, ke = indcs.ke;
+    bool &multi_d = pmy_pack->pmesh->multi_d;
+    bool &three_d = pmy_pack->pmesh->three_d;
+
+    Real dt = pmy_pack->pmesh->dt;
+    int nmb1 = pmy_pack->nmb_thispack - 1;
+    auto flx1 = uflx.x1f;
+    auto flx2 = uflx.x2f;
+    auto flx3 = uflx.x3f;
+    auto flxidn1 = uflxidnsaved.x1f;
+    auto flxidn2 = uflxidnsaved.x2f;
+    auto flxidn3 = uflxidnsaved.x3f;
+    auto &mbsize = pmy_pack->pmb->mb_size;
+
+    Real dtfactor = dt / 2.0;
+
+    par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1,
+    KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) {
+
+      // TODO(GNW): need to be careful with different time integrators
+
+      // save dF1/dx1
+      if (j <= je && k <= ke)  {
+        if (stage == 1) {
+          flxidn1(m,k,j,i) = flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor;
+        } else {
+          flxidn1(m,k,j,i) += flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor;
+        }
+      }
+
+      // save dF2/dx2
+      if (multi_d && i <= ie && k <= ke) {
+        if (stage == 1) {
+          flxidn2(m,k,j,i) = flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor;
+        } else {
+          flxidn2(m,k,j,i) += flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor;
+        }
+      }
+
+      // save dF3/dx3
+      if (three_d && i <= ie && j <= je) {
+        if (stage == 1) {
+          flxidn3(m,k,j,i) = flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor;
+        } else {
+          flxidn3(m,k,j,i) += flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor;
+        }
+      }
+    });
+  }
+  return TaskStatus::complete;
+}
+
 } // namespace hydro
diff --git a/src/hydro/hydro_tasks.cpp b/src/hydro/hydro_tasks.cpp
index 411df62e..ede546e2 100644
--- a/src/hydro/hydro_tasks.cpp
+++ b/src/hydro/hydro_tasks.cpp
@@ -53,7 +53,8 @@ void Hydro::AssembleHydroTasks(std::map<std::string, std::shared_ptr<TaskList>>
   id.sendf = tl["stagen"]->AddTask(&Hydro::SendFlux, this, id.flux);
   id.recvf = tl["stagen"]->AddTask(&Hydro::RecvFlux, this, id.sendf);
   id.expl  = tl["stagen"]->AddTask(&Hydro::ExpRKUpdate, this, id.recvf);
-  id.restu = tl["stagen"]->AddTask(&Hydro::RestrictU, this, id.expl);
+  id.savef = tl["stagen"]->AddTask(&Hydro::SaveFlux, this, id.expl);
+  id.restu = tl["stagen"]->AddTask(&Hydro::RestrictU, this, id.savef);
   id.sendu = tl["stagen"]->AddTask(&Hydro::SendU, this, id.restu);
   id.recvu = tl["stagen"]->AddTask(&Hydro::RecvU, this, id.sendu);
   id.bcs   = tl["stagen"]->AddTask(&Hydro::ApplyPhysicalBCs, this, id.recvu);
diff --git a/src/mhd/mhd.cpp b/src/mhd/mhd.cpp
index 097865dd..1f5c178c 100644
--- a/src/mhd/mhd.cpp
+++ b/src/mhd/mhd.cpp
@@ -40,6 +40,7 @@ MHD::MHD(MeshBlockPack *ppack, ParameterInput *pin) :
     efld("efld",1,1,1,1),
     wsaved("wsaved",1,1,1,1,1),
     bccsaved("bccsaved",1,1,1,1,1),
+    uflxidnsaved("uflxidn",1,1,1,1),
     e3x1("e3x1",1,1,1,1),
     e2x1("e2x1",1,1,1,1),
     e1x2("e1x2",1,1,1,1),
@@ -357,4 +358,22 @@ void MHD::SetSaveWBcc() {
   wbcc_saved = true;
 }
 
+//----------------------------------------------------------------------------------------
+// SetSaveUFlxIdn:  set flag to save density flux over entire step
+
+void MHD::SetSaveUFlxIdn() {
+  int nmb = std::max((pmy_pack->nmb_thispack), (pmy_pack->pmesh->nmb_maxperrank));
+  auto &indcs = pmy_pack->pmesh->mb_indcs;
+  int ncells1 = indcs.nx1 + 2*(indcs.ng);
+  int ncells2 = (indcs.nx2 > 1)? (indcs.nx2 + 2*(indcs.ng)) : 1;
+  int ncells3 = (indcs.nx3 > 1)? (indcs.nx3 + 2*(indcs.ng)) : 1;
+
+  // allocated saved arrays for density flux
+  Kokkos::realloc(uflxidnsaved.x1f, nmb, ncells3, ncells2, ncells1+1);
+  Kokkos::realloc(uflxidnsaved.x2f, nmb, ncells3, ncells2+1, ncells1);
+  Kokkos::realloc(uflxidnsaved.x3f, nmb, ncells3+1, ncells2, ncells1);
+
+  uflxidn_saved = true;
+}
+
 } // namespace mhd
diff --git a/src/mhd/mhd.hpp b/src/mhd/mhd.hpp
index 360fc9b5..b505abf4 100644
--- a/src/mhd/mhd.hpp
+++ b/src/mhd/mhd.hpp
@@ -48,6 +48,7 @@ struct MHDTaskIDs {
   TaskID sendf;
   TaskID recvf;
   TaskID expl;
+  TaskID savef;
   TaskID restu;
   TaskID sendu;
   TaskID recvu;
@@ -115,6 +116,10 @@ class MHD {
   DvceArray5D<Real> wsaved;
   DvceArray5D<Real> bccsaved;
 
+  // following used to save step-to-step flux values
+  bool uflxidn_saved = false;
+  DvceFaceFld4D<Real> uflxidnsaved;
+
   // following used for FOFC algorithm
   DvceArray4D<bool> fofc;  // flag for each cell to indicate if FOFC is needed
   bool use_fofc = false;   // flag to enable FOFC
@@ -124,6 +129,7 @@ class MHD {
 
   // functions...
   void SetSaveWBcc();
+  void SetSaveUFlxIdn();
   void AssembleMHDTasks(std::map<std::string, std::shared_ptr<TaskList>> tl);
   // ...in "before_timeintegrator" task list
   TaskStatus SaveMHDState(Driver *d, int stage);
@@ -135,6 +141,7 @@ class MHD {
   TaskStatus SendFlux(Driver *d, int stage);
   TaskStatus RecvFlux(Driver *d, int stage);
   TaskStatus ExpRKUpdate(Driver *d, int stage);
+  TaskStatus SaveFlux(Driver *d, int stage);
   TaskStatus RestrictU(Driver *d, int stage);
   TaskStatus SendU(Driver *d, int stage);
   TaskStatus RecvU(Driver *d, int stage);
diff --git a/src/mhd/mhd_fluxes.cpp b/src/mhd/mhd_fluxes.cpp
index a8408a27..64daeb3b 100644
--- a/src/mhd/mhd_fluxes.cpp
+++ b/src/mhd/mhd_fluxes.cpp
@@ -417,4 +417,65 @@ template void MHD::CalculateFluxes<MHD_RSolver::hlle_sr>(Driver *pdriver, int st
 template void MHD::CalculateFluxes<MHD_RSolver::llf_gr>(Driver *pdriver, int stage);
 template void MHD::CalculateFluxes<MHD_RSolver::hlle_gr>(Driver *pdriver, int stage);
 
+//----------------------------------------------------------------------------------------
+//! \fn TaskStatus MHD::SaveFlux
+//! \brief Save IDN fluxes across all stages for full time step
+
+TaskStatus MHD::SaveFlux(Driver *pdrive, int stage) {
+  if (uflxidn_saved) {
+    auto &indcs = pmy_pack->pmesh->mb_indcs;
+    int is = indcs.is, ie = indcs.ie;
+    int js = indcs.js, je = indcs.je;
+    int ks = indcs.ks, ke = indcs.ke;
+    bool &multi_d = pmy_pack->pmesh->multi_d;
+    bool &three_d = pmy_pack->pmesh->three_d;
+
+    Real dt = pmy_pack->pmesh->dt;
+    int nmb1 = pmy_pack->nmb_thispack - 1;
+    auto flx1 = uflx.x1f;
+    auto flx2 = uflx.x2f;
+    auto flx3 = uflx.x3f;
+    auto flxidn1 = uflxidnsaved.x1f;
+    auto flxidn2 = uflxidnsaved.x2f;
+    auto flxidn3 = uflxidnsaved.x3f;
+    auto &mbsize = pmy_pack->pmb->mb_size;
+
+    Real dtfactor = dt / 2.0;
+
+    par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1,
+    KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) {
+
+      // TODO(GNW): need to be careful with different time integrators
+
+      // save dF1/dx1
+      if (j <= je && k <= ke)  {
+        if (stage == 1) {
+          flxidn1(m,k,j,i) = flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor;
+        } else {
+          flxidn1(m,k,j,i) += flx1(m,IDN,k,j,i) / mbsize.d_view(m).dx1 * dtfactor;
+        }
+      }
+
+      // save dF2/dx2
+      if (multi_d && i <= ie && k <= ke) {
+        if (stage == 1) {
+          flxidn2(m,k,j,i) = flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor;
+        } else {
+          flxidn2(m,k,j,i) += flx2(m,IDN,k,j,i) / mbsize.d_view(m).dx2 * dtfactor;
+        }
+      }
+
+      // save dF3/dx3
+      if (three_d && i <= ie && j <= je) {
+        if (stage == 1) {
+          flxidn3(m,k,j,i) = flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor;
+        } else {
+          flxidn3(m,k,j,i) += flx3(m,IDN,k,j,i) / mbsize.d_view(m).dx3 * dtfactor;
+        }
+      }
+    });
+  }
+  return TaskStatus::complete;
+}
+
 } // namespace mhd
diff --git a/src/mhd/mhd_tasks.cpp b/src/mhd/mhd_tasks.cpp
index 2f3f4c2d..f8c059c9 100644
--- a/src/mhd/mhd_tasks.cpp
+++ b/src/mhd/mhd_tasks.cpp
@@ -45,7 +45,8 @@ void MHD::AssembleMHDTasks(std::map<std::string, std::shared_ptr<TaskList>> tl)
   id.sendf = tl["stagen"]->AddTask(&MHD::SendFlux, this, id.flux);
   id.recvf = tl["stagen"]->AddTask(&MHD::RecvFlux, this, id.sendf);
   id.expl  = tl["stagen"]->AddTask(&MHD::ExpRKUpdate, this, id.recvf);
-  id.restu = tl["stagen"]->AddTask(&MHD::RestrictU, this, id.expl);
+  id.savef = tl["stagen"]->AddTask(&MHD::SaveFlux, this, id.expl);
+  id.restu = tl["stagen"]->AddTask(&MHD::RestrictU, this, id.savef);
   id.sendu = tl["stagen"]->AddTask(&MHD::SendU, this, id.restu);
   id.recvu = tl["stagen"]->AddTask(&MHD::RecvU, this, id.sendu);
   id.efld  = tl["stagen"]->AddTask(&MHD::CornerE, this, id.recvu);
diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp
index 77d92bda..9284ddc7 100644
--- a/src/particles/particles.cpp
+++ b/src/particles/particles.cpp
@@ -14,6 +14,8 @@
 #include "globals.hpp"
 #include "parameter_input.hpp"
 #include "mesh/mesh.hpp"
+#include "hydro/hydro.hpp"
+#include "mhd/mhd.hpp"
 #include "bvals/bvals.hpp"
 #include "particles.hpp"
 
@@ -22,7 +24,8 @@ namespace particles {
 // constructor, initializes data structures and parameters
 
 Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
-    pmy_pack(ppack) {
+    pmy_pack(ppack),
+    rand_pool64(pmy_pack->gids) {
   // check this is at least a 2D problem
   if (pmy_pack->pmesh->one_d) {
     std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__ << std::endl
@@ -46,7 +49,6 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
     if (ptype.compare("cosmic_ray") == 0) {
       particle_type = ParticleType::cosmic_ray;
     } else if (ptype.compare("lagrangian_mc") == 0) {
-      // TODO(GNW): Support initialization
       // TODO(GNW): Restrict which pusher is alowed based on particle type
       particle_type = ParticleType::lagrangian_mc;
     } else {
@@ -62,8 +64,22 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
     std::string ppush = pin->GetString("particles","pusher");
     if (ppush.compare("drift") == 0) {
       pusher = ParticlesPusher::drift;
+      if (particle_type == ParticleType::lagrangian_mc) {
+        std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__
+                  << std::endl << "Particle pusher 'drift' not allowed for lagrangian_mc"
+                  << std::endl;
+        std::exit(EXIT_FAILURE);
+      }
     } else if (ppush.compare("lagrangian_mc") == 0) {
+      // TODO(GNW): is this the right place to set timestep?
+      // force driver to inherit timestep from fluid
+      dtnew = std::numeric_limits<float>::max();
       pusher = ParticlesPusher::lagrangian_mc;
+      if (ppack->pmhd != nullptr) {
+        ppack->pmhd->SetSaveUFlxIdn();
+      } else if (ppack->phydro != nullptr) {
+        ppack->phydro->SetSaveUFlxIdn();
+      }
     } else {
       std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__
                 << std::endl << "Particle pusher must be specified in <particles> block"
@@ -81,19 +97,15 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
   switch (particle_type) {
     case ParticleType::cosmic_ray:
       {
-        int ndim=4;
-        if (pmy_pack->pmesh->three_d) {ndim+=2;}
-        nrdata = ndim;
+        // save particle position then velocity for compiler optimizations even though
+        // 2d runs will not require all six real entries
+        nrdata = 6;  
         nidata = 2;
         break;
       }
     case ParticleType::lagrangian_mc:
       {
-        // TODO(GNW): This implementation may have issues for IPX, IPY, IPZ
-        //            without IVX, IVY, IVZ...
-        int ndim=2;
-        if (pmy_pack->pmesh->three_d) {ndim+=1;}
-        nrdata = ndim;
+        nrdata = 3;
         nidata = 2;
         break;
       }
@@ -104,7 +116,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
   Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack);
 
   // allocate boundary object
-  pbval_part = new ParticlesBoundaryValues(this, pin);
+  pbval_part = new ParticlesBoundaryValues(this, pin);  // TODO(GNW): do I need to check this?
 }
 
 //----------------------------------------------------------------------------------------
diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp
index 4fc22e95..d1de50a1 100644
--- a/src/particles/particles.hpp
+++ b/src/particles/particles.hpp
@@ -17,6 +17,8 @@
 #include "tasklist/task_list.hpp"
 #include "bvals/bvals.hpp"
 
+#include <Kokkos_Random.hpp>
+
 // forward declarations
 
 // constants that enumerate ParticlesPusher options
@@ -82,8 +84,13 @@ class Particles {
   TaskStatus ClearSend(Driver *pdriver, int stage);
   TaskStatus ClearRecv(Driver *pdriver, int stage);
 
+  // ... for particle pushers
+  void PushDrift();
+  void PushLagrangianMC();
+
  private:
   MeshBlockPack* pmy_pack;  // ptr to MeshBlockPack containing this Particles
+  Kokkos::Random_XorShift64_Pool<> rand_pool64;
 };
 
 } // namespace particles
diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp
index c071478b..eb704e70 100644
--- a/src/particles/particles_pushers.cpp
+++ b/src/particles/particles_pushers.cpp
@@ -18,9 +18,31 @@
 namespace particles {
 //----------------------------------------------------------------------------------------
 //! \fn  void Particles::ParticlesPush
-//  \brief
+//  \brief TODO(GNW): write here
 
 TaskStatus Particles::Push(Driver *pdriver, int stage) {
+
+  switch (pusher) {
+    case ParticlesPusher::drift:
+      PushDrift();
+      break;
+
+    case ParticlesPusher::lagrangian_mc:
+      PushLagrangianMC();
+      break;
+
+    default:
+      break;
+  }
+
+  return TaskStatus::complete;
+}
+
+//----------------------------------------------------------------------------------------
+//! \fn void Particles::PushDrift
+//! \brief TODO(GNW): write here
+
+void Particles::PushDrift() {  
   auto &indcs = pmy_pack->pmesh->mb_indcs;
   int is = indcs.is;
   int js = indcs.js;
@@ -33,159 +55,105 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) {
   auto dt_ = pmy_pack->pmesh->dt;
   auto gids = pmy_pack->gids;
 
-  // Used in lagrangian_mc pusher
-  auto dtold_ = pmy_pack->pmesh->dtold;
-  auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1;
-  auto &uflx = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflx:pmy_pack->pmhd->uflx;
-  auto &flx1_ = uflx.x1f;
-  auto &flx2_ = uflx.x2f;
-  auto &flx3_ = uflx.x3f;
-
-  // TODO(GNW): Maybe move this outside and pass it in as needed? What is the
-  //            overhead associated with recreating the pool each call?
-
-  Kokkos::Random_XorShift64_Pool<> rand_pool64(pmy_pack->gids);
-
-  // TODO(GNW): Note that for MC tracers we need to handle mesh refinement (in AMR)
-  //            more carefully.
-
-  // TODO(GNW): Is it better to do an if statement here and capture the variables
-  //            for the lambda or keep the switch?
-
-  // TODO(GNW): Are these fluxes are the correct ones to use (RK question)?
-
-  // TODO(GNW): The particle pusher gets called before the time integrator, but
-  //            for the MC pusher we need to know the fluxes from the previous
-  //            time step, which means we need to read from u1 and fluxes.
-  //            One way to deal with this is just to have the MC pusher update
-  //            particle positions "one step out of phase" and use dtold. An
-  //            alternative is to have the MC pusher use a separate task that
-  //            gets called after the time integrator.
-
-  switch (pusher) {
-    case ParticlesPusher::drift:
-
-      par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
-      KOKKOS_LAMBDA(const int p) {
-        int m = pi(PGID,p) - gids;
-        int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
-        pr(IPX,p) += 0.5*dt_*pr(IPVX,p);
-
-        if (multi_d) {
-          int jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
-          pr(IPY,p) += 0.5*dt_*pr(IPVY,p);
-        }
-
-        if (three_d) {
-          int kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
-          pr(IPZ,p) += 0.5*dt_*pr(IPVZ,p);
-        }
-      });
-
-      break;
-
-    case ParticlesPusher::lagrangian_mc:
-
-      par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
-      KOKKOS_LAMBDA(const int p) {
-        // get random number state this thread
-        auto rand_gen = rand_pool64.get_state();
-
-        int m = pi(PGID,p) - gids;
-
-        int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
-        int jp = 0;
-        int kp = 0;
-
-        if (multi_d) {
-          jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
-        }
-
-        if (three_d) {
-          kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
-        }
-
-        bool particle_has_moved = false;
-        Real reduced_mass = u1_(m,IDN,kp,jp,ip);
-
-        // by convention, these values will be negative when there is outflow
-        // with respect to the current particle's cell
-        Real flx1_left = flx1_(m,IDN,kp,jp,ip);
-        Real flx1_right = -flx1_(m,IDN,kp,jp,ip+1);
-        Real flx2_left = (multi_d) ? flx2_(m,IDN,kp,jp,jp) : 0.;
-        Real flx2_right = (multi_d) ? -flx2_(m,IDN,kp,jp+1,jp) : 0.;
-        Real flx3_left = (three_d) ? flx3_(m,IDN,kp,kp,jp) : 0.;
-        Real flx3_right = (three_d) ? -flx3_(m,IDN,kp+1,kp,jp) : 0.;
-
-        flx1_left *= mbsize.d_view(m).dx1 * dtold_;
-        flx1_right *= mbsize.d_view(m).dx1 * dtold_;
-
-        flx2_left *= mbsize.d_view(m).dx2 * dtold_;
-        flx2_right *= mbsize.d_view(m).dx2 * dtold_;
-
-        flx3_left *= mbsize.d_view(m).dx3 * dtold_;
-        flx3_right *= mbsize.d_view(m).dx3 * dtold_;
-
-        if (flx1_left < 0) {
-          if (rand_gen.frand() < fabs(flx1_left)/reduced_mass) {
-            pr(IPX,p) = (ip - 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min;
-            particle_has_moved = true;
-          }
-          reduced_mass += flx1_left;
-        }
-
-        if (!particle_has_moved && flx1_right < 0) {
-          if (rand_gen.frand() < fabs(flx1_right)/reduced_mass) {
-            pr(IPX,p) = (ip + 1 - is) * mbsize.d_view(m).dx1 + mbsize.d_view(m).x1min;
-            particle_has_moved = true;
-          }
-          reduced_mass += flx1_right;
-        }
-
-        if (multi_d) {
-          if (!particle_has_moved && flx2_left < 0) {
-            if (rand_gen.frand() < fabs(flx2_left)/reduced_mass) {
-              pr(IPY,p) = (jp - 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min;
-              particle_has_moved = true;
-            }
-            reduced_mass += flx2_left;
-          }
-
-          if (!particle_has_moved && flx2_right < 0) {
-            if (rand_gen.frand() < fabs(flx2_right)/reduced_mass) {
-              pr(IPY,p) = (jp + 1 - js) * mbsize.d_view(m).dx2 + mbsize.d_view(m).x2min;
-              particle_has_moved = true;
-            }
-            reduced_mass += flx2_right;
-          }
-        }
-
-        if (three_d) {
-          if (!particle_has_moved && flx3_left < 0) {
-            if (rand_gen.frand() < fabs(flx3_left)/reduced_mass) {
-              pr(IPZ,p) = (kp - 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min;
-              particle_has_moved = true;
-            }
-            reduced_mass += flx3_left;
-          }
-
-          if (!particle_has_moved && flx3_right < 0) {
-            if (rand_gen.frand() < fabs(flx3_right)/reduced_mass) {
-              pr(IPZ,p) = (kp + 1 - ks) * mbsize.d_view(m).dx3 + mbsize.d_view(m).x3min;
-              particle_has_moved = true;
-            }
-            reduced_mass += flx3_right;
-          }
-        }
-        rand_pool64.free_state(rand_gen);  // free state for use by other threads
-      });
+  par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
+  KOKKOS_LAMBDA(const int p) {
+    int m = pi(PGID,p) - gids;
+    int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
+    pr(IPX,p) += 0.5*dt_*pr(IPVX,p);
+
+    if (multi_d) {
+      int jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
+      pr(IPY,p) += 0.5*dt_*pr(IPVY,p);
+    }
+
+    if (three_d) {
+      int kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
+      pr(IPZ,p) += 0.5*dt_*pr(IPVZ,p);
+    }
+  });
+}
 
-      break;
+//----------------------------------------------------------------------------------------
+//! \fn void Particles::PushLagrangianMC
+//! \brief TODO(GNW): write here
 
-    default:
-      break;
-  }
+void Particles::PushLagrangianMC() {
+  auto &indcs = pmy_pack->pmesh->mb_indcs;
+  int is = indcs.is;
+  int js = indcs.js;
+  int ks = indcs.ks;
+  bool &multi_d = pmy_pack->pmesh->multi_d;
+  bool &three_d = pmy_pack->pmesh->three_d;
+  auto &mbsize = pmy_pack->pmb->mb_size;
+  auto &pi = prtcl_idata;
+  auto &pr = prtcl_rdata;
+  auto gids = pmy_pack->gids;
 
-  return TaskStatus::complete;
+  auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1;
+  auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
+  auto &flx1_ = uflxidn_.x1f;
+  auto &flx2_ = uflxidn_.x2f;
+  auto &flx3_ = uflxidn_.x3f;
+
+  // TODO(GNW): be careful with AMR
+  // TODO(GNW): this does not work with SMR yet
+
+  par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
+  KOKKOS_LAMBDA(const int p) {
+
+    auto rand_gen = rand_pool64.get_state();
+
+    int m = pi(PGID,p) - gids;
+
+    int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
+    int jp = js;
+    int kp = ks;
+
+    if (multi_d) {
+      jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
+    }
+
+    if (three_d) {
+      kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
+    }
+
+    // get normalized fluxes based on local density
+    Real mass = u1_(m,IDN,kp,jp,ip);
+
+    // by convention, these values will be positive when there is outflow
+    // with respect to the current particle's cell
+    Real flx1_left = -flx1_(m,kp,jp,ip) / mass;
+    Real flx1_right = flx1_(m,kp,jp,ip+1) / mass;
+    Real flx2_left = (multi_d) ? -flx2_(m,kp,jp,ip) / mass : 0.;
+    Real flx2_right = (multi_d) ? flx2_(m,kp,jp+1,ip) / mass : 0.;
+    Real flx3_left = (three_d) ? -flx3_(m,kp,jp,ip) / mass : 0.;
+    Real flx3_right = (three_d) ? flx3_(m,kp+1,jp,ip) / mass : 0.;
+
+    flx1_left = flx1_left < 0 ? 0 : flx1_left;
+    flx1_right = flx1_right < 0 ? 0 : flx1_right;
+    flx2_left = flx2_left < 0 ? 0 : flx2_left;
+    flx2_right = flx2_right < 0 ? 0 : flx2_right;
+    flx3_left = flx3_left < 0 ? 0 : flx3_left;
+    flx3_right = flx3_right < 0 ? 0 : flx3_right;
+
+    Real rand = rand_gen.frand();
+
+    if (rand < flx1_left) {
+      pr(IPX,p) -= mbsize.d_view(m).dx1;
+    } else if (rand < flx1_left + flx1_right) {
+      pr(IPX,p) += mbsize.d_view(m).dx1;
+    } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) {
+      pr(IPY,p) -= mbsize.d_view(m).dx2;
+    } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) {
+      pr(IPY,p) += mbsize.d_view(m).dx2;
+    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left) {
+      pr(IPZ,p) -= mbsize.d_view(m).dx3;
+    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left + flx3_right) {
+      pr(IPZ,p) += mbsize.d_view(m).dx3;
+    }
+
+    rand_pool64.free_state(rand_gen);  // free state for use by other threads
+  });
 }
+
 } // namespace particles
diff --git a/src/particles/particles_tasks.cpp b/src/particles/particles_tasks.cpp
index 8210a978..2bc3137b 100644
--- a/src/particles/particles_tasks.cpp
+++ b/src/particles/particles_tasks.cpp
@@ -28,15 +28,38 @@ namespace particles {
 void Particles::AssembleTasks(std::map<std::string, std::shared_ptr<TaskList>> tl) {
   TaskID none(0);
 
-  // particle integration done in "before_timeintegrator" task list
-  id.push   = tl["before_timeintegrator"]->AddTask(&Particles::Push, this, none);
-  id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push);
-  id.count  = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid);
-  id.irecv  = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count);
-  id.sendp  = tl["before_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv);
-  id.recvp  = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp);
-  id.crecv  = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp);
-  id.csend  = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv);
+  switch (particle_type) {
+    case ParticleType::cosmic_ray:
+      {
+        // particle integration done in "before_timeintegrator" task list
+        id.push   = tl["before_timeintegrator"]->AddTask(&Particles::Push, this, none);
+        id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push);
+        id.count  = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid);
+        id.irecv  = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count);
+        id.sendp  = tl["before_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv);
+        id.recvp  = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp);
+        id.crecv  = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp);
+        id.csend  = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv);
+        break;
+      }
+
+    case ParticleType::lagrangian_mc:
+      {
+        // particle integration done in "after_timeintegrator" task list
+        id.push   = tl["after_timeintegrator"]->AddTask(&Particles::Push, this, none);
+        id.newgid = tl["after_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push);
+        id.count  = tl["after_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid);
+        id.irecv  = tl["after_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count);
+        id.sendp  = tl["after_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv);
+        id.recvp  = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp);
+        id.crecv  = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp);
+        id.csend  = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv);
+        break;
+      }
+
+    default:
+      break;
+  }
 
   return;
 }
diff --git a/src/pgen/kh.cpp b/src/pgen/kh.cpp
index c47d9793..e5bbfea1 100644
--- a/src/pgen/kh.cpp
+++ b/src/pgen/kh.cpp
@@ -20,6 +20,9 @@
 #include "hydro/hydro.hpp"
 #include "mhd/mhd.hpp"
 #include "pgen.hpp"
+#include "particles/particles.hpp"
+
+#include <Kokkos_Random.hpp>
 
 //----------------------------------------------------------------------------------------
 //! \fn
@@ -186,5 +189,63 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
     pmbp->pmhd->peos->PrimToCons(w0_, bcc0_, u0_, is, ie, js, je, ks, ke);
   }
 
+  // TODO(GNW): support 2d-only particles
+  // Initialize particles
+  if (pmbp->ppart != nullptr) {
+
+    // captures for the kernel
+    auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0;
+    auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata;
+    auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata;
+    auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size;
+    auto &npart = pmy_mesh_->pmb_pack->ppart->nprtcl_thispack;
+    auto gids = pmy_mesh_->pmb_pack->gids;
+    auto gide = pmy_mesh_->pmb_pack->gide;
+
+    auto &indcs = pmy_mesh_->mb_indcs;
+    int &is = indcs.is;
+    int &js = indcs.js;
+    int &ks = indcs.ks;
+    int &nx1 = indcs.nx1;
+    int &nx2 = indcs.nx2;
+    int &nx3 = indcs.nx3;
+
+    // initialize particles
+    Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids);
+    par_for("part_update",DevExeSpace(),0,(npart-1),
+    KOKKOS_LAMBDA(const int p) {
+      auto rand_gen = rand_pool64.get_state();  // get random number state this thread
+      // choose parent MeshBlock randomly
+      int m = static_cast<int>(rand_gen.frand()*(gide - gids + 1.0));
+      pi(PGID,p) = gids + m;
+
+      int ip = 0;
+      int jp = 0;
+      int kp = 0;
+
+      // choose random cell based on density
+      while (true) {
+        ip = static_cast<int>(rand_gen.frand()*nx1) + is;
+        jp = static_cast<int>(rand_gen.frand()*nx2) + js;
+        kp = static_cast<int>(rand_gen.frand()*nx3) + ks;
+        if (u0_(m,IDN,kp,jp,ip) < 1.5) {
+          if (rand_gen.frand() < 0.5) break;
+        } else {
+          break;
+        }
+      }
+
+      // set particle position to cell center + random offset
+      pr(IPX,p) = CellCenterX(ip-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max) +
+                  mbsize.d_view(m).dx1*(rand_gen.frand()-0.5);
+      pr(IPY,p) = CellCenterX(jp-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max) +
+                  mbsize.d_view(m).dx2*(rand_gen.frand()-0.5);
+      pr(IPZ,p) = CellCenterX(kp-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) +
+                  mbsize.d_view(m).dx3*(rand_gen.frand()-0.5);
+
+      rand_pool64.free_state(rand_gen);  // free state for use by other threads
+    });
+  }
+
   return;
 }

From f01ab9acf60d52219d3202ac65b4492a5b7ebb77 Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Tue, 9 Jul 2024 03:21:21 -0400
Subject: [PATCH 03/10] update with fixes of mesh refined code for particle
 indexing. still need to be careful with rank_order.

---
 src/athena.hpp                      |   2 +-
 src/bvals/bvals_part.cpp            |   5 +
 src/hydro/hydro_fluxes.cpp          |   5 +-
 src/main.cpp                        |   1 +
 src/mesh/mesh.cpp                   |   5 +
 src/mesh/mesh.hpp                   |   1 +
 src/mhd/mhd_fluxes.cpp              |   3 +-
 src/particles/particles.cpp         |  23 ++-
 src/particles/particles.hpp         |   3 +
 src/particles/particles_pushers.cpp | 285 +++++++++++++++++++++++++++-
 src/particles/particles_tasks.cpp   |   2 +-
 src/pgen/kh.cpp                     | 137 +++++++++----
 12 files changed, 415 insertions(+), 57 deletions(-)

diff --git a/src/athena.hpp b/src/athena.hpp
index 02a1ee00..de790bc6 100644
--- a/src/athena.hpp
+++ b/src/athena.hpp
@@ -52,7 +52,7 @@ enum VariableIndex {IDN=0, IM1=1, IVX=1, IM2=2, IVY=2, IM3=3, IVZ=3, IEN=4, ITM=
 // array indices for components of magnetic field
 enum BFieldIndex {IBX=0, IBY=1, IBZ=2};
 // array indices for particle arrays
-enum ParticlesIndex {PGID=0, PTAG=1, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5};
+enum ParticlesIndex {PGID=0, PTAG=1, PLASTMOVE=2, PLASTLEVEL=3, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5};
 
 // integer constants to specify spatial reconstruction methods
 enum ReconstructionMethod {dc, plm, ppm4, ppmx, wenoz};
diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index e0c29116..ed137438 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -62,6 +62,8 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
   auto &psendl = sendlist;
   int counter=0;
   int *pcounter = &counter;
+  bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d;
+  bool &three_d = pmy_part->pmy_pack->pmesh->three_d;
 
   Kokkos::realloc(sendlist, static_cast<int>(0.1*npart));
   par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) {
@@ -87,6 +89,9 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
     int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1;
     int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1;
 
+    fy = multi_d ? fy : 0;
+    fz = three_d ? fz : 0;
+
     // only update particle GID if it has crossed MeshBlock boundary
     if ((abs(ix) + abs(iy) + abs(iz)) != 0) {
       if (iz == 0) {
diff --git a/src/hydro/hydro_fluxes.cpp b/src/hydro/hydro_fluxes.cpp
index 108ab946..f20bb9ae 100644
--- a/src/hydro/hydro_fluxes.cpp
+++ b/src/hydro/hydro_fluxes.cpp
@@ -381,11 +381,10 @@ TaskStatus Hydro::SaveFlux(Driver *pdrive, int stage) {
 
     Real dtfactor = dt / 2.0;
 
+    // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators
     par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1,
     KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) {
-
-      // TODO(GNW): need to be careful with different time integrators
-
+      
       // save dF1/dx1
       if (j <= je && k <= ke)  {
         if (stage == 1) {
diff --git a/src/main.cpp b/src/main.cpp
index ecb5d2de..ca153219 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -295,6 +295,7 @@ int main(int argc, char *argv[]) {
     pmesh->pgen = std::make_unique<ProblemGenerator>(pinput, pmesh, restartfile);
     restartfile.Close();
   }
+  pmesh->FinalizeParticleDataStructures(pinput);
 
   //--- Step 6. --------------------------------------------------------------------------
   // Construct Driver and Outputs. Actual outputs (including initial conditions) are made
diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp
index 26b9280b..b35ffab1 100644
--- a/src/mesh/mesh.cpp
+++ b/src/mesh/mesh.cpp
@@ -613,7 +613,12 @@ void Mesh::AddCoordinatesAndPhysics(ParameterInput *pinput) {
     pmb_pack->AddCoordinates(pinput);
     pmb_pack->AddPhysics(pinput);
   }
+}
+
+//----------------------------------------------------------------------------------------
+// \fn Mesh::FinalizeParticleDataStructures
 
+void Mesh::FinalizeParticleDataStructures(ParameterInput *pinput) {
   // Determine total number of particles across all ranks
   particles::Particles *ppart = pmb_pack->ppart;
   if (ppart != nullptr) {
diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp
index b64ea23e..285e2cac 100644
--- a/src/mesh/mesh.hpp
+++ b/src/mesh/mesh.hpp
@@ -151,6 +151,7 @@ class Mesh {
   void WriteMeshStructure();
   void NewTimeStep(const Real tlim);
   void AddCoordinatesAndPhysics(ParameterInput *pinput);
+  void FinalizeParticleDataStructures(ParameterInput *pinput);
   BoundaryFlag GetBoundaryFlag(const std::string& input_string);
   std::string GetBoundaryString(BoundaryFlag input_flag);
 
diff --git a/src/mhd/mhd_fluxes.cpp b/src/mhd/mhd_fluxes.cpp
index 64daeb3b..3ee97769 100644
--- a/src/mhd/mhd_fluxes.cpp
+++ b/src/mhd/mhd_fluxes.cpp
@@ -442,11 +442,10 @@ TaskStatus MHD::SaveFlux(Driver *pdrive, int stage) {
 
     Real dtfactor = dt / 2.0;
 
+    // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators
     par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1,
     KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) {
 
-      // TODO(GNW): need to be careful with different time integrators
-
       // save dF1/dx1
       if (j <= je && k <= ke)  {
         if (stage == 1) {
diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp
index 9284ddc7..07c6e24a 100644
--- a/src/particles/particles.cpp
+++ b/src/particles/particles.cpp
@@ -49,7 +49,6 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
     if (ptype.compare("cosmic_ray") == 0) {
       particle_type = ParticleType::cosmic_ray;
     } else if (ptype.compare("lagrangian_mc") == 0) {
-      // TODO(GNW): Restrict which pusher is alowed based on particle type
       particle_type = ParticleType::lagrangian_mc;
     } else {
       std::cout << "### FATAL ERROR in " << __FILE__ << " at line " << __LINE__
@@ -71,8 +70,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
         std::exit(EXIT_FAILURE);
       }
     } else if (ppush.compare("lagrangian_mc") == 0) {
-      // TODO(GNW): is this the right place to set timestep?
-      // force driver to inherit timestep from fluid
+      // force driver to inherit timestep from fluid by setting desired particle dt to max value
       dtnew = std::numeric_limits<float>::max();
       pusher = ParticlesPusher::lagrangian_mc;
       if (ppack->pmhd != nullptr) {
@@ -106,7 +104,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
     case ParticleType::lagrangian_mc:
       {
         nrdata = 3;
-        nidata = 2;
+        nidata = 4;
         break;
       }
     default:
@@ -116,7 +114,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
   Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack);
 
   // allocate boundary object
-  pbval_part = new ParticlesBoundaryValues(this, pin);  // TODO(GNW): do I need to check this?
+  pbval_part = new ParticlesBoundaryValues(this, pin);
 }
 
 //----------------------------------------------------------------------------------------
@@ -125,6 +123,21 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
 Particles::~Particles() {
 }
 
+//----------------------------------------------------------------------------------------
+// ReallocateParticles()
+// Update particle arrays and update internal particle count data for new number of
+// particles in this pack. This method does not preserve any existing particle data
+
+void Particles::ReallocateParticles(int new_nprtcl_thispack) {
+
+  // TODO(GNW): maybe check that ntrack is set correctly. also check about whether
+  // the rank_order below works given that it might have empty ids
+
+  nprtcl_thispack = new_nprtcl_thispack;
+  Kokkos::realloc(prtcl_rdata, nrdata, nprtcl_thispack);
+  Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack);
+}
+
 //----------------------------------------------------------------------------------------
 // CreatePaticleTags()
 // Assigns tags to particles (unique integer).  Note that tracked particles are always
diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp
index d1de50a1..6f37dd8c 100644
--- a/src/particles/particles.hpp
+++ b/src/particles/particles.hpp
@@ -40,6 +40,7 @@ struct ParticlesTaskIDs {
   TaskID recvp;
   TaskID csend;
   TaskID crecv;
+  TaskID mradj;
 };
 
 namespace particles {
@@ -73,6 +74,7 @@ class Particles {
   ParticlesTaskIDs id;
 
   // functions...
+  void ReallocateParticles(int new_nprtcl_thispack);
   void CreateParticleTags(ParameterInput *pin);
   void AssembleTasks(std::map<std::string, std::shared_ptr<TaskList>> tl);
   TaskStatus Push(Driver *pdriver, int stage);
@@ -83,6 +85,7 @@ class Particles {
   TaskStatus RecvP(Driver *pdriver, int stage);
   TaskStatus ClearSend(Driver *pdriver, int stage);
   TaskStatus ClearRecv(Driver *pdriver, int stage);
+  TaskStatus AdjustMeshRefinement(Driver *pdriver, int stage);
 
   // ... for particle pushers
   void PushDrift();
diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp
index eb704e70..5cd42bfc 100644
--- a/src/particles/particles_pushers.cpp
+++ b/src/particles/particles_pushers.cpp
@@ -18,7 +18,7 @@
 namespace particles {
 //----------------------------------------------------------------------------------------
 //! \fn  void Particles::ParticlesPush
-//  \brief TODO(GNW): write here
+//  \brief wrapper with switch to access different particle pushers
 
 TaskStatus Particles::Push(Driver *pdriver, int stage) {
 
@@ -40,7 +40,7 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) {
 
 //----------------------------------------------------------------------------------------
 //! \fn void Particles::PushDrift
-//! \brief TODO(GNW): write here
+//! \brief push particles based on stored particle internal velocity
 
 void Particles::PushDrift() {  
   auto &indcs = pmy_pack->pmesh->mb_indcs;
@@ -75,9 +75,11 @@ void Particles::PushDrift() {
 
 //----------------------------------------------------------------------------------------
 //! \fn void Particles::PushLagrangianMC
-//! \brief TODO(GNW): write here
+//! \brief push particles using Lagrangian Monte Carlo method (Genel+ 2013, MNRAS.435.1426G)
+//         WARNING: this implementation may not work well with AMR
 
 void Particles::PushLagrangianMC() {
+
   auto &indcs = pmy_pack->pmesh->mb_indcs;
   int is = indcs.is;
   int js = indcs.js;
@@ -87,7 +89,8 @@ void Particles::PushLagrangianMC() {
   auto &mbsize = pmy_pack->pmb->mb_size;
   auto &pi = prtcl_idata;
   auto &pr = prtcl_rdata;
-  auto gids = pmy_pack->gids;
+  auto &gids = pmy_pack->gids;
+  auto &mblev = pmy_pack->pmb->mb_lev;
 
   auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1;
   auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
@@ -95,14 +98,12 @@ void Particles::PushLagrangianMC() {
   auto &flx2_ = uflxidn_.x2f;
   auto &flx3_ = uflxidn_.x3f;
 
-  // TODO(GNW): be careful with AMR
-  // TODO(GNW): this does not work with SMR yet
+  auto &rand_pool64_ = rand_pool64;
 
+  // GNW 2024-JUL-5: Warning, this may not play well with AMR
   par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
   KOKKOS_LAMBDA(const int p) {
 
-    auto rand_gen = rand_pool64.get_state();
-
     int m = pi(PGID,p) - gids;
 
     int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
@@ -136,24 +137,290 @@ void Particles::PushLagrangianMC() {
     flx3_left = flx3_left < 0 ? 0 : flx3_left;
     flx3_right = flx3_right < 0 ? 0 : flx3_right;
 
+    auto rand_gen = rand_pool64_.get_state();
     Real rand = rand_gen.frand();
+    rand_pool64_.free_state(rand_gen);  // free state for use by other threads
+
+    // save refinement level of current zone
+    pi(PLASTLEVEL,p) = mblev.d_view(m);
+
+    // save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8
+    pi(PLASTMOVE,p) = 32 * (ip % 2) + 16 * (jp % 2) + 8 * (kp % 2);
 
     if (rand < flx1_left) {
       pr(IPX,p) -= mbsize.d_view(m).dx1;
+      pi(PLASTMOVE,p) += 1;
     } else if (rand < flx1_left + flx1_right) {
       pr(IPX,p) += mbsize.d_view(m).dx1;
+      pi(PLASTMOVE,p) += 2;
     } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) {
       pr(IPY,p) -= mbsize.d_view(m).dx2;
+      pi(PLASTMOVE,p) += 3;
     } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) {
       pr(IPY,p) += mbsize.d_view(m).dx2;
+      pi(PLASTMOVE,p) += 4;
     } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left) {
       pr(IPZ,p) -= mbsize.d_view(m).dx3;
+      pi(PLASTMOVE,p) += 5;
     } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left + flx3_right) {
       pr(IPZ,p) += mbsize.d_view(m).dx3;
+      pi(PLASTMOVE,p) += 6;
     }
+  });
+}
+
+//----------------------------------------------------------------------------------------
+//! \fn TaskStatus Particles::AdjustMeshRefinement
+//! \brief update locations of particles that enter meshblocks with new refinement levels
+
+TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) {
+
+  auto &indcs = pmy_pack->pmesh->mb_indcs;
+  int is = indcs.is;
+  int js = indcs.js;
+  int ks = indcs.ks;
+  auto &pi = prtcl_idata;
+  auto &pr = prtcl_rdata;
+  auto &gids = pmy_pack->gids;
+  auto &mblev = pmy_pack->pmb->mb_lev;
+  bool &multi_d = pmy_pack->pmesh->multi_d;
+  bool &three_d = pmy_pack->pmesh->three_d;
+  auto &mbsize = pmy_pack->pmb->mb_size;
+
+  auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
+  auto &flx1_ = uflxidn_.x1f;
+  auto &flx2_ = uflxidn_.x2f;
+  auto &flx3_ = uflxidn_.x3f;
 
-    rand_pool64.free_state(rand_gen);  // free state for use by other threads
+  auto &rand_pool64_ = rand_pool64;
+  
+  par_for("particle_meshshift",DevExeSpace(),0,(nprtcl_thispack-1),
+  KOKKOS_LAMBDA(const int p) {
+
+    int m = pi(PGID,p) - gids;
+    int level = mblev.d_view(m);
+
+    int lastlevel = pi(PLASTLEVEL,p);
+    int lastmove = pi(PLASTMOVE,p);
+
+    // oddness of the last cell that the particle lived in
+    int i_parity = lastmove / 32;
+    int j_parity = (lastmove % 32) / 16;
+    int k_parity = (lastmove % 16) / 8;
+
+    // direction of last move:
+    //   1 -> "left" x1 face was chosen
+    //   2 -> "right" x1 face was chosen
+    //   3 -> "left" x2 face was chosen
+    //   4 -> "right" x2 face was chosen
+    //   5 -> "left" x3 face was chosen
+    //   6 -> "right" x3 face was chosen
+    lastmove = lastmove % 8;
+
+    Real dx1 = mbsize.d_view(m).dx1;
+    Real dx2 = multi_d ? mbsize.d_view(m).dx2 : 0.;
+    Real dx3 = three_d ? mbsize.d_view(m).dx3 : 0.;
+
+    if (level > lastlevel) {
+      // this is a higher refinement level, i.e., the zones are smaller now
+ 
+      if (lastmove == 1) {
+        // came from zone to right (dx--)
+        pr(IPX,p) += dx1/2;
+
+        pr(IPY,p) -= dx2/2;
+        pr(IPZ,p) -= dx3/2;
+      } else if (lastmove == 2) {
+        // came from zone to left (dx++)
+        pr(IPX,p) -= dx1/2;
+
+        pr(IPY,p) -= dx2/2;
+        pr(IPZ,p) -= dx3/2;
+      } else if (multi_d && lastmove == 3) {
+        // came from zone above (dy--)
+        pr(IPY,p) += dx2/2;
+
+        pr(IPX,p) -= dx1/2;
+        pr(IPZ,p) -= dx3/2;
+      } else if (multi_d && lastmove == 4) {
+        // came from zone below (dy++)
+        pr(IPY,p) -= dx2/2;
+
+        pr(IPX,p) -= dx1/2;
+        pr(IPZ,p) -= dx3/2;
+      } else if (three_d && lastmove == 5) {
+        // came from zone in front (dz--)
+        pr(IPZ,p) += dx3/2;
+
+        pr(IPX,p) -= dx1/2;
+        pr(IPY,p) -= dx2/2;
+      } else if (three_d && lastmove == 6) {
+        // came from zone behind (dz++)
+        pr(IPZ,p) -= dx3/2;
+
+        pr(IPX,p) -= dx1/2;
+        pr(IPY,p) -= dx2/2;
+      }
+
+      int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
+      int jp = js;
+      int kp = ks;
+
+      if (multi_d) {
+        jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
+      }
+
+      if (three_d) {
+        kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
+      }
+
+      // get fluxes into the four zones that the particle could have ended up in
+      Real flx1 = 0.;
+      Real flx2 = 0.;
+      Real flx3 = 0.;
+      Real flx4 = 0.;
+
+      if (lastmove == 1) {
+        // came from zone to the right
+        flx1 = -flx1_(m,kp,jp,ip+1);
+        flx2 = (multi_d) ? -flx1_(m,kp,jp+1,ip+1) : 0.;
+        flx3 = (three_d) ? -flx1_(m,kp+1,jp,ip+1) : 0.;
+        flx4 = (multi_d && three_d) ? -flx1_(m,kp+1,jp+1,ip+1) : 0.;
+      } else if (lastmove == 2) {
+        // came from zone to the left
+        flx1 = flx1_(m,kp,jp,ip);
+        flx2 = (multi_d) ? flx1_(m,kp,jp+1,ip) : 0.;
+        flx3 = (three_d) ? flx1_(m,kp+1,jp,ip) : 0.;
+        flx4 = (multi_d && three_d) ? flx1_(m,kp+1,jp+1,ip) : 0.;
+      } else if (lastmove == 3) {
+        // came from zone above. is at least multi_d
+        flx1 = -flx2_(m,kp,jp+1,ip);
+        flx2 = -flx2_(m,kp,jp+1,ip+1);
+        flx3 = (three_d) ? -flx2_(m,kp+1,jp+1,ip) : 0.;
+        flx4 = (three_d) ? -flx2_(m,kp+1,jp+1,ip+1) : 0.;
+      } else if (lastmove == 4) {
+        // came from zone below. is at least multi_d
+        flx1 = flx2_(m,kp,jp,ip);
+        flx2 = flx2_(m,kp,jp,ip+1);
+        flx3 = (three_d) ? flx2_(m,kp+1,jp,ip) : 0.;
+        flx4 = (three_d) ? flx2_(m,kp+1,jp,ip+1) : 0.;
+      } else if (lastmove == 5) {
+        // came from zone in front. is three_d
+        flx1 = -flx3_(m,kp+1,jp,ip);
+        flx2 = -flx3_(m,kp+1,jp,ip+1);
+        flx3 = -flx3_(m,kp+1,jp+1,ip);
+        flx4 = -flx3_(m,kp+1,jp+1,ip+1);
+      } else if (lastmove == 6) {
+        // came from zone behind. is three_d
+        flx1 = flx3_(m,kp,jp,ip);
+        flx2 = flx3_(m,kp,jp,ip+1);
+        flx3 = flx3_(m,kp,jp+1,ip);
+        flx4 = flx3_(m,kp,jp+1,ip+1);
+      }
+
+      flx1 = (flx1 < 0) ? 0. : flx1;
+      flx2 = (flx2 < 0) ? 0. : flx2;
+      flx3 = (flx3 < 0) ? 0. : flx3;
+      flx4 = (flx4 < 0) ? 0. : flx4;
+
+      Real flx_total = flx1 + flx2 + flx3 + flx4;
+      flx_total = (flx_total > 0) ? flx_total : 1.e-10;
+
+      flx1 /= flx_total;
+      flx2 /= flx_total;
+      flx3 /= flx_total;
+      flx4 /= flx_total;
+
+      auto rand_gen = rand_pool64_.get_state();
+      Real rand = rand_gen.frand();
+      rand_pool64_.free_state(rand_gen);  // free state for use by other threads
+      
+      int target_zone = 4;
+      if (rand < flx1) {
+        target_zone = 1;
+      } else if (rand < flx1 + flx2) {
+        target_zone = 2;
+      } else if (rand < flx1 + flx2 + flx3) {
+        target_zone = 3;
+      }
+
+      if (lastmove == 1 || lastmove == 2) {
+        if (target_zone == 2) {
+          pr(IPY,p) += mbsize.d_view(m).dx2;
+        } else if (target_zone == 3) {
+          pr(IPZ,p) += mbsize.d_view(m).dx3;
+        } else if (target_zone == 4) {
+          pr(IPY,p) += mbsize.d_view(m).dx2;
+          pr(IPZ,p) += mbsize.d_view(m).dx3;
+        }
+      } else if (lastmove == 3 || lastmove == 4) {
+        if (target_zone == 2) {
+          pr(IPX,p) += mbsize.d_view(m).dx1;
+        } else if (target_zone == 3) {
+          pr(IPZ,p) += mbsize.d_view(m).dx3;
+        } else if (target_zone == 4) {
+          pr(IPX,p) += mbsize.d_view(m).dx1;
+          pr(IPZ,p) += mbsize.d_view(m).dx3;
+        }
+      } else if (lastmove == 5 || lastmove == 6) {
+        if (target_zone == 2) {
+          pr(IPX,p) += mbsize.d_view(m).dx1;
+        } else if (target_zone == 3) {
+          pr(IPY,p) += mbsize.d_view(m).dx2;
+        } else if (target_zone == 4) {
+          pr(IPX,p) += mbsize.d_view(m).dx1;
+          pr(IPY,p) += mbsize.d_view(m).dx2;
+        }
+      }
+
+    } else if (level < lastlevel) {
+      // this is a lower refinement level, i.e., the zones are larger now,
+      // there's nothing special to do other than to move the particle to
+      // the center of the new zone
+
+      if (i_parity) {
+        pr(IPX,p) -= mbsize.d_view(m).dx1/4;
+      } else {
+        pr(IPX,p) += mbsize.d_view(m).dx1/4;
+      }
+      if (multi_d) {
+        if (j_parity) {
+          pr(IPY,p) -= mbsize.d_view(m).dx2/4;
+        } else {
+          pr(IPY,p) += mbsize.d_view(m).dx2/4;
+        }
+      }
+      if (three_d) {
+        if (k_parity) {
+          pr(IPZ,p) -= mbsize.d_view(m).dx3/4;
+        } else {
+          pr(IPZ,p) += mbsize.d_view(m).dx3/4;
+        }
+      }
+
+      if (lastmove == 1) {
+        // came from zone to right (dx--)
+        pr(IPX,p) -= mbsize.d_view(m).dx1/2;
+      } else if (lastmove == 2) {
+        // came from zone to left (dx++)
+        pr(IPX,p) += mbsize.d_view(m).dx1/2;
+      } else if (lastmove == 3) {
+        // came from zone above (dy--)
+        pr(IPY,p) -= mbsize.d_view(m).dx2/2;
+      } else if (lastmove == 4) {
+        // came from zone below (dy++)
+        pr(IPY,p) += mbsize.d_view(m).dx2/2;
+      } else if (lastmove == 5) {
+        // came from zone in front (dz--)
+        pr(IPZ,p) -= mbsize.d_view(m).dx3/2;
+      } else if (lastmove == 6) {
+        // came from zone behind (dz++)
+        pr(IPZ,p) += mbsize.d_view(m).dx3/2;
+      }
+    }
   });
+
+  return TaskStatus::complete;
 }
 
 } // namespace particles
diff --git a/src/particles/particles_tasks.cpp b/src/particles/particles_tasks.cpp
index 2bc3137b..e3215a2d 100644
--- a/src/particles/particles_tasks.cpp
+++ b/src/particles/particles_tasks.cpp
@@ -54,6 +54,7 @@ void Particles::AssembleTasks(std::map<std::string, std::shared_ptr<TaskList>> t
         id.recvp  = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp);
         id.crecv  = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp);
         id.csend  = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv);
+        id.mradj  = tl["after_timeintegrator"]->AddTask(&Particles::AdjustMeshRefinement, this, id.csend);
         break;
       }
 
@@ -112,7 +113,6 @@ TaskStatus Particles::RecvP(Driver *pdrive, int stage) {
   return tstat;
 }
 
-
 //----------------------------------------------------------------------------------------
 //! \fn TaskList Particles::ClearSend
 //! \brief Wrapper task list function that checks all MPI sends have completed.
diff --git a/src/pgen/kh.cpp b/src/pgen/kh.cpp
index e5bbfea1..98c5e308 100644
--- a/src/pgen/kh.cpp
+++ b/src/pgen/kh.cpp
@@ -49,7 +49,7 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
 
   // Select either Hydro or MHD
   DvceArray5D<Real> w0_;
-  Real gm1, p0;
+  Real gm1;
   int nfluid, nscalars;
   if (pmbp->phydro != nullptr) {
     w0_ = pmbp->phydro->w0;
@@ -189,18 +189,14 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
     pmbp->pmhd->peos->PrimToCons(w0_, bcc0_, u0_, is, ie, js, je, ks, ke);
   }
 
-  // TODO(GNW): support 2d-only particles
   // Initialize particles
   if (pmbp->ppart != nullptr) {
 
     // captures for the kernel
     auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0;
-    auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata;
-    auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata;
     auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size;
-    auto &npart = pmy_mesh_->pmb_pack->ppart->nprtcl_thispack;
+    auto &mblev = pmy_mesh_->pmb_pack->pmb->mb_lev;
     auto gids = pmy_mesh_->pmb_pack->gids;
-    auto gide = pmy_mesh_->pmb_pack->gide;
 
     auto &indcs = pmy_mesh_->mb_indcs;
     int &is = indcs.is;
@@ -210,40 +206,109 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
     int &nx2 = indcs.nx2;
     int &nx3 = indcs.nx3;
 
-    // initialize particles
+    // init RNG
     Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids);
-    par_for("part_update",DevExeSpace(),0,(npart-1),
-    KOKKOS_LAMBDA(const int p) {
-      auto rand_gen = rand_pool64.get_state();  // get random number state this thread
-      // choose parent MeshBlock randomly
-      int m = static_cast<int>(rand_gen.frand()*(gide - gids + 1.0));
-      pi(PGID,p) = gids + m;
-
-      int ip = 0;
-      int jp = 0;
-      int kp = 0;
-
-      // choose random cell based on density
-      while (true) {
-        ip = static_cast<int>(rand_gen.frand()*nx1) + is;
-        jp = static_cast<int>(rand_gen.frand()*nx2) + js;
-        kp = static_cast<int>(rand_gen.frand()*nx3) + ks;
-        if (u0_(m,IDN,kp,jp,ip) < 1.5) {
-          if (rand_gen.frand() < 0.5) break;
-        } else {
-          break;
-        }
+
+    // count total mass across the domain
+    Real total_mass = 0.0;
+    const int nmkji = (pmbp->nmb_thispack)*indcs.nx3*indcs.nx2*indcs.nx1;
+    const int nkji = indcs.nx3*indcs.nx2*indcs.nx1;
+    const int nji  = indcs.nx2*indcs.nx1;
+
+    Kokkos::parallel_reduce("pgen_mass", Kokkos::RangePolicy<>(DevExeSpace(), 0, nmkji),
+    KOKKOS_LAMBDA(const int &idx, Real &total_mass) {
+      // compute m,k,j,i indices of thread and evaluate
+      int m = (idx)/nkji;
+      int k = (idx - m*nkji)/nji;
+      int j = (idx - m*nkji - k*nji)/indcs.nx1;
+      int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is;
+      k += ks;
+      j += js;
+
+      Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3;
+      total_mass += u0_(m,IDN,k,j,i) * vol;
+    }, total_mass);
+
+    Real total_mass_thispack = total_mass;
+
+#if MPI_PARALLEL_ENABLED
+    // get total mass over all MPI ranks
+    MPI_Allreduce(MPI_IN_PLACE, &total_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif
+
+    // get number of particles for this mbpack using MC to deal with fractional particles
+    Real target_nparticles = pin->GetOrAddReal("particles","target_count",100000.0);
+    Real mass_per_particle = total_mass / target_nparticles;
+
+    // create shared array to hold number of particles per zone
+    DualArray2D<int> nparticles_per_zone("partperzone", nmkji,2);
+    par_for("particle_count", DevExeSpace(), 0,nmkji-1,
+    KOKKOS_LAMBDA(int idx) {
+      int m = (idx)/nkji;
+      int k = (idx - m*nkji)/nji;
+      int j = (idx - m*nkji - k*nji)/indcs.nx1;
+      int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is;
+      k += ks;
+      j += js;
+      Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3;
+      Real nppc = u0_(m,IDN,k,j,i) * vol / mass_per_particle;
+      int nparticles = static_cast<int>(nppc);
+      nppc = fabs(fmod(nppc, 1.0));
+      auto rand_gen = rand_pool64.get_state();
+      if (rand_gen.frand() < nppc) {
+        nparticles += 1;
       }
+      nparticles_per_zone.d_view(idx,0) = nparticles;
+      rand_pool64.free_state(rand_gen); 
+    });
 
-      // set particle position to cell center + random offset
-      pr(IPX,p) = CellCenterX(ip-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max) +
-                  mbsize.d_view(m).dx1*(rand_gen.frand()-0.5);
-      pr(IPY,p) = CellCenterX(jp-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max) +
-                  mbsize.d_view(m).dx2*(rand_gen.frand()-0.5);
-      pr(IPZ,p) = CellCenterX(kp-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) +
-                  mbsize.d_view(m).dx3*(rand_gen.frand()-0.5);
+    // count total number of particles in this pack
+    nparticles_per_zone.template modify<DevExeSpace>();
+    nparticles_per_zone.template sync<HostMemSpace>();
+    int nparticles_thispack = 0;
+    for (int i=0; i<nmkji; ++i) {
+      nparticles_per_zone.h_view(i,1) = nparticles_thispack;
+      nparticles_thispack += nparticles_per_zone.h_view(i,0);
+    }
+    nparticles_per_zone.template modify<HostMemSpace>();
+    nparticles_per_zone.template sync<DevMemSpace>();
+
+    // helpful debug statement
+    std::cout << "total mass across domain: " << total_mass << ", total mass in pack: " << total_mass_thispack
+              << ", target nparticles: " << target_nparticles << ", nparticles in pack: " << nparticles_thispack
+              << std::endl;
+
+    // reallocate space for particles and get relevant pointers
+    pmy_mesh_->pmb_pack->ppart->ReallocateParticles(nparticles_thispack);
+
+    auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata;
+    auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata;
 
-      rand_pool64.free_state(rand_gen);  // free state for use by other threads
+    // initialize particles. only intended for Lagrangian-type particles
+    par_for("part_init", DevExeSpace(), 0,nmkji-1,
+    KOKKOS_LAMBDA(int idx) {
+      int m = (idx)/nkji;
+      int k = (idx - m*nkji)/nji;
+      int j = (idx - m*nkji - k*nji)/indcs.nx1;
+      int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is;
+      k += ks;
+      j += js;
+
+      int nparticles_in_zone = nparticles_per_zone.d_view(idx,0);
+      int starting_index = nparticles_per_zone.d_view(idx,1);
+
+      for (int p=0; p<nparticles_in_zone; ++p) {
+        int pidx = p + starting_index;
+
+        pi(PGID,pidx) = gids + m;
+        pi(PLASTLEVEL,pidx) = mblev.d_view(m);
+
+        // set particle to zone center
+        pr(IPX,pidx) = CellCenterX(i-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max);
+        pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max);
+        pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) -
+                       mbsize.d_view(m).dx3/2;
+      }
     });
   }
 

From d5c6cd481833c3c3d4c91550493c5b7e4f73c808 Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Wed, 31 Jul 2024 11:30:20 -0400
Subject: [PATCH 04/10] lint code for c++

---
 src/athena.hpp                              |   3 +-
 src/hydro/hydro_fluxes.cpp                  |   1 -
 src/mhd/mhd_fluxes.cpp                      |   1 -
 src/particles/particles.cpp                 |   6 +-
 src/particles/particles_pushers.cpp         |  27 ++-
 src/particles/particles_tasks.cpp           |  51 +++--
 src/pgen/kh.cpp                             |  18 +-
 tst/scripts/style/check_athena_cpp_style.sh |   2 +-
 tst/scripts/style/clang_format_v1.cfg       | 196 ++++++++++++++++++++
 tst/scripts/style/clang_format_v2.cfg       | 196 ++++++++++++++++++++
 10 files changed, 456 insertions(+), 45 deletions(-)
 create mode 100644 tst/scripts/style/clang_format_v1.cfg
 create mode 100644 tst/scripts/style/clang_format_v2.cfg

diff --git a/src/athena.hpp b/src/athena.hpp
index de790bc6..c97bb73c 100644
--- a/src/athena.hpp
+++ b/src/athena.hpp
@@ -52,7 +52,8 @@ enum VariableIndex {IDN=0, IM1=1, IVX=1, IM2=2, IVY=2, IM3=3, IVZ=3, IEN=4, ITM=
 // array indices for components of magnetic field
 enum BFieldIndex {IBX=0, IBY=1, IBZ=2};
 // array indices for particle arrays
-enum ParticlesIndex {PGID=0, PTAG=1, PLASTMOVE=2, PLASTLEVEL=3, IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5};
+enum ParticlesIndex {PGID=0, PTAG=1, PLASTMOVE=2, PLASTLEVEL=3,
+                     IPX=0, IPY=1, IPZ=2, IPVX=3, IPVY=4, IPVZ=5};
 
 // integer constants to specify spatial reconstruction methods
 enum ReconstructionMethod {dc, plm, ppm4, ppmx, wenoz};
diff --git a/src/hydro/hydro_fluxes.cpp b/src/hydro/hydro_fluxes.cpp
index f20bb9ae..f88832bb 100644
--- a/src/hydro/hydro_fluxes.cpp
+++ b/src/hydro/hydro_fluxes.cpp
@@ -384,7 +384,6 @@ TaskStatus Hydro::SaveFlux(Driver *pdrive, int stage) {
     // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators
     par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1,
     KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) {
-      
       // save dF1/dx1
       if (j <= je && k <= ke)  {
         if (stage == 1) {
diff --git a/src/mhd/mhd_fluxes.cpp b/src/mhd/mhd_fluxes.cpp
index 3ee97769..e234065f 100644
--- a/src/mhd/mhd_fluxes.cpp
+++ b/src/mhd/mhd_fluxes.cpp
@@ -445,7 +445,6 @@ TaskStatus MHD::SaveFlux(Driver *pdrive, int stage) {
     // GNW 2024-JUL-5: Warning .. be careful when using this different time integrators
     par_for("flux_save",DevExeSpace(),0,nmb1,ks,ke+1,js,je+1,is,ie+1,
     KOKKOS_LAMBDA(const int m, const int k, const int j, const int i) {
-
       // save dF1/dx1
       if (j <= je && k <= ke)  {
         if (stage == 1) {
diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp
index 07c6e24a..a2130f5e 100644
--- a/src/particles/particles.cpp
+++ b/src/particles/particles.cpp
@@ -9,6 +9,7 @@
 #include <iostream>
 #include <string>
 #include <algorithm>
+#include <limits>
 
 #include "athena.hpp"
 #include "globals.hpp"
@@ -70,7 +71,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
         std::exit(EXIT_FAILURE);
       }
     } else if (ppush.compare("lagrangian_mc") == 0) {
-      // force driver to inherit timestep from fluid by setting desired particle dt to max value
+      // force driver to inherit timestep from fluid by setting particle dt to max value
       dtnew = std::numeric_limits<float>::max();
       pusher = ParticlesPusher::lagrangian_mc;
       if (ppack->pmhd != nullptr) {
@@ -97,7 +98,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
       {
         // save particle position then velocity for compiler optimizations even though
         // 2d runs will not require all six real entries
-        nrdata = 6;  
+        nrdata = 6;
         nidata = 2;
         break;
       }
@@ -129,7 +130,6 @@ Particles::~Particles() {
 // particles in this pack. This method does not preserve any existing particle data
 
 void Particles::ReallocateParticles(int new_nprtcl_thispack) {
-
   // TODO(GNW): maybe check that ntrack is set correctly. also check about whether
   // the rank_order below works given that it might have empty ids
 
diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp
index 5cd42bfc..80f29841 100644
--- a/src/particles/particles_pushers.cpp
+++ b/src/particles/particles_pushers.cpp
@@ -21,7 +21,6 @@ namespace particles {
 //  \brief wrapper with switch to access different particle pushers
 
 TaskStatus Particles::Push(Driver *pdriver, int stage) {
-
   switch (pusher) {
     case ParticlesPusher::drift:
       PushDrift();
@@ -42,7 +41,7 @@ TaskStatus Particles::Push(Driver *pdriver, int stage) {
 //! \fn void Particles::PushDrift
 //! \brief push particles based on stored particle internal velocity
 
-void Particles::PushDrift() {  
+void Particles::PushDrift() {
   auto &indcs = pmy_pack->pmesh->mb_indcs;
   int is = indcs.is;
   int js = indcs.js;
@@ -75,11 +74,10 @@ void Particles::PushDrift() {
 
 //----------------------------------------------------------------------------------------
 //! \fn void Particles::PushLagrangianMC
-//! \brief push particles using Lagrangian Monte Carlo method (Genel+ 2013, MNRAS.435.1426G)
+//! \brief push with Lagrangian Monte Carlo method (Genel+ 2013, MNRAS.435.1426G)
 //         WARNING: this implementation may not work well with AMR
 
 void Particles::PushLagrangianMC() {
-
   auto &indcs = pmy_pack->pmesh->mb_indcs;
   int is = indcs.is;
   int js = indcs.js;
@@ -93,7 +91,8 @@ void Particles::PushLagrangianMC() {
   auto &mblev = pmy_pack->pmb->mb_lev;
 
   auto &u1_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->u1:pmy_pack->pmhd->u1;
-  auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
+  auto &uflxidn_ = (pmy_pack->phydro != nullptr)?
+                    pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
   auto &flx1_ = uflxidn_.x1f;
   auto &flx2_ = uflxidn_.x2f;
   auto &flx3_ = uflxidn_.x3f;
@@ -103,7 +102,6 @@ void Particles::PushLagrangianMC() {
   // GNW 2024-JUL-5: Warning, this may not play well with AMR
   par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
   KOKKOS_LAMBDA(const int p) {
-
     int m = pi(PGID,p) - gids;
 
     int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
@@ -159,10 +157,12 @@ void Particles::PushLagrangianMC() {
     } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) {
       pr(IPY,p) += mbsize.d_view(m).dx2;
       pi(PLASTMOVE,p) += 4;
-    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left) {
+    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right
+                               + flx3_left) {
       pr(IPZ,p) -= mbsize.d_view(m).dx3;
       pi(PLASTMOVE,p) += 5;
-    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right + flx3_left + flx3_right) {
+    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right
+                               + flx3_left + flx3_right) {
       pr(IPZ,p) += mbsize.d_view(m).dx3;
       pi(PLASTMOVE,p) += 6;
     }
@@ -174,7 +174,6 @@ void Particles::PushLagrangianMC() {
 //! \brief update locations of particles that enter meshblocks with new refinement levels
 
 TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) {
-
   auto &indcs = pmy_pack->pmesh->mb_indcs;
   int is = indcs.is;
   int js = indcs.js;
@@ -187,16 +186,16 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) {
   bool &three_d = pmy_pack->pmesh->three_d;
   auto &mbsize = pmy_pack->pmb->mb_size;
 
-  auto &uflxidn_ = (pmy_pack->phydro != nullptr)?pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
+  auto &uflxidn_ = (pmy_pack->phydro != nullptr)?
+                   pmy_pack->phydro->uflxidnsaved:pmy_pack->pmhd->uflxidnsaved;
   auto &flx1_ = uflxidn_.x1f;
   auto &flx2_ = uflxidn_.x2f;
   auto &flx3_ = uflxidn_.x3f;
 
   auto &rand_pool64_ = rand_pool64;
-  
+
   par_for("particle_meshshift",DevExeSpace(),0,(nprtcl_thispack-1),
   KOKKOS_LAMBDA(const int p) {
-
     int m = pi(PGID,p) - gids;
     int level = mblev.d_view(m);
 
@@ -223,7 +222,7 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) {
 
     if (level > lastlevel) {
       // this is a higher refinement level, i.e., the zones are smaller now
- 
+
       if (lastmove == 1) {
         // came from zone to right (dx--)
         pr(IPX,p) += dx1/2;
@@ -334,7 +333,7 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) {
       auto rand_gen = rand_pool64_.get_state();
       Real rand = rand_gen.frand();
       rand_pool64_.free_state(rand_gen);  // free state for use by other threads
-      
+
       int target_zone = 4;
       if (rand < flx1) {
         target_zone = 1;
diff --git a/src/particles/particles_tasks.cpp b/src/particles/particles_tasks.cpp
index e3215a2d..5f5cfb4b 100644
--- a/src/particles/particles_tasks.cpp
+++ b/src/particles/particles_tasks.cpp
@@ -32,29 +32,46 @@ void Particles::AssembleTasks(std::map<std::string, std::shared_ptr<TaskList>> t
     case ParticleType::cosmic_ray:
       {
         // particle integration done in "before_timeintegrator" task list
-        id.push   = tl["before_timeintegrator"]->AddTask(&Particles::Push, this, none);
-        id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push);
-        id.count  = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid);
-        id.irecv  = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count);
-        id.sendp  = tl["before_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv);
-        id.recvp  = tl["before_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp);
-        id.crecv  = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp);
-        id.csend  = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv);
+        id.push   = tl["before_timeintegrator"]->AddTask(&Particles::Push,
+                                                         this, none);
+        id.newgid = tl["before_timeintegrator"]->AddTask(&Particles::NewGID,
+                                                         this, id.push);
+        id.count  = tl["before_timeintegrator"]->AddTask(&Particles::SendCnt,
+                                                         this, id.newgid);
+        id.irecv  = tl["before_timeintegrator"]->AddTask(&Particles::InitRecv,
+                                                         this, id.count);
+        id.sendp  = tl["before_timeintegrator"]->AddTask(&Particles::SendP,
+                                                         this, id.irecv);
+        id.recvp  = tl["before_timeintegrator"]->AddTask(&Particles::RecvP,
+                                                         this, id.sendp);
+        id.crecv  = tl["before_timeintegrator"]->AddTask(&Particles::ClearRecv,
+                                                         this, id.recvp);
+        id.csend  = tl["before_timeintegrator"]->AddTask(&Particles::ClearSend,
+                                                         this, id.crecv);
         break;
       }
 
     case ParticleType::lagrangian_mc:
       {
         // particle integration done in "after_timeintegrator" task list
-        id.push   = tl["after_timeintegrator"]->AddTask(&Particles::Push, this, none);
-        id.newgid = tl["after_timeintegrator"]->AddTask(&Particles::NewGID, this, id.push);
-        id.count  = tl["after_timeintegrator"]->AddTask(&Particles::SendCnt, this, id.newgid);
-        id.irecv  = tl["after_timeintegrator"]->AddTask(&Particles::InitRecv, this, id.count);
-        id.sendp  = tl["after_timeintegrator"]->AddTask(&Particles::SendP, this, id.irecv);
-        id.recvp  = tl["after_timeintegrator"]->AddTask(&Particles::RecvP, this, id.sendp);
-        id.crecv  = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv, this, id.recvp);
-        id.csend  = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend, this, id.crecv);
-        id.mradj  = tl["after_timeintegrator"]->AddTask(&Particles::AdjustMeshRefinement, this, id.csend);
+        id.push   = tl["after_timeintegrator"]->AddTask(&Particles::Push,
+                                                        this, none);
+        id.newgid = tl["after_timeintegrator"]->AddTask(&Particles::NewGID,
+                                                        this, id.push);
+        id.count  = tl["after_timeintegrator"]->AddTask(&Particles::SendCnt,
+                                                        this, id.newgid);
+        id.irecv  = tl["after_timeintegrator"]->AddTask(&Particles::InitRecv,
+                                                        this, id.count);
+        id.sendp  = tl["after_timeintegrator"]->AddTask(&Particles::SendP,
+                                                        this, id.irecv);
+        id.recvp  = tl["after_timeintegrator"]->AddTask(&Particles::RecvP,
+                                                        this, id.sendp);
+        id.crecv  = tl["after_timeintegrator"]->AddTask(&Particles::ClearRecv,
+                                                        this, id.recvp);
+        id.csend  = tl["after_timeintegrator"]->AddTask(&Particles::ClearSend,
+                                                        this, id.crecv);
+        id.mradj  = tl["after_timeintegrator"]->AddTask(&Particles::AdjustMeshRefinement,
+                                                        this, id.csend);
         break;
       }
 
diff --git a/src/pgen/kh.cpp b/src/pgen/kh.cpp
index 98c5e308..af1206ba 100644
--- a/src/pgen/kh.cpp
+++ b/src/pgen/kh.cpp
@@ -191,7 +191,6 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
 
   // Initialize particles
   if (pmbp->ppart != nullptr) {
-
     // captures for the kernel
     auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0;
     auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size;
@@ -259,7 +258,7 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
         nparticles += 1;
       }
       nparticles_per_zone.d_view(idx,0) = nparticles;
-      rand_pool64.free_state(rand_gen); 
+      rand_pool64.free_state(rand_gen);
     });
 
     // count total number of particles in this pack
@@ -274,8 +273,10 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
     nparticles_per_zone.template sync<DevMemSpace>();
 
     // helpful debug statement
-    std::cout << "total mass across domain: " << total_mass << ", total mass in pack: " << total_mass_thispack
-              << ", target nparticles: " << target_nparticles << ", nparticles in pack: " << nparticles_thispack
+    std::cout << "total mass across domain: " << total_mass
+              <<  ", total mass in pack: " << total_mass_thispack
+              << ", target nparticles: " << target_nparticles
+              << ", nparticles in pack: " << nparticles_thispack
               << std::endl;
 
     // reallocate space for particles and get relevant pointers
@@ -304,9 +305,12 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
         pi(PLASTLEVEL,pidx) = mblev.d_view(m);
 
         // set particle to zone center
-        pr(IPX,pidx) = CellCenterX(i-is, nx1, mbsize.d_view(m).x1min, mbsize.d_view(m).x1max);
-        pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min, mbsize.d_view(m).x2max);
-        pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min, mbsize.d_view(m).x3max) -
+        pr(IPX,pidx) = CellCenterX(i-is, nx1, mbsize.d_view(m).x1min,
+                                   mbsize.d_view(m).x1max);
+        pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min,
+                                   mbsize.d_view(m).x2max);
+        pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min,
+                                   mbsize.d_view(m).x3max) -
                        mbsize.d_view(m).dx3/2;
       }
     });
diff --git a/tst/scripts/style/check_athena_cpp_style.sh b/tst/scripts/style/check_athena_cpp_style.sh
index 76309bb2..8895fa48 100755
--- a/tst/scripts/style/check_athena_cpp_style.sh
+++ b/tst/scripts/style/check_athena_cpp_style.sh
@@ -17,7 +17,7 @@
 
 # Obtain Google C++ Style Linter:
 echo "Obtaining Google C++ Style cpplint.py test"
-curl https://raw.githubusercontent.com/cpplint/cpplint/develop/cpplint.py \
+curl https://raw.githubusercontent.com/cpplint/cpplint/master/cpplint.py \
 --output cpplint.py --silent
 
 # Apply Google C++ Style Linter to all source code files at once:
diff --git a/tst/scripts/style/clang_format_v1.cfg b/tst/scripts/style/clang_format_v1.cfg
new file mode 100644
index 00000000..bbdb1688
--- /dev/null
+++ b/tst/scripts/style/clang_format_v1.cfg
@@ -0,0 +1,196 @@
+## see https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+##
+## usage:
+## clang-format -style=file:/path/to/clang_format_v1.cfg -i file.cpp
+##
+---
+AccessModifierOffset: 1
+AlignAfterOpenBracket: AlwaysBreak
+AlignArrayOfStructures: Right
+AlignConsecutiveAssignments: Consecutive
+AlignConsecutiveBitFields: Consecutive
+AlignConsecutiveDeclarations: None
+AlignConsecutiveMacros: Consecutive
+AlignEscapedNewlines: Right
+AlignOperands: DontAlign
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: false
+AllowAllConstructorInitializersOnNextLine: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+AttributeMacros:
+- __capability
+BasedOnStyle: Chromium
+BinPackArguments: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+BraceWrapping:
+  AfterCaseLabel: false
+  AfterClass: true
+  AfterControlStatement: Never
+  AfterEnum: true
+  AfterExternBlock: true
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: true
+  AfterUnion: true
+  BeforeCatch: false
+  BeforeElse: false
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces: true
+  SplitEmptyFunction: true
+  SplitEmptyNamespace: true
+  SplitEmptyRecord: true
+BreakAfterJavaFieldAnnotations: true
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeBraces: Attach
+BreakBeforeConceptDeclarations: true
+BreakBeforeInheritanceComma: true
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: AfterColon
+BreakConstructorInitializersBeforeComma: false
+BreakInheritanceList: AfterColon
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: true
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 14
+ContinuationIndentWidth: 2
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat: false
+## after public or private
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: Never
+ExperimentalAutoDetectBinPacking: true
+FixNamespaceComments: true
+ForEachMacros:
+- foreach
+- Q_FOREACH
+- BOOST_FOREACH
+IfMacros:
+- KJ_IF_MAYBE
+IncludeBlocks: Preserve
+IncludeCategories:
+- CaseSensitive: false
+  Priority: 2
+  Regex: ^"(llvm|llvm-c|clang|clang-c)/
+  SortPriority: 0
+- CaseSensitive: false
+  Priority: 3
+  Regex: ^(<|"(gtest|gmock|isl|json)/)
+  SortPriority: 0
+- CaseSensitive: false
+  Priority: 1
+  Regex: .*
+  SortPriority: 0
+IncludeIsMainRegex: (Test)?$
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: true
+IndentCaseBlocks: true
+IndentCaseLabels: true
+IndentExternBlock: Indent
+IndentGotoLabels: false
+IndentPPDirectives: None
+IndentRequires: false
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+InsertTrailingCommas: None
+JavaScriptQuotes: Single
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+LambdaBodyIndentation: Signature
+Language: Cpp
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 0
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PPIndentWidth: -1
+PackConstructorInitializers: BinPack
+PenaltyBreakAssignment: 0
+PenaltyBreakBeforeFirstCallParameter: 21
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 107
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakString: 1073
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1008223
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Right
+QualifierAlignment: Leave ## don't change this
+ReferenceAlignment: Pointer
+ReflowComments: true ## set true to brack a long comment
+RemoveBracesLLVM: false
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 0
+SortIncludes: CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: false
+SpaceAroundPointerQualifiers: Both
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: false
+SpaceBeforeInheritanceColon: false
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+  AfterControlStatements: false
+  AfterForeachMacros: true
+  AfterFunctionDeclarationName: false
+  AfterFunctionDefinitionName: false
+  AfterIfMacros: true
+  AfterOverloadedOperator: false
+  BeforeNonEmptyParentheses: false
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: Never
+SpacesInCStyleCastParentheses: false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Maximum: -1
+  Minimum: 1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+StatementAttributeLikeMacros:
+- Q_EMIT
+StatementMacros:
+- Q_UNUSED
+- QT_REQUIRE_VERSION
+TabWidth: 0
+UseCRLF: true
+UseTab: ForIndentation
+WhitespaceSensitiveMacros:
+- STRINGIZE
+- PP_STRINGIZE
+- BOOST_PP_STRINGIZE
+- NS_SWIFT_NAME
+- CF_SWIFT_NAME
+...
diff --git a/tst/scripts/style/clang_format_v2.cfg b/tst/scripts/style/clang_format_v2.cfg
new file mode 100644
index 00000000..1d4c3a8a
--- /dev/null
+++ b/tst/scripts/style/clang_format_v2.cfg
@@ -0,0 +1,196 @@
+## see https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+##
+## usage:
+## clang-format -style=file:/path/to/clang_format_v2.cfg -i file.hpp
+##
+---
+AccessModifierOffset: 1
+AlignAfterOpenBracket: AlwaysBreak
+AlignArrayOfStructures: Right
+AlignConsecutiveAssignments: Consecutive
+AlignConsecutiveBitFields: Consecutive
+AlignConsecutiveDeclarations: None
+AlignConsecutiveMacros: Consecutive
+AlignEscapedNewlines: Right
+AlignOperands: DontAlign
+AlignTrailingComments: true
+AllowAllArgumentsOnNextLine: false
+AllowAllConstructorInitializersOnNextLine: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+AttributeMacros:
+- __capability
+BasedOnStyle: Chromium
+BinPackArguments: true
+BinPackParameters: true
+BitFieldColonSpacing: Both
+BraceWrapping:
+  AfterCaseLabel: false
+  AfterClass: true
+  AfterControlStatement: Never
+  AfterEnum: true
+  AfterExternBlock: true
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: true
+  AfterUnion: true
+  BeforeCatch: false
+  BeforeElse: false
+  BeforeLambdaBody: false
+  BeforeWhile: false
+  IndentBraces: true
+  SplitEmptyFunction: true
+  SplitEmptyNamespace: true
+  SplitEmptyRecord: true
+BreakAfterJavaFieldAnnotations: true
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeBraces: Attach
+BreakBeforeConceptDeclarations: true
+BreakBeforeInheritanceComma: true
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: AfterColon
+BreakConstructorInitializersBeforeComma: false
+BreakInheritanceList: AfterColon
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: true
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 14
+ContinuationIndentWidth: 2
+Cpp11BracedListStyle: true
+DeriveLineEnding: true
+DerivePointerAlignment: false
+DisableFormat: false
+## after public or private
+EmptyLineAfterAccessModifier: Never
+EmptyLineBeforeAccessModifier: Never
+ExperimentalAutoDetectBinPacking: true
+FixNamespaceComments: true
+ForEachMacros:
+- foreach
+- Q_FOREACH
+- BOOST_FOREACH
+IfMacros:
+- KJ_IF_MAYBE
+IncludeBlocks: Preserve
+IncludeCategories:
+- CaseSensitive: false
+  Priority: 2
+  Regex: ^"(llvm|llvm-c|clang|clang-c)/
+  SortPriority: 0
+- CaseSensitive: false
+  Priority: 3
+  Regex: ^(<|"(gtest|gmock|isl|json)/)
+  SortPriority: 0
+- CaseSensitive: false
+  Priority: 1
+  Regex: .*
+  SortPriority: 0
+IncludeIsMainRegex: (Test)?$
+IncludeIsMainSourceRegex: ''
+IndentAccessModifiers: true
+IndentCaseBlocks: true
+IndentCaseLabels: true
+IndentExternBlock: Indent
+IndentGotoLabels: false
+IndentPPDirectives: None
+IndentRequires: false
+IndentWidth: 1
+IndentWrappedFunctionNames: false
+InsertTrailingCommas: None
+JavaScriptQuotes: Single
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+LambdaBodyIndentation: Signature
+Language: Cpp
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBinPackProtocolList: Auto
+ObjCBlockIndentWidth: 0
+ObjCBreakBeforeNestedBlockParam: true
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PPIndentWidth: -1
+PackConstructorInitializers: BinPack
+PenaltyBreakAssignment: 0
+PenaltyBreakBeforeFirstCallParameter: 21
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 107
+PenaltyBreakOpenParenthesis: 0
+PenaltyBreakString: 1073
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1008223
+PenaltyIndentedWhitespace: 0
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Right
+QualifierAlignment: Leave ## don't change this
+ReferenceAlignment: Pointer
+ReflowComments: true ## set true to brack a long comment
+RemoveBracesLLVM: false
+SeparateDefinitionBlocks: Leave
+ShortNamespaceLines: 0
+SortIncludes: CaseSensitive
+SortJavaStaticImport: Before
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: false
+SpaceAroundPointerQualifiers: Both
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCaseColon: false
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: false
+SpaceBeforeInheritanceColon: false
+SpaceBeforeParens: ControlStatements
+SpaceBeforeParensOptions:
+  AfterControlStatements: false
+  AfterForeachMacros: true
+  AfterFunctionDeclarationName: false
+  AfterFunctionDefinitionName: false
+  AfterIfMacros: true
+  AfterOverloadedOperator: false
+  BeforeNonEmptyParentheses: false
+SpaceBeforeRangeBasedForLoopColon: false
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: Never
+SpacesInCStyleCastParentheses: false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: true
+SpacesInLineCommentPrefix:
+  Maximum: -1
+  Minimum: 1
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+StatementAttributeLikeMacros:
+- Q_EMIT
+StatementMacros:
+- Q_UNUSED
+- QT_REQUIRE_VERSION
+TabWidth: 0
+UseCRLF: true
+UseTab: ForIndentation
+WhitespaceSensitiveMacros:
+- STRINGIZE
+- PP_STRINGIZE
+- BOOST_PP_STRINGIZE
+- NS_SWIFT_NAME
+- CF_SWIFT_NAME
+...

From 0c1c858a5477d5da24b69c12a56bdaa16356b98a Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Thu, 1 Aug 2024 17:19:31 -0400
Subject: [PATCH 05/10] add particle IC for gr torus to remote

---
 src/pgen/gr_torus.cpp | 128 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/src/pgen/gr_torus.cpp b/src/pgen/gr_torus.cpp
index 1220fb47..b3a16401 100644
--- a/src/pgen/gr_torus.cpp
+++ b/src/pgen/gr_torus.cpp
@@ -42,6 +42,7 @@
 #include "hydro/hydro.hpp"
 #include "mhd/mhd.hpp"
 #include "radiation/radiation.hpp"
+#include "particles/particles.hpp"
 
 #include <Kokkos_Random.hpp>
 
@@ -791,6 +792,133 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
     pmbp->pmhd->peos->PrimToCons(w0_, bcc0_, u0_, is, ie, js, je, ks, ke);
   }
 
+  // Initialize particles
+  if (pmbp->ppart != nullptr) {
+    // captures for the kernel
+    auto &u0_ = (pmbp->phydro != nullptr) ? pmbp->phydro->u0 : pmbp->pmhd->u0;
+    auto &mbsize = pmy_mesh_->pmb_pack->pmb->mb_size;
+    auto &mblev = pmy_mesh_->pmb_pack->pmb->mb_lev;
+    auto gids = pmy_mesh_->pmb_pack->gids;
+
+    auto &indcs = pmy_mesh_->mb_indcs;
+    int &is = indcs.is;
+    int &js = indcs.js;
+    int &ks = indcs.ks;
+    int &nx1 = indcs.nx1;
+    int &nx2 = indcs.nx2;
+    int &nx3 = indcs.nx3;
+
+    // init RNG
+    Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids);
+
+    // count total mass across the domain
+    Real total_mass = 0.0;
+    const int nmkji = (pmbp->nmb_thispack)*indcs.nx3*indcs.nx2*indcs.nx1;
+    const int nkji = indcs.nx3*indcs.nx2*indcs.nx1;
+    const int nji  = indcs.nx2*indcs.nx1;
+
+    Kokkos::parallel_reduce("pgen_mass", Kokkos::RangePolicy<>(DevExeSpace(), 0, nmkji),
+    KOKKOS_LAMBDA(const int &idx, Real &total_mass) {
+      // compute m,k,j,i indices of thread and evaluate
+      int m = (idx)/nkji;
+      int k = (idx - m*nkji)/nji;
+      int j = (idx - m*nkji - k*nji)/indcs.nx1;
+      int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is;
+      k += ks;
+      j += js;
+
+      Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3;
+      total_mass += u0_(m,IDN,k,j,i) * vol;
+    }, total_mass);
+
+    Real total_mass_thispack = total_mass;
+
+#if MPI_PARALLEL_ENABLED
+    // get total mass over all MPI ranks
+    MPI_Allreduce(MPI_IN_PLACE, &total_mass, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif
+
+    // get number of particles for this mbpack using MC to deal with fractional particles
+    Real target_nparticles = pin->GetOrAddReal("particles","target_count",100000.0);
+    Real mass_per_particle = total_mass / target_nparticles;
+
+    // create shared array to hold number of particles per zone
+    DualArray2D<int> nparticles_per_zone("partperzone", nmkji,2);
+    par_for("particle_count", DevExeSpace(), 0,nmkji-1,
+    KOKKOS_LAMBDA(int idx) {
+      int m = (idx)/nkji;
+      int k = (idx - m*nkji)/nji;
+      int j = (idx - m*nkji - k*nji)/indcs.nx1;
+      int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is;
+      k += ks;
+      j += js;
+      Real vol = mbsize.d_view(m).dx1*mbsize.d_view(m).dx2*mbsize.d_view(m).dx3;
+      Real nppc = u0_(m,IDN,k,j,i) * vol / mass_per_particle;
+      int nparticles = static_cast<int>(nppc);
+      nppc = fabs(fmod(nppc, 1.0));
+      auto rand_gen = rand_pool64.get_state();
+      if (rand_gen.frand() < nppc) {
+        nparticles += 1;
+      }
+      nparticles_per_zone.d_view(idx,0) = nparticles;
+      rand_pool64.free_state(rand_gen);
+    });
+
+    // count total number of particles in this pack
+    nparticles_per_zone.template modify<DevExeSpace>();
+    nparticles_per_zone.template sync<HostMemSpace>();
+    int nparticles_thispack = 0;
+    for (int i=0; i<nmkji; ++i) {
+      nparticles_per_zone.h_view(i,1) = nparticles_thispack;
+      nparticles_thispack += nparticles_per_zone.h_view(i,0);
+    }
+    nparticles_per_zone.template modify<HostMemSpace>();
+    nparticles_per_zone.template sync<DevMemSpace>();
+
+    // helpful debug statement
+    std::cout << "total mass across domain: " << total_mass
+              <<  ", total mass in pack: " << total_mass_thispack
+              << ", target nparticles: " << target_nparticles
+              << ", nparticles in pack: " << nparticles_thispack
+              << std::endl;
+
+    // reallocate space for particles and get relevant pointers
+    pmy_mesh_->pmb_pack->ppart->ReallocateParticles(nparticles_thispack);
+
+    auto &pr = pmy_mesh_->pmb_pack->ppart->prtcl_rdata;
+    auto &pi = pmy_mesh_->pmb_pack->ppart->prtcl_idata;
+
+    // initialize particles. only intended for Lagrangian-type particles
+    par_for("part_init", DevExeSpace(), 0,nmkji-1,
+    KOKKOS_LAMBDA(int idx) {
+      int m = (idx)/nkji;
+      int k = (idx - m*nkji)/nji;
+      int j = (idx - m*nkji - k*nji)/indcs.nx1;
+      int i = (idx - m*nkji - k*nji - j*indcs.nx1) + is;
+      k += ks;
+      j += js;
+
+      int nparticles_in_zone = nparticles_per_zone.d_view(idx,0);
+      int starting_index = nparticles_per_zone.d_view(idx,1);
+
+      for (int p=0; p<nparticles_in_zone; ++p) {
+        int pidx = p + starting_index;
+
+        pi(PGID,pidx) = gids + m;
+        pi(PLASTLEVEL,pidx) = mblev.d_view(m);
+
+        // set particle to zone center
+        pr(IPX,pidx) = CellCenterX(i-is, nx1, mbsize.d_view(m).x1min,
+                                   mbsize.d_view(m).x1max);
+        pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min,
+                                   mbsize.d_view(m).x2max);
+        pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min,
+                                   mbsize.d_view(m).x3max) -
+                       mbsize.d_view(m).dx3/2;
+      }
+    });
+  }
+
   return;
 }
 

From 0ca47980c2654b2c030fe7809b775e9100280756 Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Sat, 3 Aug 2024 12:19:52 -0400
Subject: [PATCH 06/10] revert boundary deletion from dev code. copy in fix for
 lev-1 from @DVilla95.

---
 src/bvals/bvals_part.cpp | 475 ++++++++++++++++++++++++++-------------
 1 file changed, 319 insertions(+), 156 deletions(-)

diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index ed137438..e7e27688 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -44,6 +44,23 @@ void UpdateGID(int &newgid, NeighborBlock nghbr, int myrank, int *pcounter,
   return;
 }
 
+//----------------------------------------------------------------------------------------
+//! \fn void ParticlesBoundaryValues::MarkForDestruction()
+//! \brief Adds particles that cross the user-defined mesh boundariesto a list used then
+//! to destroy them. The list uses the same structure as the sendlist for convenience
+//! in the following functions.
+//! This opeartion should also be performed for single process runs
+
+KOKKOS_INLINE_FUNCTION
+void MarkForDestruction(int *pcounter, DualArray1D<ParticleLocationData> dlist, int p) {
+    int index = Kokkos::atomic_fetch_add(pcounter,1);
+    dlist.d_view(index).prtcl_indx = p;
+    // These particles don't actually get sent, thus following information is not needed
+    dlist.d_view(index).dest_gid   = 0;
+    dlist.d_view(index).dest_rank  = 0;
+  return;
+}
+
 //----------------------------------------------------------------------------------------
 //! \fn void ParticlesBoundaryValues::SetNewGID()
 //! \brief
@@ -61,14 +78,14 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
   auto &nghbr = pmy_part->pmy_pack->pmb->nghbr;
   auto &psendl = sendlist;
   int counter=0;
-  int *pcounter = &counter;
+  Kokkos::View<int> atom_count("atom_count");
+  Kokkos::deep_copy(atom_count, counter);
   bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d;
   bool &three_d = pmy_part->pmy_pack->pmesh->three_d;
 
-  Kokkos::realloc(sendlist, static_cast<int>(0.1*npart));
+  Kokkos::realloc(sendlist, static_cast<int>(npart));
   par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) {
     int m = pi(PGID,p) - gids;
-
     int mylevel = mblev.d_view(m);
     Real x1 = pr(IPX,p);
     Real x2 = pr(IPY,p);
@@ -88,94 +105,215 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
     int fx = (x1 < 0.5*(mbsize.d_view(m).x1min + mbsize.d_view(m).x1max))? 0 : 1;
     int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1;
     int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1;
-
     fy = multi_d ? fy : 0;
     fz = three_d ? fz : 0;
 
+    bool check_boundary = false;
+
     // only update particle GID if it has crossed MeshBlock boundary
-    if ((abs(ix) + abs(iy) + abs(iz)) != 0) {
-      if (iz == 0) {
-        if (iy == 0) {
-          // x1 face
-          int indx = NeighborIndex(ix,0,0,0,0);           // neighbor at same level
-          if (nghbr.d_view(m,indx).lev > mylevel) {       // neighbor at finer level
-            indx = NeighborIndex(ix,0,0,fy,fz);
+    if ((abs(ix) + abs(iy) + abs(iz)) != 0 && !check_boundary) {
+      // The way GIDs are determined currently is not reliable in SMR cases
+      // due to nighbours across edges and corners being uninitialized.
+      // Need extra check
+      bool send_to_coarser = false;
+      if (ix < 0) {
+        // level might be initizialized to -1 on some neighbours
+        for (int iop = 0; iop <= 3; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}  // neighbor at coarser level
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
-        } else if (ix == 0) {
-          // x2 face
-          int indx = NeighborIndex(0,iy,0,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(0,iy,0,fx,fz);
+        }
+      } else if (ix > 0) {
+         for (int iop = 4; iop <= 7; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
-        } else {
-          // x1x2 edge
-          int indx = NeighborIndex(ix,iy,0,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(ix,iy,0,fz,0);
+         }
+      }
+      if (iy < 0) {
+        for (int iop = 8; iop <= 11; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
         }
-      } else if (iy == 0) {
-        if (ix == 0) {
-          // x3 face
-          int indx = NeighborIndex(0,0,iz,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(0,0,iz,fx,fy);
+      } else if (iy > 0) {
+        for (int iop = 12; iop <= 15; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
-        } else {
-          // x3x1 edge
-          int indx = NeighborIndex(ix,0,iz,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(ix,0,iz,fy,0);
+        }
+      }
+      if (iz < 0) {
+        for (int iop = 24; iop <= 27; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
         }
-      } else {
-        if (ix == 0) {
-          // x2x3 edge
-          int indx = NeighborIndex(0,iy,iz,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(0,iy,iz,fx,0);
+      } else if (iz > 0) {
+          for (int iop = 28; iop <= 31; ++iop) {
+            if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+              send_to_coarser = true;
+            }
+        }
+      }
+      int indx = 0;
+        if (iz == 0) {
+          if (iy == 0) {
+            // x1 face
+            indx = NeighborIndex(ix,0,0,0,0);           // neighbor at same level
+            if (nghbr.d_view(m,indx).lev > mylevel) {   // neighbor at finer level
+              indx = NeighborIndex(ix,0,0,fy,fz);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}  // neighbor at coarser level
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else if (ix == 0) {
+            // x2 face
+            indx = NeighborIndex(0,iy,0,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(0,iy,0,fx,fz);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else {
+            // x1x2 edge
+            indx = NeighborIndex(ix,iy,0,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(ix,iy,0,fz,0);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            // Using SMR some edge and corner neighbours are uninitialized,
+            // thus check if the index has increased over the appropriate range
+            // and try to communicate through faces to a coarser meshblock
+            // Communication to coarser meshblocks should always go through faces
+            if (indx > 23 || send_to_coarser) {
+              bool found_coarser = false;
+              // First try through x face
+              indx = NeighborIndex(ix,0,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                found_coarser = true;
+              }
+              // If all faces in the x direction are on a finer level this should
+              // have already been covered by previous logic, thus check y
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,iy,0,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                  found_coarser = true;
+                }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          }
+        } else if (iy == 0) {
+          if (ix == 0) {
+            // x3 face
+            indx = NeighborIndex(0,0,iz,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(0,0,iz,fx,fy);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else {
+            // x3x1 edge
+            indx = NeighborIndex(ix,0,iz,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(ix,0,iz,fy,0);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            if (indx > 39 || send_to_coarser) {
+              bool found_coarser = false;
+              indx = NeighborIndex(ix,0,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                found_coarser = true;
+              }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,0,iz,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                  found_coarser = true;
+                }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
         } else {
-          // corners
-          int indx = NeighborIndex(ix,iy,iz,0,0);
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
+          if (ix == 0) {
+            // x2x3 edge
+            indx = NeighborIndex(0,iy,iz,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(0,iy,iz,fx,0);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            if (indx > 47 || send_to_coarser) {
+              bool found_coarser = false;
+              indx = NeighborIndex(0,iy,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                found_coarser = true;
+              }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,0,iz,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                  found_coarser = true;
+                }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else {
+            // corners
+            indx = NeighborIndex(ix,iy,iz,0,0);
+            if (nghbr.d_view(m,indx).gid < 0 || send_to_coarser) {
+              bool found_coarser = false;
+              indx = NeighborIndex(ix,0,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                found_coarser = true;
+              }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,iy,0,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                  found_coarser = true;
+                }
+              }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,0,iz,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
+                  found_coarser = true;
+                }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          }
         }
-      }
 
-      // reset x,y,z positions if particle crosses Mesh boundary using periodic BCs
-      if (x1 < meshsize.x1min) {
-        pr(IPX,p) += (meshsize.x1max - meshsize.x1min);
-      } else if (x1 > meshsize.x1max) {
-        pr(IPX,p) -= (meshsize.x1max - meshsize.x1min);
-      }
-      if (x2 < meshsize.x2min) {
-        pr(IPY,p) += (meshsize.x2max - meshsize.x2min);
-      } else if (x2 > meshsize.x2max) {
-        pr(IPY,p) -= (meshsize.x2max - meshsize.x2min);
-      }
-      if (x3 < meshsize.x3min) {
-        pr(IPZ,p) += (meshsize.x3max - meshsize.x3min);
-      } else if (x3 > meshsize.x3max) {
-        pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min);
+        // reset x,y,z positions if particle crosses Mesh boundary using periodic BCs
+        if (x1 < meshsize.x1min) {
+          pr(IPX,p) += (meshsize.x1max - meshsize.x1min);
+        } else if (x1 > meshsize.x1max) {
+          pr(IPX,p) -= (meshsize.x1max - meshsize.x1min);
+        }
+        if (x2 < meshsize.x2min) {
+          pr(IPY,p) += (meshsize.x2max - meshsize.x2min);
+        } else if (x2 > meshsize.x2max) {
+          pr(IPY,p) -= (meshsize.x2max - meshsize.x2min);
+        }
+        if (x3 < meshsize.x3min) {
+          pr(IPZ,p) += (meshsize.x3max - meshsize.x3min);
+        } else if (x3 > meshsize.x3max) {
+          pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min);
+        }
       }
-    }
   });
 
+  Kokkos::deep_copy(counter, atom_count);
   nprtcl_send = counter;
   Kokkos::resize(sendlist, nprtcl_send);
-  // sync sendlist device array with host
+
   sendlist.template modify<DevExeSpace>();
   sendlist.template sync<HostMemSpace>();
 
@@ -251,6 +389,8 @@ TaskStatus ParticlesBoundaryValues::CountSendsAndRecvs() {
 //! \brief
 
 TaskStatus ParticlesBoundaryValues::InitPrtclRecv() {
+  // Moved assignment here for better compatibility with destruction logic
+  nprtcl_recv=0;
 #if MPI_PARALLEL_ENABLED
   // load STL::vector of ParticleMessageData with <sendrank,recvrank,nprtcl_recv> for
   // receives // on this rank. Length will be nrecvs, initially this length is unknown
@@ -265,55 +405,58 @@ TaskStatus ParticlesBoundaryValues::InitPrtclRecv() {
   nrecvs = recvs_thisrank.size();
 
   // Figure out how many particles will be received from all ranks
-  nprtcl_recv=0;
   for (int n=0; n<nrecvs; ++n) {
     nprtcl_recv += recvs_thisrank[n].nprtcls;
   }
 
-  // Allocate receive buffer
-  Kokkos::realloc(prtcl_rrecvbuf, (pmy_part->nrdata)*nprtcl_recv);
-  Kokkos::realloc(prtcl_irecvbuf, (pmy_part->nidata)*nprtcl_recv);
-
-  // Post non-blocking receives
   bool no_errors=true;
-  rrecv_req.clear();
-  irecv_req.clear();
-  for (int n=0; n<nrecvs; ++n) {
-    rrecv_req.emplace_back(MPI_REQUEST_NULL);
-    irecv_req.emplace_back(MPI_REQUEST_NULL);
-  }
+  if (nprtcl_recv > 0) {
+    // Allocate receive buffer
+    Kokkos::realloc(prtcl_rrecvbuf, (pmy_part->nrdata)*nprtcl_recv);
+    Kokkos::realloc(prtcl_irecvbuf, (pmy_part->nidata)*nprtcl_recv);
+
+    // Post non-blocking receives
+    rrecv_req.clear();
+    irecv_req.clear();
+    for (int n=0; n<nrecvs; ++n) {
+      rrecv_req.emplace_back(MPI_REQUEST_NULL);
+      irecv_req.emplace_back(MPI_REQUEST_NULL);
+    }
 
-  // Init receives for Reals
-  int data_start=0;
-  for (int n=0; n<nrecvs; ++n) {
-    // calculate amount of data to be passed, get pointer to variables
-    int data_size = (pmy_part->nrdata)*(recvs_thisrank[n].nprtcls);
-    int data_end = data_start + (pmy_part->nrdata)*(recvs_thisrank[n].nprtcls - 1);
-    auto recv_ptr = Kokkos::subview(prtcl_rrecvbuf, std::make_pair(data_start, data_end));
-    int drank = recvs_thisrank[n].sendrank;
-    int tag = 0; // 0 for Reals, 1 for ints
-
-    // Post non-blocking receive
-    int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_ATHENA_REAL, drank, tag,
-                         mpi_comm_part, &(rrecv_req[n]));
-    if (ierr != MPI_SUCCESS) {no_errors=false;}
-    data_start += data_size;
-  }
-  // Init receives for ints
-  data_start=0;
-  for (int n=0; n<nrecvs; ++n) {
-    // calculate amount of data to be passed, get pointer to variables
-    int data_size = (pmy_part->nidata)*(recvs_thisrank[n].nprtcls);
-    int data_end = data_start + (pmy_part->nidata)*(recvs_thisrank[n].nprtcls - 1);
-    auto recv_ptr = Kokkos::subview(prtcl_irecvbuf, std::make_pair(data_start, data_end));
-    int drank = recvs_thisrank[n].sendrank;
-    int tag = 1; // 0 for Reals, 1 for ints
-
-    // Post non-blocking receive
-    int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_INT, drank, tag,
-                         mpi_comm_part, &(irecv_req[n]));
-    if (ierr != MPI_SUCCESS) {no_errors=false;}
-    data_start += data_size;
+    // Init receives for Reals
+    int data_start=0;
+    for (int n=0; n<nrecvs; ++n) {
+      // calculate amount of data to be passed, get pointer to variables
+      int data_size = (pmy_part->nrdata)*(recvs_thisrank[n].nprtcls);
+      int data_end = data_start + (pmy_part->nrdata)*(recvs_thisrank[n].nprtcls - 1);
+      auto recv_ptr = Kokkos::subview(prtcl_rrecvbuf,
+                                      std::make_pair(data_start, data_end));
+      int drank = recvs_thisrank[n].sendrank;
+      int tag = 0; // 0 for Reals, 1 for ints
+
+      // Post non-blocking receive
+      int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_ATHENA_REAL, drank, tag,
+                           mpi_comm_part, &(rrecv_req[n]));
+      if (ierr != MPI_SUCCESS) {no_errors=false;}
+      data_start += data_size;
+    }
+    // Init receives for ints
+    data_start=0;
+    for (int n=0; n<nrecvs; ++n) {
+      // calculate amount of data to be passed, get pointer to variables
+      int data_size = (pmy_part->nidata)*(recvs_thisrank[n].nprtcls);
+      int data_end = data_start + (pmy_part->nidata)*(recvs_thisrank[n].nprtcls - 1);
+      auto recv_ptr = Kokkos::subview(prtcl_irecvbuf,
+                                      std::make_pair(data_start, data_end));
+      int drank = recvs_thisrank[n].sendrank;
+      int tag = 1; // 0 for Reals, 1 for ints
+
+      // Post non-blocking receive
+      int ierr = MPI_Irecv(recv_ptr.data(), data_size, MPI_INT, drank, tag,
+                           mpi_comm_part, &(irecv_req[n]));
+      if (ierr != MPI_SUCCESS) {no_errors=false;}
+      data_start += data_size;
+    }
   }
 
   // Quit if MPI error detected
@@ -352,8 +495,9 @@ TaskStatus ParticlesBoundaryValues::PackAndSendPrtcls() {
     auto &pi = pmy_part->prtcl_idata;
     auto &rsendbuf = prtcl_rsendbuf;
     auto &isendbuf = prtcl_isendbuf;
+    auto &slist = sendlist;
     par_for("ppack",DevExeSpace(),0,(nprtcl_send-1), KOKKOS_LAMBDA(const int n) {
-      int p = sendlist.d_view(n).prtcl_indx;
+      int p = slist.d_view(n).prtcl_indx;
       for (int i=0; i<nidata; ++i) {
         isendbuf(nidata*n + i) = pi(i,p);
       }
@@ -420,16 +564,19 @@ TaskStatus ParticlesBoundaryValues::PackAndSendPrtcls() {
 //! \brief
 
 TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
-#if MPI_PARALLEL_ENABLED
-  // Sort sendlist on host by index in particle array
   namespace KE = Kokkos::Experimental;
-  std::sort(KE::begin(sendlist.h_view), KE::end(sendlist.h_view), SortByIndex);
-  // sync sendlist host array with device.  This results in sorted array on device
-  sendlist.template modify<HostMemSpace>();
-  sendlist.template sync<DevExeSpace>();
 
+  //In single process runs, size of particle arrays can only be reduced
+  int &npart = pmy_part->nprtcl_thispack;
+
+  int new_npart = npart + (nprtcl_recv - nprtcl_send);
+
+  // nremain defined outside compiler instructions for end logic
+  int nremain = 0;
+  int nremain_d = nprtcl_send - nprtcl_recv;
+
+#if MPI_PARALLEL_ENABLED
   // increase size of particle arrays if needed
-  int new_npart = pmy_part->nprtcl_thispack + (nprtcl_recv - nprtcl_send);
   if (nprtcl_recv > nprtcl_send) {
     Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart);
     Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart);
@@ -460,8 +607,18 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
   }
   // exit if particle communications have not completed
   if (bflag) {return TaskStatus::incomplete;}
+#endif
+
+#if MPI_PARALLEL_ENABLED
+  if (nprtcl_send > 0) {
+    // Sort sendlist on host by index in particle array
+    std::sort(KE::begin(sendlist.h_view), KE::end(sendlist.h_view), SortByIndex);
+    // sync sendlist host array with device.  This results in sorted array on device
+    sendlist.template modify<HostMemSpace>();
+    sendlist.template sync<DevExeSpace>();
+  }
 
-  // unpack particles into positions of sent particles
+  // unpack particles into positions of sent or destroyed particles
   if (nprtcl_recv > 0) {
     int nrdata = pmy_part->nrdata;
     int nidata = pmy_part->nidata;
@@ -469,13 +626,16 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
     auto &pi = pmy_part->prtcl_idata;
     auto &rrecvbuf = prtcl_rrecvbuf;
     auto &irecvbuf = prtcl_irecvbuf;
-    int npart = pmy_part->nprtcl_thispack;
+    auto &slist = sendlist;
+    int nps = nprtcl_send;
+    int npa = pmy_part->nprtcl_thispack;
     par_for("punpack",DevExeSpace(),0,(nprtcl_recv-1), KOKKOS_LAMBDA(const int n) {
       int p;
-      if (n < nprtcl_send) {
-        p = sendlist.d_view(n).prtcl_indx; // place particles in holes created by sends
+      if (n < nps ) {
+        p = slist.d_view(n).prtcl_indx;  // place particles in holes created by send
       } else {
-        p = npart + (n - nprtcl_send);     // place particle at end of arrays
+        // place particle at end of arrays
+        p = npa + (n - nps);
       }
       for (int i=0; i<nidata; ++i) {
         pi(i,p) = irecvbuf(nidata*n + i);
@@ -484,44 +644,47 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
         pr(i,p) = rrecvbuf(nrdata*n + i);
       }
     });
+  }
 
-    // At this point have filled npart_recv holes in particle arrays from sends
-    // If (nprtcl_recv < nprtcl_send), have to move particles from end of arrays to fill
-    // remaining holes
-    int nremain = nprtcl_send - nprtcl_recv;
-    if (nremain > 0) {
-      int i_last_hole = nprtcl_send-1;
-      int i_next_hole = nprtcl_recv;
-      for (int n=1; n<=nremain; ++n) {
-        int nend = npart-n;
-        if (nend > sendlist.h_view(i_last_hole).prtcl_indx) {
-          // copy particle from end into hole
-          int next_hole = sendlist.h_view(i_next_hole).prtcl_indx;
-          auto rdest = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, next_hole);
-          auto rsrc  = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, nend);
-          Kokkos::deep_copy(rdest, rsrc);
-          auto idest = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, next_hole);
-          auto isrc  = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, nend);
-          Kokkos::deep_copy(idest, isrc);
-          i_next_hole += 1;
-        } else {
-          // this index contains a hole, so do nothing except find new index of last hole
-          i_last_hole -= 1;
-        }
+  // At this point have filled npart_recv holes in particle arrays from sends
+  // If (nprtcl_recv < nprtcl_send), have to move particles from end of arrays to fill
+  // remaining holes
+  nremain = nprtcl_send - nprtcl_recv;
+  if (nremain > 0) {
+    int i_last_hole = nprtcl_send-1;
+    int i_next_hole = nprtcl_recv;
+    for (int n=1; n<=nremain; ++n) {
+      int nend = npart-n;
+      --nremain_d;
+      if (nend > sendlist.h_view(i_last_hole).prtcl_indx) {
+        // copy particle from end into hole
+        int next_hole = sendlist.h_view(i_next_hole).prtcl_indx;
+        auto rdest = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, next_hole);
+        auto rsrc  = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, nend);
+        Kokkos::deep_copy(rdest, rsrc);
+        auto idest = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, next_hole);
+        auto isrc  = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, nend);
+        Kokkos::deep_copy(idest, isrc);
+        i_next_hole += 1;
+      } else {
+        // this index contains a hole, so do nothing except find new index of last hole
+        i_last_hole -= 1;
       }
-
-      // shrink size of particle data arrays
-      Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart);
-      Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart);
     }
   }
 
   // Update nparticles_thisrank.  Update cost array (use npart_thismb[nmb]?)
-  pmy_part->nprtcl_thispack = new_npart;
-  pmy_part->pmy_pack->pmesh->nprtcl_thisrank = new_npart;
   MPI_Allgather(&new_npart,1,MPI_INT,(pmy_part->pmy_pack->pmesh->nprtcl_eachrank),1,
                 MPI_INT,MPI_COMM_WORLD);
 #endif
+
+  if (nprtcl_send - nprtcl_recv > 0) {
+    // shrink size of particle data arrays
+    Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart);
+    Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart);
+  }
+  pmy_part->nprtcl_thispack = new_npart;
+  pmy_part->pmy_pack->pmesh->nprtcl_thisrank = new_npart;
   return TaskStatus::complete;
 }
 

From 9c0286c9cd94e7d97e2b6a4542b600ab61226a10 Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Sat, 3 Aug 2024 19:19:47 -0400
Subject: [PATCH 07/10] fix vtk output to respect only one POINT_DATA header

---
 src/outputs/vtk_prtcl.cpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/outputs/vtk_prtcl.cpp b/src/outputs/vtk_prtcl.cpp
index eb8371c6..dcac23b2 100644
--- a/src/outputs/vtk_prtcl.cpp
+++ b/src/outputs/vtk_prtcl.cpp
@@ -182,15 +182,24 @@ void ParticleVTKOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin) {
   }
 
   // Write Part 6: scalar particle data
-  // Write gid of points
+  bool have_written_pointdata_header = false;
+
   for (int n=0; n<(pm->pmb_pack->ppart->nidata); ++n) {
     std::stringstream msg;
+
+    if (!have_written_pointdata_header) {
+      have_written_pointdata_header = true;
+      msg << std::endl << std::endl << "POINT_DATA " << npout_total << std::endl;
+    }
+
     if (n == static_cast<int>(PGID)) {
-      msg << std::endl << std::endl << "POINT_DATA " << npout_total << std::endl
-          << "SCALARS gid float" << std::endl << "LOOKUP_TABLE default" << std::endl;
+      msg << std::endl << "SCALARS gid float" << std::endl
+          << "LOOKUP_TABLE default" << std::endl;
     } else if (n == static_cast<int>(PTAG)) {
-      msg << std::endl << std::endl << "POINT_DATA " << npout_total << std::endl
-          << "SCALARS ptag float" << std::endl << "LOOKUP_TABLE default" << std::endl;
+      msg << std::endl << "SCALARS ptag float" << std::endl
+          << "LOOKUP_TABLE default" << std::endl;
+    } else if (n == static_cast<int>(PLASTMOVE) || n == static_cast<int>(PLASTLEVEL)) {
+      continue;
     }
     if (global_variable::my_rank == 0) {
       partfile.Write_any_type_at(msg.str().c_str(),msg.str().size(),header_offset,"byte");

From bad094ee8c1d3da80b4b379ed16e070cc151095e Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Mon, 5 Aug 2024 21:14:36 -0400
Subject: [PATCH 08/10] code to freeze particles but reverted to not remove
 them

---
 src/bvals/bvals.hpp                 |   3 +-
 src/bvals/bvals_part.cpp            | 164 +++++----
 src/particles/particles.cpp         |   6 +
 src/particles/particles.hpp         |   2 +
 src/particles/particles_pushers.cpp | 538 ++++++++++++++--------------
 src/pgen/gr_torus.cpp               |   3 +
 6 files changed, 382 insertions(+), 334 deletions(-)

diff --git a/src/bvals/bvals.hpp b/src/bvals/bvals.hpp
index 134638a8..d39c99e2 100644
--- a/src/bvals/bvals.hpp
+++ b/src/bvals/bvals.hpp
@@ -247,8 +247,9 @@ class ParticlesBoundaryValues {
   ParticlesBoundaryValues(particles::Particles *ppart, ParameterInput *pin);
   ~ParticlesBoundaryValues();
 
-  int nprtcl_send, nprtcl_recv;
+  int nprtcl_send, nprtcl_recv, nprtcl_destroy;
   DualArray1D<ParticleLocationData> sendlist;
+  DualArray1D<ParticleLocationData> destroylist;
 
   // Data needed to count number of messages and particles to send between ranks
   int nsends; // number of MPI sends to neighboring ranks on this rank
diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index e7e27688..1e9b5eda 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -46,7 +46,7 @@ void UpdateGID(int &newgid, NeighborBlock nghbr, int myrank, int *pcounter,
 
 //----------------------------------------------------------------------------------------
 //! \fn void ParticlesBoundaryValues::MarkForDestruction()
-//! \brief Adds particles that cross the user-defined mesh boundariesto a list used then
+//! \brief Adds particles that cross the user-defined mesh boundaries to a list used
 //! to destroy them. The list uses the same structure as the sendlist for convenience
 //! in the following functions.
 //! This opeartion should also be performed for single process runs
@@ -83,80 +83,105 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
   bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d;
   bool &three_d = pmy_part->pmy_pack->pmesh->three_d;
 
+  // support for user boundary conditions like for gr torus
+  auto &mb_bcs = pmy_part->pmy_pack->pmb->mb_bcs;
+  const Real &min_rad = pmy_part->min_radius;
+
   Kokkos::realloc(sendlist, static_cast<int>(npart));
   par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) {
-    int m = pi(PGID,p) - gids;
-    int mylevel = mblev.d_view(m);
-    Real x1 = pr(IPX,p);
-    Real x2 = pr(IPY,p);
-    Real x3 = pr(IPZ,p);
-
-    // length of MeshBlock in each direction
-    Real lx = (mbsize.d_view(m).x1max - mbsize.d_view(m).x1min);
-    Real ly = (mbsize.d_view(m).x2max - mbsize.d_view(m).x2min);
-    Real lz = (mbsize.d_view(m).x3max - mbsize.d_view(m).x3min);
-
-    // integer offset of particle relative to center of MeshBlock (-1,0,+1)
-    int ix = static_cast<int>((x1 - mbsize.d_view(m).x1min + lx)/lx) - 1;
-    int iy = static_cast<int>((x2 - mbsize.d_view(m).x2min + ly)/ly) - 1;
-    int iz = static_cast<int>((x3 - mbsize.d_view(m).x3min + lz)/lz) - 1;
-
-    // sublock indices for faces and edges with S/AMR
-    int fx = (x1 < 0.5*(mbsize.d_view(m).x1min + mbsize.d_view(m).x1max))? 0 : 1;
-    int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1;
-    int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1;
-    fy = multi_d ? fy : 0;
-    fz = three_d ? fz : 0;
-
-    bool check_boundary = false;
-
-    // only update particle GID if it has crossed MeshBlock boundary
-    if ((abs(ix) + abs(iy) + abs(iz)) != 0 && !check_boundary) {
-      // The way GIDs are determined currently is not reliable in SMR cases
-      // due to nighbours across edges and corners being uninitialized.
-      // Need extra check
-      bool send_to_coarser = false;
-      if (ix < 0) {
-        // level might be initizialized to -1 on some neighbours
-        for (int iop = 0; iop <= 3; ++iop) {
-          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
-            send_to_coarser = true;
-          }
-        }
-      } else if (ix > 0) {
-         for (int iop = 4; iop <= 7; ++iop) {
-          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
-            send_to_coarser = true;
-          }
-         }
+    if (pi(PLASTMOVE,p) == -1) {
+      // this particle is frozen so skip update
+    } else if (pi(PLASTMOVE,p) == -2) {
+      // TODO, mark for deletion
+    } else {
+      // do regular boundary search and update
+      int m = pi(PGID,p) - gids;
+      int mylevel = mblev.d_view(m);
+      Real x1 = pr(IPX,p);
+      Real x2 = pr(IPY,p);
+      Real x3 = pr(IPZ,p);
+
+      // length of MeshBlock in each direction
+      Real lx = (mbsize.d_view(m).x1max - mbsize.d_view(m).x1min);
+      Real ly = (mbsize.d_view(m).x2max - mbsize.d_view(m).x2min);
+      Real lz = (mbsize.d_view(m).x3max - mbsize.d_view(m).x3min);
+
+      // integer offset of particle relative to center of MeshBlock (-1,0,+1)
+      int ix = static_cast<int>((x1 - mbsize.d_view(m).x1min + lx)/lx) - 1;
+      int iy = static_cast<int>((x2 - mbsize.d_view(m).x2min + ly)/ly) - 1;
+      int iz = static_cast<int>((x3 - mbsize.d_view(m).x3min + lz)/lz) - 1;
+
+      // sublock indices for faces and edges with S/AMR
+      int fx = (x1 < 0.5*(mbsize.d_view(m).x1min + mbsize.d_view(m).x1max))? 0 : 1;
+      int fy = (x2 < 0.5*(mbsize.d_view(m).x2min + mbsize.d_view(m).x2max))? 0 : 1;
+      int fz = (x3 < 0.5*(mbsize.d_view(m).x3min + mbsize.d_view(m).x3max))? 0 : 1;
+      fy = multi_d ? fy : 0;
+      fz = three_d ? fz : 0;
+
+      // if the particle is outside of the "user-defined" boundaries,
+      // mark it for no more updates
+      bool check_boundary = (
+          mb_bcs.d_view(m, BoundaryFace::inner_x3) == BoundaryFlag::user && iz < 0)
+      || ( mb_bcs.d_view(m, BoundaryFace::outer_x3) == BoundaryFlag::user && iz > 0)
+      || ( mb_bcs.d_view(m, BoundaryFace::inner_x2) == BoundaryFlag::user && iy < 0)
+      || ( mb_bcs.d_view(m, BoundaryFace::outer_x2) == BoundaryFlag::user && iy > 0)
+      || ( mb_bcs.d_view(m, BoundaryFace::inner_x1) == BoundaryFlag::user && ix < 0)
+      || ( mb_bcs.d_view(m, BoundaryFace::outer_x1) == BoundaryFlag::user && ix > 0)
+      || (SQR(x1) + SQR(x2) + SQR(x3) < SQR(min_rad)
+                            );
+
+      if (check_boundary) {
+        pi(PLASTMOVE,p) = -1;
       }
-      if (iy < 0) {
-        for (int iop = 8; iop <= 11; ++iop) {
-          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
-            send_to_coarser = true;
+
+      // only update particle GID if it has crossed MeshBlock boundary
+      if ((abs(ix) + abs(iy) + abs(iz)) != 0 && !check_boundary) {
+        // The way GIDs are determined currently is not reliable in SMR cases
+        // due to nighbours across edges and corners being uninitialized.
+        // Need extra check
+        bool send_to_coarser = false;
+        if (ix < 0) {
+          // level might be initizialized to -1 on some neighbours
+          for (int iop = 0; iop <= 3; ++iop) {
+            if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+              send_to_coarser = true;
+            }
           }
-        }
-      } else if (iy > 0) {
-        for (int iop = 12; iop <= 15; ++iop) {
-          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
-            send_to_coarser = true;
+        } else if (ix > 0) {
+          for (int iop = 4; iop <= 7; ++iop) {
+            if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+              send_to_coarser = true;
+            }
           }
         }
-      }
-      if (iz < 0) {
-        for (int iop = 24; iop <= 27; ++iop) {
-          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
-            send_to_coarser = true;
+        if (iy < 0) {
+          for (int iop = 8; iop <= 11; ++iop) {
+            if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+              send_to_coarser = true;
+            }
+          }
+        } else if (iy > 0) {
+          for (int iop = 12; iop <= 15; ++iop) {
+            if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+              send_to_coarser = true;
+            }
           }
         }
-      } else if (iz > 0) {
-          for (int iop = 28; iop <= 31; ++iop) {
+        if (iz < 0) {
+          for (int iop = 24; iop <= 27; ++iop) {
             if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
               send_to_coarser = true;
             }
+          }
+        } else if (iz > 0) {
+            for (int iop = 28; iop <= 31; ++iop) {
+              if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+                send_to_coarser = true;
+              }
+          }
         }
-      }
-      int indx = 0;
+
+        int indx = 0;
         if (iz == 0) {
           if (iy == 0) {
             // x1 face
@@ -195,7 +220,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
               }
               // If all faces in the x direction are on a finer level this should
               // have already been covered by previous logic, thus check y
-              if ( !found_coarser ) {
+              if (!found_coarser) {
                 indx = NeighborIndex(0,iy,0,0,0);
                 while (nghbr.d_view(m,indx).gid < 0) {indx++;}
                 if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
@@ -228,7 +253,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
               if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
                 found_coarser = true;
               }
-              if ( !found_coarser ) {
+              if (!found_coarser) {
                 indx = NeighborIndex(0,0,iz,0,0);
                 while (nghbr.d_view(m,indx).gid < 0) {indx++;}
                 if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
@@ -253,7 +278,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
               if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
                 found_coarser = true;
               }
-              if ( !found_coarser ) {
+              if (!found_coarser) {
                 indx = NeighborIndex(0,0,iz,0,0);
                 while (nghbr.d_view(m,indx).gid < 0) {indx++;}
                 if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
@@ -272,14 +297,14 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
               if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
                 found_coarser = true;
               }
-              if ( !found_coarser ) {
+              if (!found_coarser) {
                 indx = NeighborIndex(0,iy,0,0,0);
                 while (nghbr.d_view(m,indx).gid < 0) {indx++;}
                 if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
                   found_coarser = true;
                 }
               }
-              if ( !found_coarser ) {
+              if (!found_coarser) {
                 indx = NeighborIndex(0,0,iz,0,0);
                 while (nghbr.d_view(m,indx).gid < 0) {indx++;}
                 if (nghbr.d_view(m,indx).lev < mylevel && nghbr.d_view(m,indx).lev > 0) {
@@ -297,17 +322,20 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
         } else if (x1 > meshsize.x1max) {
           pr(IPX,p) -= (meshsize.x1max - meshsize.x1min);
         }
+
         if (x2 < meshsize.x2min) {
           pr(IPY,p) += (meshsize.x2max - meshsize.x2min);
         } else if (x2 > meshsize.x2max) {
           pr(IPY,p) -= (meshsize.x2max - meshsize.x2min);
         }
+
         if (x3 < meshsize.x3min) {
           pr(IPZ,p) += (meshsize.x3max - meshsize.x3min);
         } else if (x3 > meshsize.x3max) {
           pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min);
         }
       }
+    }
   });
 
   Kokkos::deep_copy(counter, atom_count);
diff --git a/src/particles/particles.cpp b/src/particles/particles.cpp
index a2130f5e..252d3e1d 100644
--- a/src/particles/particles.cpp
+++ b/src/particles/particles.cpp
@@ -106,6 +106,11 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
       {
         nrdata = 3;
         nidata = 4;
+        // gid, ptag, lastmove, lastlevel
+        // lastmove:
+        //  if >= 0 => save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8
+        //  if -1   => freeze particle and perform no updates or position checks
+        //  if -2   => remove from domain at next chance (TODO NOT IMPLEMENTED)
         break;
       }
     default:
@@ -115,6 +120,7 @@ Particles::Particles(MeshBlockPack *ppack, ParameterInput *pin) :
   Kokkos::realloc(prtcl_idata, nidata, nprtcl_thispack);
 
   // allocate boundary object
+  min_radius = -1;
   pbval_part = new ParticlesBoundaryValues(this, pin);
 }
 
diff --git a/src/particles/particles.hpp b/src/particles/particles.hpp
index 6f37dd8c..3382c797 100644
--- a/src/particles/particles.hpp
+++ b/src/particles/particles.hpp
@@ -65,6 +65,8 @@ class Particles {
   DvceArray2D<int>  prtcl_idata;   // integer properties each particle (gid, tag, etc.)
   Real dtnew;
 
+  Real min_radius;  // particles that enter within this radius will not be updated
+
   ParticlesPusher pusher;
 
   // Boundary communication buffers and functions for particles
diff --git a/src/particles/particles_pushers.cpp b/src/particles/particles_pushers.cpp
index 80f29841..e616c9d9 100644
--- a/src/particles/particles_pushers.cpp
+++ b/src/particles/particles_pushers.cpp
@@ -102,69 +102,73 @@ void Particles::PushLagrangianMC() {
   // GNW 2024-JUL-5: Warning, this may not play well with AMR
   par_for("part_update",DevExeSpace(),0,(nprtcl_thispack-1),
   KOKKOS_LAMBDA(const int p) {
-    int m = pi(PGID,p) - gids;
+    if (pi(PLASTMOVE,p) >= 0) {
+      // only update particles that are not frozen or marked for deletion
 
-    int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
-    int jp = js;
-    int kp = ks;
+      int m = pi(PGID,p) - gids;
 
-    if (multi_d) {
-      jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
-    }
+      int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
+      int jp = js;
+      int kp = ks;
 
-    if (three_d) {
-      kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
-    }
+      if (multi_d) {
+        jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
+      }
 
-    // get normalized fluxes based on local density
-    Real mass = u1_(m,IDN,kp,jp,ip);
-
-    // by convention, these values will be positive when there is outflow
-    // with respect to the current particle's cell
-    Real flx1_left = -flx1_(m,kp,jp,ip) / mass;
-    Real flx1_right = flx1_(m,kp,jp,ip+1) / mass;
-    Real flx2_left = (multi_d) ? -flx2_(m,kp,jp,ip) / mass : 0.;
-    Real flx2_right = (multi_d) ? flx2_(m,kp,jp+1,ip) / mass : 0.;
-    Real flx3_left = (three_d) ? -flx3_(m,kp,jp,ip) / mass : 0.;
-    Real flx3_right = (three_d) ? flx3_(m,kp+1,jp,ip) / mass : 0.;
-
-    flx1_left = flx1_left < 0 ? 0 : flx1_left;
-    flx1_right = flx1_right < 0 ? 0 : flx1_right;
-    flx2_left = flx2_left < 0 ? 0 : flx2_left;
-    flx2_right = flx2_right < 0 ? 0 : flx2_right;
-    flx3_left = flx3_left < 0 ? 0 : flx3_left;
-    flx3_right = flx3_right < 0 ? 0 : flx3_right;
-
-    auto rand_gen = rand_pool64_.get_state();
-    Real rand = rand_gen.frand();
-    rand_pool64_.free_state(rand_gen);  // free state for use by other threads
-
-    // save refinement level of current zone
-    pi(PLASTLEVEL,p) = mblev.d_view(m);
-
-    // save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8
-    pi(PLASTMOVE,p) = 32 * (ip % 2) + 16 * (jp % 2) + 8 * (kp % 2);
-
-    if (rand < flx1_left) {
-      pr(IPX,p) -= mbsize.d_view(m).dx1;
-      pi(PLASTMOVE,p) += 1;
-    } else if (rand < flx1_left + flx1_right) {
-      pr(IPX,p) += mbsize.d_view(m).dx1;
-      pi(PLASTMOVE,p) += 2;
-    } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) {
-      pr(IPY,p) -= mbsize.d_view(m).dx2;
-      pi(PLASTMOVE,p) += 3;
-    } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) {
-      pr(IPY,p) += mbsize.d_view(m).dx2;
-      pi(PLASTMOVE,p) += 4;
-    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right
-                               + flx3_left) {
-      pr(IPZ,p) -= mbsize.d_view(m).dx3;
-      pi(PLASTMOVE,p) += 5;
-    } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right
-                               + flx3_left + flx3_right) {
-      pr(IPZ,p) += mbsize.d_view(m).dx3;
-      pi(PLASTMOVE,p) += 6;
+      if (three_d) {
+        kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
+      }
+
+      // get normalized fluxes based on local density
+      Real mass = u1_(m,IDN,kp,jp,ip);
+
+      // by convention, these values will be positive when there is outflow
+      // with respect to the current particle's cell
+      Real flx1_left = -flx1_(m,kp,jp,ip) / mass;
+      Real flx1_right = flx1_(m,kp,jp,ip+1) / mass;
+      Real flx2_left = (multi_d) ? -flx2_(m,kp,jp,ip) / mass : 0.;
+      Real flx2_right = (multi_d) ? flx2_(m,kp,jp+1,ip) / mass : 0.;
+      Real flx3_left = (three_d) ? -flx3_(m,kp,jp,ip) / mass : 0.;
+      Real flx3_right = (three_d) ? flx3_(m,kp+1,jp,ip) / mass : 0.;
+
+      flx1_left = flx1_left < 0 ? 0 : flx1_left;
+      flx1_right = flx1_right < 0 ? 0 : flx1_right;
+      flx2_left = flx2_left < 0 ? 0 : flx2_left;
+      flx2_right = flx2_right < 0 ? 0 : flx2_right;
+      flx3_left = flx3_left < 0 ? 0 : flx3_left;
+      flx3_right = flx3_right < 0 ? 0 : flx3_right;
+
+      auto rand_gen = rand_pool64_.get_state();
+      Real rand = rand_gen.frand();
+      rand_pool64_.free_state(rand_gen);  // free state for use by other threads
+
+      // save refinement level of current zone
+      pi(PLASTLEVEL,p) = mblev.d_view(m);
+
+      // save parity of current zone stored as (i_isodd,j_isodd,k_isodd) * 8
+      pi(PLASTMOVE,p) = 32 * (ip % 2) + 16 * (jp % 2) + 8 * (kp % 2);
+
+      if (rand < flx1_left) {
+        pr(IPX,p) -= mbsize.d_view(m).dx1;
+        pi(PLASTMOVE,p) += 1;
+      } else if (rand < flx1_left + flx1_right) {
+        pr(IPX,p) += mbsize.d_view(m).dx1;
+        pi(PLASTMOVE,p) += 2;
+      } else if (multi_d && rand < flx1_left + flx1_right + flx2_left) {
+        pr(IPY,p) -= mbsize.d_view(m).dx2;
+        pi(PLASTMOVE,p) += 3;
+      } else if (multi_d && rand < flx1_left + flx1_right + flx2_left + flx2_right) {
+        pr(IPY,p) += mbsize.d_view(m).dx2;
+        pi(PLASTMOVE,p) += 4;
+      } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right
+                                + flx3_left) {
+        pr(IPZ,p) -= mbsize.d_view(m).dx3;
+        pi(PLASTMOVE,p) += 5;
+      } else if (three_d && rand < flx1_left + flx1_right + flx2_left + flx2_right
+                                + flx3_left + flx3_right) {
+        pr(IPZ,p) += mbsize.d_view(m).dx3;
+        pi(PLASTMOVE,p) += 6;
+      }
     }
   });
 }
@@ -196,225 +200,229 @@ TaskStatus Particles::AdjustMeshRefinement(Driver *pdriver, int stage) {
 
   par_for("particle_meshshift",DevExeSpace(),0,(nprtcl_thispack-1),
   KOKKOS_LAMBDA(const int p) {
-    int m = pi(PGID,p) - gids;
-    int level = mblev.d_view(m);
-
-    int lastlevel = pi(PLASTLEVEL,p);
-    int lastmove = pi(PLASTMOVE,p);
-
-    // oddness of the last cell that the particle lived in
-    int i_parity = lastmove / 32;
-    int j_parity = (lastmove % 32) / 16;
-    int k_parity = (lastmove % 16) / 8;
-
-    // direction of last move:
-    //   1 -> "left" x1 face was chosen
-    //   2 -> "right" x1 face was chosen
-    //   3 -> "left" x2 face was chosen
-    //   4 -> "right" x2 face was chosen
-    //   5 -> "left" x3 face was chosen
-    //   6 -> "right" x3 face was chosen
-    lastmove = lastmove % 8;
-
-    Real dx1 = mbsize.d_view(m).dx1;
-    Real dx2 = multi_d ? mbsize.d_view(m).dx2 : 0.;
-    Real dx3 = three_d ? mbsize.d_view(m).dx3 : 0.;
-
-    if (level > lastlevel) {
-      // this is a higher refinement level, i.e., the zones are smaller now
-
-      if (lastmove == 1) {
-        // came from zone to right (dx--)
-        pr(IPX,p) += dx1/2;
-
-        pr(IPY,p) -= dx2/2;
-        pr(IPZ,p) -= dx3/2;
-      } else if (lastmove == 2) {
-        // came from zone to left (dx++)
-        pr(IPX,p) -= dx1/2;
-
-        pr(IPY,p) -= dx2/2;
-        pr(IPZ,p) -= dx3/2;
-      } else if (multi_d && lastmove == 3) {
-        // came from zone above (dy--)
-        pr(IPY,p) += dx2/2;
-
-        pr(IPX,p) -= dx1/2;
-        pr(IPZ,p) -= dx3/2;
-      } else if (multi_d && lastmove == 4) {
-        // came from zone below (dy++)
-        pr(IPY,p) -= dx2/2;
-
-        pr(IPX,p) -= dx1/2;
-        pr(IPZ,p) -= dx3/2;
-      } else if (three_d && lastmove == 5) {
-        // came from zone in front (dz--)
-        pr(IPZ,p) += dx3/2;
-
-        pr(IPX,p) -= dx1/2;
-        pr(IPY,p) -= dx2/2;
-      } else if (three_d && lastmove == 6) {
-        // came from zone behind (dz++)
-        pr(IPZ,p) -= dx3/2;
-
-        pr(IPX,p) -= dx1/2;
-        pr(IPY,p) -= dx2/2;
-      }
-
-      int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
-      int jp = js;
-      int kp = ks;
-
-      if (multi_d) {
-        jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
-      }
-
-      if (three_d) {
-        kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
-      }
-
-      // get fluxes into the four zones that the particle could have ended up in
-      Real flx1 = 0.;
-      Real flx2 = 0.;
-      Real flx3 = 0.;
-      Real flx4 = 0.;
-
-      if (lastmove == 1) {
-        // came from zone to the right
-        flx1 = -flx1_(m,kp,jp,ip+1);
-        flx2 = (multi_d) ? -flx1_(m,kp,jp+1,ip+1) : 0.;
-        flx3 = (three_d) ? -flx1_(m,kp+1,jp,ip+1) : 0.;
-        flx4 = (multi_d && three_d) ? -flx1_(m,kp+1,jp+1,ip+1) : 0.;
-      } else if (lastmove == 2) {
-        // came from zone to the left
-        flx1 = flx1_(m,kp,jp,ip);
-        flx2 = (multi_d) ? flx1_(m,kp,jp+1,ip) : 0.;
-        flx3 = (three_d) ? flx1_(m,kp+1,jp,ip) : 0.;
-        flx4 = (multi_d && three_d) ? flx1_(m,kp+1,jp+1,ip) : 0.;
-      } else if (lastmove == 3) {
-        // came from zone above. is at least multi_d
-        flx1 = -flx2_(m,kp,jp+1,ip);
-        flx2 = -flx2_(m,kp,jp+1,ip+1);
-        flx3 = (three_d) ? -flx2_(m,kp+1,jp+1,ip) : 0.;
-        flx4 = (three_d) ? -flx2_(m,kp+1,jp+1,ip+1) : 0.;
-      } else if (lastmove == 4) {
-        // came from zone below. is at least multi_d
-        flx1 = flx2_(m,kp,jp,ip);
-        flx2 = flx2_(m,kp,jp,ip+1);
-        flx3 = (three_d) ? flx2_(m,kp+1,jp,ip) : 0.;
-        flx4 = (three_d) ? flx2_(m,kp+1,jp,ip+1) : 0.;
-      } else if (lastmove == 5) {
-        // came from zone in front. is three_d
-        flx1 = -flx3_(m,kp+1,jp,ip);
-        flx2 = -flx3_(m,kp+1,jp,ip+1);
-        flx3 = -flx3_(m,kp+1,jp+1,ip);
-        flx4 = -flx3_(m,kp+1,jp+1,ip+1);
-      } else if (lastmove == 6) {
-        // came from zone behind. is three_d
-        flx1 = flx3_(m,kp,jp,ip);
-        flx2 = flx3_(m,kp,jp,ip+1);
-        flx3 = flx3_(m,kp,jp+1,ip);
-        flx4 = flx3_(m,kp,jp+1,ip+1);
-      }
-
-      flx1 = (flx1 < 0) ? 0. : flx1;
-      flx2 = (flx2 < 0) ? 0. : flx2;
-      flx3 = (flx3 < 0) ? 0. : flx3;
-      flx4 = (flx4 < 0) ? 0. : flx4;
-
-      Real flx_total = flx1 + flx2 + flx3 + flx4;
-      flx_total = (flx_total > 0) ? flx_total : 1.e-10;
+    if (pi(PLASTMOVE,p) >= 0) {
+      // only update particles that are not frozen or marked for deletion
+
+      int m = pi(PGID,p) - gids;
+      int level = mblev.d_view(m);
+
+      int lastlevel = pi(PLASTLEVEL,p);
+      int lastmove = pi(PLASTMOVE,p);
+
+      // oddness of the last cell that the particle lived in
+      int i_parity = lastmove / 32;
+      int j_parity = (lastmove % 32) / 16;
+      int k_parity = (lastmove % 16) / 8;
+
+      // direction of last move:
+      //   1 -> "left" x1 face was chosen
+      //   2 -> "right" x1 face was chosen
+      //   3 -> "left" x2 face was chosen
+      //   4 -> "right" x2 face was chosen
+      //   5 -> "left" x3 face was chosen
+      //   6 -> "right" x3 face was chosen
+      lastmove = lastmove % 8;
+
+      Real dx1 = mbsize.d_view(m).dx1;
+      Real dx2 = multi_d ? mbsize.d_view(m).dx2 : 0.;
+      Real dx3 = three_d ? mbsize.d_view(m).dx3 : 0.;
+
+      if (level > lastlevel) {
+        // this is a higher refinement level, i.e., the zones are smaller now
+
+        if (lastmove == 1) {
+          // came from zone to right (dx--)
+          pr(IPX,p) += dx1/2;
+
+          pr(IPY,p) -= dx2/2;
+          pr(IPZ,p) -= dx3/2;
+        } else if (lastmove == 2) {
+          // came from zone to left (dx++)
+          pr(IPX,p) -= dx1/2;
+
+          pr(IPY,p) -= dx2/2;
+          pr(IPZ,p) -= dx3/2;
+        } else if (multi_d && lastmove == 3) {
+          // came from zone above (dy--)
+          pr(IPY,p) += dx2/2;
+
+          pr(IPX,p) -= dx1/2;
+          pr(IPZ,p) -= dx3/2;
+        } else if (multi_d && lastmove == 4) {
+          // came from zone below (dy++)
+          pr(IPY,p) -= dx2/2;
+
+          pr(IPX,p) -= dx1/2;
+          pr(IPZ,p) -= dx3/2;
+        } else if (three_d && lastmove == 5) {
+          // came from zone in front (dz--)
+          pr(IPZ,p) += dx3/2;
+
+          pr(IPX,p) -= dx1/2;
+          pr(IPY,p) -= dx2/2;
+        } else if (three_d && lastmove == 6) {
+          // came from zone behind (dz++)
+          pr(IPZ,p) -= dx3/2;
+
+          pr(IPX,p) -= dx1/2;
+          pr(IPY,p) -= dx2/2;
+        }
 
-      flx1 /= flx_total;
-      flx2 /= flx_total;
-      flx3 /= flx_total;
-      flx4 /= flx_total;
+        int ip = (pr(IPX,p) - mbsize.d_view(m).x1min)/mbsize.d_view(m).dx1 + is;
+        int jp = js;
+        int kp = ks;
 
-      auto rand_gen = rand_pool64_.get_state();
-      Real rand = rand_gen.frand();
-      rand_pool64_.free_state(rand_gen);  // free state for use by other threads
+        if (multi_d) {
+          jp = (pr(IPY,p) - mbsize.d_view(m).x2min)/mbsize.d_view(m).dx2 + js;
+        }
 
-      int target_zone = 4;
-      if (rand < flx1) {
-        target_zone = 1;
-      } else if (rand < flx1 + flx2) {
-        target_zone = 2;
-      } else if (rand < flx1 + flx2 + flx3) {
-        target_zone = 3;
-      }
+        if (three_d) {
+          kp = (pr(IPZ,p) - mbsize.d_view(m).x3min)/mbsize.d_view(m).dx3 + ks;
+        }
 
-      if (lastmove == 1 || lastmove == 2) {
-        if (target_zone == 2) {
-          pr(IPY,p) += mbsize.d_view(m).dx2;
-        } else if (target_zone == 3) {
-          pr(IPZ,p) += mbsize.d_view(m).dx3;
-        } else if (target_zone == 4) {
-          pr(IPY,p) += mbsize.d_view(m).dx2;
-          pr(IPZ,p) += mbsize.d_view(m).dx3;
+        // get fluxes into the four zones that the particle could have ended up in
+        Real flx1 = 0.;
+        Real flx2 = 0.;
+        Real flx3 = 0.;
+        Real flx4 = 0.;
+
+        if (lastmove == 1) {
+          // came from zone to the right
+          flx1 = -flx1_(m,kp,jp,ip+1);
+          flx2 = (multi_d) ? -flx1_(m,kp,jp+1,ip+1) : 0.;
+          flx3 = (three_d) ? -flx1_(m,kp+1,jp,ip+1) : 0.;
+          flx4 = (multi_d && three_d) ? -flx1_(m,kp+1,jp+1,ip+1) : 0.;
+        } else if (lastmove == 2) {
+          // came from zone to the left
+          flx1 = flx1_(m,kp,jp,ip);
+          flx2 = (multi_d) ? flx1_(m,kp,jp+1,ip) : 0.;
+          flx3 = (three_d) ? flx1_(m,kp+1,jp,ip) : 0.;
+          flx4 = (multi_d && three_d) ? flx1_(m,kp+1,jp+1,ip) : 0.;
+        } else if (lastmove == 3) {
+          // came from zone above. is at least multi_d
+          flx1 = -flx2_(m,kp,jp+1,ip);
+          flx2 = -flx2_(m,kp,jp+1,ip+1);
+          flx3 = (three_d) ? -flx2_(m,kp+1,jp+1,ip) : 0.;
+          flx4 = (three_d) ? -flx2_(m,kp+1,jp+1,ip+1) : 0.;
+        } else if (lastmove == 4) {
+          // came from zone below. is at least multi_d
+          flx1 = flx2_(m,kp,jp,ip);
+          flx2 = flx2_(m,kp,jp,ip+1);
+          flx3 = (three_d) ? flx2_(m,kp+1,jp,ip) : 0.;
+          flx4 = (three_d) ? flx2_(m,kp+1,jp,ip+1) : 0.;
+        } else if (lastmove == 5) {
+          // came from zone in front. is three_d
+          flx1 = -flx3_(m,kp+1,jp,ip);
+          flx2 = -flx3_(m,kp+1,jp,ip+1);
+          flx3 = -flx3_(m,kp+1,jp+1,ip);
+          flx4 = -flx3_(m,kp+1,jp+1,ip+1);
+        } else if (lastmove == 6) {
+          // came from zone behind. is three_d
+          flx1 = flx3_(m,kp,jp,ip);
+          flx2 = flx3_(m,kp,jp,ip+1);
+          flx3 = flx3_(m,kp,jp+1,ip);
+          flx4 = flx3_(m,kp,jp+1,ip+1);
         }
-      } else if (lastmove == 3 || lastmove == 4) {
-        if (target_zone == 2) {
-          pr(IPX,p) += mbsize.d_view(m).dx1;
-        } else if (target_zone == 3) {
-          pr(IPZ,p) += mbsize.d_view(m).dx3;
-        } else if (target_zone == 4) {
-          pr(IPX,p) += mbsize.d_view(m).dx1;
-          pr(IPZ,p) += mbsize.d_view(m).dx3;
+
+        flx1 = (flx1 < 0) ? 0. : flx1;
+        flx2 = (flx2 < 0) ? 0. : flx2;
+        flx3 = (flx3 < 0) ? 0. : flx3;
+        flx4 = (flx4 < 0) ? 0. : flx4;
+
+        Real flx_total = flx1 + flx2 + flx3 + flx4;
+        flx_total = (flx_total > 0) ? flx_total : 1.e-10;
+
+        flx1 /= flx_total;
+        flx2 /= flx_total;
+        flx3 /= flx_total;
+        flx4 /= flx_total;
+
+        auto rand_gen = rand_pool64_.get_state();
+        Real rand = rand_gen.frand();
+        rand_pool64_.free_state(rand_gen);  // free state for use by other threads
+
+        int target_zone = 4;
+        if (rand < flx1) {
+          target_zone = 1;
+        } else if (rand < flx1 + flx2) {
+          target_zone = 2;
+        } else if (rand < flx1 + flx2 + flx3) {
+          target_zone = 3;
         }
-      } else if (lastmove == 5 || lastmove == 6) {
-        if (target_zone == 2) {
-          pr(IPX,p) += mbsize.d_view(m).dx1;
-        } else if (target_zone == 3) {
-          pr(IPY,p) += mbsize.d_view(m).dx2;
-        } else if (target_zone == 4) {
-          pr(IPX,p) += mbsize.d_view(m).dx1;
-          pr(IPY,p) += mbsize.d_view(m).dx2;
+
+        if (lastmove == 1 || lastmove == 2) {
+          if (target_zone == 2) {
+            pr(IPY,p) += mbsize.d_view(m).dx2;
+          } else if (target_zone == 3) {
+            pr(IPZ,p) += mbsize.d_view(m).dx3;
+          } else if (target_zone == 4) {
+            pr(IPY,p) += mbsize.d_view(m).dx2;
+            pr(IPZ,p) += mbsize.d_view(m).dx3;
+          }
+        } else if (lastmove == 3 || lastmove == 4) {
+          if (target_zone == 2) {
+            pr(IPX,p) += mbsize.d_view(m).dx1;
+          } else if (target_zone == 3) {
+            pr(IPZ,p) += mbsize.d_view(m).dx3;
+          } else if (target_zone == 4) {
+            pr(IPX,p) += mbsize.d_view(m).dx1;
+            pr(IPZ,p) += mbsize.d_view(m).dx3;
+          }
+        } else if (lastmove == 5 || lastmove == 6) {
+          if (target_zone == 2) {
+            pr(IPX,p) += mbsize.d_view(m).dx1;
+          } else if (target_zone == 3) {
+            pr(IPY,p) += mbsize.d_view(m).dx2;
+          } else if (target_zone == 4) {
+            pr(IPX,p) += mbsize.d_view(m).dx1;
+            pr(IPY,p) += mbsize.d_view(m).dx2;
+          }
         }
-      }
 
-    } else if (level < lastlevel) {
-      // this is a lower refinement level, i.e., the zones are larger now,
-      // there's nothing special to do other than to move the particle to
-      // the center of the new zone
+      } else if (level < lastlevel) {
+        // this is a lower refinement level, i.e., the zones are larger now,
+        // there's nothing special to do other than to move the particle to
+        // the center of the new zone
 
-      if (i_parity) {
-        pr(IPX,p) -= mbsize.d_view(m).dx1/4;
-      } else {
-        pr(IPX,p) += mbsize.d_view(m).dx1/4;
-      }
-      if (multi_d) {
-        if (j_parity) {
-          pr(IPY,p) -= mbsize.d_view(m).dx2/4;
+        if (i_parity) {
+          pr(IPX,p) -= mbsize.d_view(m).dx1/4;
         } else {
-          pr(IPY,p) += mbsize.d_view(m).dx2/4;
+          pr(IPX,p) += mbsize.d_view(m).dx1/4;
         }
-      }
-      if (three_d) {
-        if (k_parity) {
-          pr(IPZ,p) -= mbsize.d_view(m).dx3/4;
-        } else {
-          pr(IPZ,p) += mbsize.d_view(m).dx3/4;
+        if (multi_d) {
+          if (j_parity) {
+            pr(IPY,p) -= mbsize.d_view(m).dx2/4;
+          } else {
+            pr(IPY,p) += mbsize.d_view(m).dx2/4;
+          }
+        }
+        if (three_d) {
+          if (k_parity) {
+            pr(IPZ,p) -= mbsize.d_view(m).dx3/4;
+          } else {
+            pr(IPZ,p) += mbsize.d_view(m).dx3/4;
+          }
         }
-      }
 
-      if (lastmove == 1) {
-        // came from zone to right (dx--)
-        pr(IPX,p) -= mbsize.d_view(m).dx1/2;
-      } else if (lastmove == 2) {
-        // came from zone to left (dx++)
-        pr(IPX,p) += mbsize.d_view(m).dx1/2;
-      } else if (lastmove == 3) {
-        // came from zone above (dy--)
-        pr(IPY,p) -= mbsize.d_view(m).dx2/2;
-      } else if (lastmove == 4) {
-        // came from zone below (dy++)
-        pr(IPY,p) += mbsize.d_view(m).dx2/2;
-      } else if (lastmove == 5) {
-        // came from zone in front (dz--)
-        pr(IPZ,p) -= mbsize.d_view(m).dx3/2;
-      } else if (lastmove == 6) {
-        // came from zone behind (dz++)
-        pr(IPZ,p) += mbsize.d_view(m).dx3/2;
+        if (lastmove == 1) {
+          // came from zone to right (dx--)
+          pr(IPX,p) -= mbsize.d_view(m).dx1/2;
+        } else if (lastmove == 2) {
+          // came from zone to left (dx++)
+          pr(IPX,p) += mbsize.d_view(m).dx1/2;
+        } else if (lastmove == 3) {
+          // came from zone above (dy--)
+          pr(IPY,p) -= mbsize.d_view(m).dx2/2;
+        } else if (lastmove == 4) {
+          // came from zone below (dy++)
+          pr(IPY,p) += mbsize.d_view(m).dx2/2;
+        } else if (lastmove == 5) {
+          // came from zone in front (dz--)
+          pr(IPZ,p) -= mbsize.d_view(m).dx3/2;
+        } else if (lastmove == 6) {
+          // came from zone behind (dz++)
+          pr(IPZ,p) += mbsize.d_view(m).dx3/2;
+        }
       }
     }
   });
diff --git a/src/pgen/gr_torus.cpp b/src/pgen/gr_torus.cpp
index b3a16401..0a991485 100644
--- a/src/pgen/gr_torus.cpp
+++ b/src/pgen/gr_torus.cpp
@@ -808,6 +808,9 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
     int &nx2 = indcs.nx2;
     int &nx3 = indcs.nx3;
 
+    // set inner radius
+    pmy_mesh_->pmb_pack->ppart->min_radius = 1. + sqrt(1. - coord.bh_spin*coord.bh_spin);
+
     // init RNG
     Kokkos::Random_XorShift64_Pool<> rand_pool64(pmbp->gids);
 

From d8039cf5d6f87c08ce7a638c61865772f7baf52f Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Mon, 5 Aug 2024 21:19:14 -0400
Subject: [PATCH 09/10] lint bvals_part.cpp

---
 src/bvals/bvals_part.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index 1e9b5eda..7791bc68 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -92,7 +92,7 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
     if (pi(PLASTMOVE,p) == -1) {
       // this particle is frozen so skip update
     } else if (pi(PLASTMOVE,p) == -2) {
-      // TODO, mark for deletion
+      // TODO(@GNW) mark for deletion
     } else {
       // do regular boundary search and update
       int m = pi(PGID,p) - gids;

From 03fefe3797794e929a598e67e3c0943353fb0f3d Mon Sep 17 00:00:00 2001
From: "George N. Wong" <gnwong@ias.edu>
Date: Mon, 12 Aug 2024 13:52:43 -0400
Subject: [PATCH 10/10] reset particle z position on initialization

---
 src/pgen/gr_torus.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/pgen/gr_torus.cpp b/src/pgen/gr_torus.cpp
index 0a991485..07ca47b3 100644
--- a/src/pgen/gr_torus.cpp
+++ b/src/pgen/gr_torus.cpp
@@ -916,8 +916,7 @@ void ProblemGenerator::UserProblem(ParameterInput *pin, const bool restart) {
         pr(IPY,pidx) = CellCenterX(j-js, nx2, mbsize.d_view(m).x2min,
                                    mbsize.d_view(m).x2max);
         pr(IPZ,pidx) = CellCenterX(k-ks, nx3, mbsize.d_view(m).x3min,
-                                   mbsize.d_view(m).x3max) -
-                       mbsize.d_view(m).dx3/2;
+                                   mbsize.d_view(m).x3max);
       }
     });
   }