From 24a9c8c9762787a5627cef3574f9b069946ce8ba Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 23 Apr 2025 18:05:42 -0400 Subject: [PATCH 01/62] add bubble advection --- src/simulation/m_bubbles_EL.fpp | 26 +++++++++++++-- src/simulation/m_sim_helpers.f90 | 55 +++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index b48f941a39..859bbbc3f0 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -258,6 +258,8 @@ contains call s_restart_bubbles(bub_id, save_count) end if + if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') + print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id !$acc update device(bubbles_lagrange, lag_params) @@ -552,6 +554,8 @@ contains if (bubble_model == 2) then !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) copyin(stage) do k = 1, nBubs + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) ! Keller-Miksis model ! Current bubble state @@ -600,25 +604,41 @@ contains gas_dpdt(k, stage) = fpbdt intfc_draddt(k, stage) = fV + !$acc loop seq + do l = 1, num_dims + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1), cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + end do end do else - if (proc_rank == 0) print *, 'WARNING: Lagrange bubbles work with Keller Miksis model!', & - ' Deactivating radial motion.' + !if (proc_rank == 0) print *, 'WARNING: Lagrange bubbles work with Keller Miksis model!', & + !' Deactivating radial motion.' !$acc parallel loop gang vector default(present) private(k) copyin(stage) do k = 1, nBubs + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) + intfc_dveldt(k, stage) = 0._wp intfc_draddt(k, stage) = 0._wp + + !$acc loop seq + do l = 1, num_dims + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1),cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + end do end do end if ! 
Bubbles remain in a fixed position +#if 0 !$acc parallel loop collapse(2) gang vector default(present) private(k) copyin(stage) do k = 1, nBubs do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_ps(k,:,1), l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp end do end do +#endif call nvtxEndRange diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index 88d37f9674..dc07ea811d 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,7 +10,8 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl + s_compute_dt_from_cfl, & + f_interpolate_velocity contains @@ -267,4 +268,56 @@ subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl + function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) +!$acc routine seq + real(wp), dimension(3) :: pos + integer, dimension(3) :: cell + integer :: i + type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp) :: v + real(wp) :: L1, L2, L3 + real(wp) :: x, x1, x2, x3 + real(wp) :: y1, y2, y3 + + if (i == 1) then + x1 = x_cc(cell(1) - 1) + y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + x2 = x_cc(cell(1)) + y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + x3 = x_cc(cell(1) + 1) + y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + x1 = y_cc(cell(2) - 1) + y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + x2 = y_cc(cell(2)) + y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + x3 = y_cc(cell(2) + 1) + y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + else + x1 = z_cc(cell(3) - 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + x2 = z_cc(cell(3)) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + x3 = z_cc(cell(3) + 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + endif + + x = pos(i) + + L1 = ((x - x2)*(x - x3)) / ((x1 - x2)*(x1 - x3)) + L2 = ((x - x1)*(x - x3)) / ((x2 - x1)*(x2 - x3)) + L3 = ((x - x1)*(x - x2)) / ((x3 - x1)*(x3 - x2)) + + v = L1*y1 + L2*y2 + L3*y3 + + !if (i == 1) then + !V = q_prim_vf(momxb)%sf(cell(1),cell(2),cell(3)) + !elseif (i == 2) then + !V = q_prim_vf(momxb+1)%sf(cell(1),cell(2),cell(3)) + !else + !V = q_prim_vf(momxe)%sf(cell(1),cell(2),cell(3)) + !end if + + end function f_interpolate_velocity + end module m_sim_helpers From 614c6c5257f77f8fc876c273e9799dbb2112120b Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 24 Apr 2025 13:12:03 -0400 Subject: [PATCH 02/62] small updates --- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_time_steppers.fpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 859bbbc3f0..96ac686ab0 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -613,7 +613,7 @@ contains else !if (proc_rank == 0) print *, 'WARNING: Lagrange bubbles work with Keller Miksis model!', & !' Deactivating radial motion.' 
- !$acc parallel loop gang vector default(present) private(k) copyin(stage) + !$acc parallel loop gang vector default(present) private(k, cell) copyin(stage) do k = 1, nBubs cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 045dfda597..4403f3a3ad 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1181,6 +1181,8 @@ contains end do + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_ts_rkck(6)%vf, dt) + !> Update values mytime = mytime + dt_did call s_update_rkck(q_cons_ts) From 5332d2ce2ea5b7d1aa53603301bfe4c98c2a7ac7 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 24 Apr 2025 22:49:11 -0400 Subject: [PATCH 03/62] working on GPUs --- src/simulation/m_bubbles_EL.fpp | 8 ++++---- src/simulation/m_sim_helpers.f90 | 16 ++++++++-------- src/simulation/m_start_up.fpp | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 96ac686ab0..fd4a96abf2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -605,8 +605,8 @@ contains intfc_draddt(k, stage) = fV !$acc loop seq - do l = 1, num_dims - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1), cell, l, q_prim_vf) + do l = 1, 3 + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp end do end do @@ -622,8 +622,8 @@ contains intfc_draddt(k, stage) = 0._wp !$acc loop seq - do l = 1, num_dims - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1),cell, l, q_prim_vf) + do l = 1, 3 + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp end do end do diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index dc07ea811d..b840029d71 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -270,13 +270,12 @@ end subroutine s_compute_dt_from_cfl function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) !$acc routine seq - real(wp), dimension(3) :: pos integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf real(wp) :: v real(wp) :: L1, L2, L3 - real(wp) :: x, x1, x2, x3 + real(wp) :: pos, x1, x2, x3 real(wp) :: y1, y2, y3 if (i == 1) then @@ -293,20 +292,21 @@ function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) x3 = y_cc(cell(2) + 1) y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - else + elseif (i == 3 .and. 
p > 0) then x1 = z_cc(cell(3) - 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) x2 = z_cc(cell(3)) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) x3 = z_cc(cell(3) + 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + else + x1 = 0._wp; x2 = 0._wp; x3 = 0._wp + y1 = 0._wp; y2 = 0._wp; y3 = 0._wp endif - x = pos(i) - - L1 = ((x - x2)*(x - x3)) / ((x1 - x2)*(x1 - x3)) - L2 = ((x - x1)*(x - x3)) / ((x2 - x1)*(x2 - x3)) - L3 = ((x - x1)*(x - x2)) / ((x3 - x1)*(x3 - x2)) + L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) + L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) + L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) v = L1*y1 + L2*y2 + L3*y3 diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0606ad5ce1..4ae97c9e3f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1411,7 +1411,7 @@ contains if (bubbles_lagrange) then !$acc update host(q_beta%vf(1)%sf) call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, q_beta%vf(1)) - !$acc update host(Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_rad, intfc_vel) + !$acc update host(Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_vel) call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else From 68214e3ce1ee1288ceb7a62cb70c8d6ceeed6df5 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 29 Apr 2025 01:19:03 -0400 Subject: [PATCH 04/62] more models --- src/common/m_derived_types.fpp | 2 + src/simulation/m_bubbles_EL.fpp | 35 ++++++----- src/simulation/m_bubbles_EL_kernels.fpp | 84 +++++++++++++++++++++++++ src/simulation/m_global_parameters.fpp | 4 ++ src/simulation/m_sim_helpers.f90 | 55 +--------------- src/simulation/m_start_up.fpp | 2 + toolchain/mfc/run/case_dicts.py | 3 +- 7 files changed, 115 insertions(+), 70 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index b9198335c0..4c99bd0bfa 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -422,6 +422,8 @@ module m_derived_types logical :: write_bubbles !< Write files to track the bubble evolution each time step logical :: write_bubbles_stats !< Write the maximum and minimum radius of each bubble integer :: nBubs_glb !< Global number of bubbles + integer :: vel_model !< Particle velocity model + integer :: drag_model !< Particle drag model real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) real(wp) :: valmaxvoid !< Maximum void fraction permitted diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index fd4a96abf2..39b4f7e8ed 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -606,8 +606,16 @@ contains !$acc loop seq do l = 1, 3 - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if end do end do else @@ -623,23 +631,20 @@ contains !$acc loop seq do l = 1, 3 
- mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if end do end do end if - ! Bubbles remain in a fixed position -#if 0 - !$acc parallel loop collapse(2) gang vector default(present) private(k) copyin(stage) - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_ps(k,:,1), l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - end do - end do -#endif - call nvtxEndRange !< EULER-LAGRANGE COUPLING diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 43246aad55..6aec813680 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -454,4 +454,88 @@ contains end subroutine s_get_cell + function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) +!$acc routine seq + integer, dimension(3) :: cell + integer :: i + type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp) :: v + real(wp) :: L1, L2, L3 + real(wp) :: pos, x1, x2, x3 + real(wp) :: y1, y2, y3 + + if (i == 1) then + x1 = x_cc(cell(1) - 1) + y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + x2 = x_cc(cell(1)) + y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + x3 = x_cc(cell(1) + 1) + y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + x1 = y_cc(cell(2) - 1) + y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + x2 = y_cc(cell(2)) + y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + x3 = y_cc(cell(2) + 1) + y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + elseif (i == 3 .and. p > 0) then + x1 = z_cc(cell(3) - 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + x2 = z_cc(cell(3)) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + x3 = z_cc(cell(3) + 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + else + x1 = 0._wp; x2 = 0._wp; x3 = 0._wp + y1 = 0._wp; y2 = 0._wp; y3 = 0._wp + endif + + L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) + L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) + L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) + + v = L1*y1 + L2*y2 + L3*y3 + + end function f_interpolate_velocity + + function f_get_acceleration(pos,rad,vel,cell,i,q_prim_vf) result(a) +!$acc routine seq + integer, dimension(3) :: cell + integer :: i + type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp) :: a, area, vol, mass, force, drag_force, vel + real(wp) :: pos, rad, dp, v_rel + + if (i == 1) then + dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + elseif (i == 2) then + dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & + (y_cc(cell(2) + 1) -y_cc(cell(2) - 1)) + elseif (i == 3 .and. 
p > 0) then + dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + else + dp = 0._wp + end if + + area = pi * rad**2._wp + vol = (4._wp/3._wp) * pi * rad**3._wp + + force = -1._wp * area * dp + + if (lag_params%drag_model == 1) then ! Stokes drag + v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + force = force - (6._wp * pi * rad * v_rel) / fluid_pp(1)%Re(1) + end if + + mass = 1e-3 * vol + + a = force / mass + + end function f_get_acceleration + end module m_bubbles_EL_kernels diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 329153bfdf..fce65b7d89 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -318,6 +318,8 @@ module m_global_parameters !! in the flow. These include the stiffened gas equation of state parameters, !! the Reynolds numbers and the Weber numbers. + !$acc declare create(fluid_pp) + !$acc declare create(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3) !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) @@ -739,6 +741,8 @@ contains lag_params%write_bubbles = .false. lag_params%write_bubbles_stats = .false. lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real lag_params%valmaxvoid = dflt_real diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index b840029d71..88d37f9674 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,8 +10,7 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl, & - f_interpolate_velocity + s_compute_dt_from_cfl contains @@ -268,56 +267,4 @@ subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl - function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) -!$acc routine seq - integer, dimension(3) :: cell - integer :: i - type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: v - real(wp) :: L1, L2, L3 - real(wp) :: pos, x1, x2, x3 - real(wp) :: y1, y2, y3 - - if (i == 1) then - x1 = x_cc(cell(1) - 1) - y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) - x2 = x_cc(cell(1)) - y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) - x3 = x_cc(cell(1) + 1) - y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) - elseif (i == 2) then - x1 = y_cc(cell(2) - 1) - y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) - x2 = y_cc(cell(2)) - y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) - x3 = y_cc(cell(2) + 1) - y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - elseif (i == 3 .and. 
p > 0) then - x1 = z_cc(cell(3) - 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) - x2 = z_cc(cell(3)) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) - x3 = z_cc(cell(3) + 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - else - x1 = 0._wp; x2 = 0._wp; x3 = 0._wp - y1 = 0._wp; y2 = 0._wp; y3 = 0._wp - endif - - L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) - L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) - L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) - - v = L1*y1 + L2*y2 + L3*y3 - - !if (i == 1) then - !V = q_prim_vf(momxb)%sf(cell(1),cell(2),cell(3)) - !elseif (i == 2) then - !V = q_prim_vf(momxb+1)%sf(cell(1),cell(2),cell(3)) - !else - !V = q_prim_vf(momxe)%sf(cell(1),cell(2),cell(3)) - !end if - - end function f_interpolate_velocity - end module m_sim_helpers diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 4ae97c9e3f..755446e91e 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1633,6 +1633,8 @@ contains !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out) !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out) + !$acc update device(fluid_pp) + !$acc update device(relax, relax_model) if (relax) then !$acc update device(palpha_eps, ptgalpha_eps) diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 0294ffc5e5..9ebb5fdbf6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -266,7 +266,8 @@ def analytic(self): 'write_bubbles', 'write_bubbles_stats' ]: SIMULATION[f'lag_params%{var}'] = ParamType.LOG -for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb']: +for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', + 'vel_model', 'drag_model']: SIMULATION[f'lag_params%{var}'] = ParamType.INT for var in [ 'epsilonb', 'valmaxvoid', 'charwidth', 'diffcoefvap', From 950752c52051fa38dd1d199d4ab45ca1ea638204 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:03:41 -0400 Subject: [PATCH 05/62] first changes --- src/common/m_mpi_common.fpp | 698 ++++++++++++++++++++--- src/post_process/m_mpi_proxy.fpp | 440 +------------- src/pre_process/m_mpi_proxy.fpp | 454 --------------- src/simulation/m_mpi_proxy.fpp | 951 ++++++------------------------- 4 files changed, 808 insertions(+), 1735 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 86199de3ce..da35420544 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -28,19 +28,23 @@ module m_mpi_common !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_send !< + real(wp), private, allocatable, dimension(:), target :: buff_send !< !! This variable is utilized to pack and send the buffer of the cell-average !! primitive variables, for a single computational domain boundary at the !! time, to the relevant neighboring processor. - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_recv !< - !! q_prims_buff_recv is utilized to receive and unpack the buffer of the cell- + real(wp), private, allocatable, dimension(:), target :: buff_recv !< + !! buff_recv is utilized to receive and unpack the buffer of the cell- !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. 
- !$acc declare create(q_prims_buff_send, q_prims_buff_recv) + !$acc declare create(buff_send, buff_recv) integer :: halo_size + !$acc declare create(halo_size) + + integer :: nVars !< nVars for surface tension communication + !$acc declare create(nVars) contains @@ -51,52 +55,34 @@ contains #ifdef MFC_MPI - ! Allocating q_prims_buff_send/recv and ib_buff_send/recv. Please note that + ! Allocating q_cons_buff_send/recv and ib_buff_send/recv. Please note that ! for the sake of simplicity, both variables are provided sufficient ! storage to hold the largest buffer in the computational domain. -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then - if (n > 0) then - if (p > 0) then - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) - else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & - & (max(m, n) + 2*buff_size + 1))) - end if - else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4))) - end if - - @:ALLOCATE(q_prims_buff_recv(0:ubound(q_prims_buff_send, 1))) - v_size = sys_size + 2*nb*4 + if (qbmm .and. .not. polytropic) then + v_size = vec_size + 2*nb*4 else -#endif - if (n > 0) then - if (p > 0) then - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*sys_size* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) - else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*sys_size* & - & (max(m, n) + 2*buff_size + 1))) - end if + v_size = sys_size + end if + + if (n > 0) then + if (p > 0) then + halo_size = NINT(-1._wp + 1._wp * buff_size*(v_size)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1)) else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*sys_size)) + halo_size = -1 + buff_size*(v_size)* & + & (max(m, n) + 2*buff_size + 1) end if + else + halo_size = -1 + buff_size*(v_size) + end if - @:ALLOCATE(q_prims_buff_recv(0:ubound(q_prims_buff_send, 1))) + !$acc update device(halo_size, v_size) - v_size = sys_size -#ifdef MFC_SIMULATION - end if -#endif + @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) #endif @@ -709,7 +695,7 @@ contains do j = 0, buff_size - 1 do i = 1, sys_size r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - q_prims_buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) + buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) end do end do end do @@ -725,7 +711,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - q_prims_buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) end do end do end do @@ -740,7 +726,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - q_prims_buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) end do end do end do @@ -757,7 +743,7 @@ contains r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - q_prims_buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) + buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) end do end do end do @@ -774,7 +760,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - q_prims_buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) + 
buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) end do end do end do @@ -790,7 +776,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - q_prims_buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) end do end do end do @@ -807,7 +793,7 @@ contains r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_prims_buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) + buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) end do end do end do @@ -824,7 +810,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_prims_buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) end do end do end do @@ -840,7 +826,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_prims_buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) end do end do end do @@ -853,8 +839,8 @@ contains #:endfor call nvtxEndRange ! Packbuf - p_send => q_prims_buff_send(0) - p_recv => q_prims_buff_recv(0) + p_send => buff_send(0) + p_recv => buff_recv(0) ! Send/Recv #ifdef MFC_SIMULATION @@ -866,7 +852,7 @@ contains call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(q_prims_buff_send) + !$acc update host(buff_send) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -884,7 +870,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(q_prims_buff_recv) + !$acc update device(buff_recv) call nvtxEndRange #:endif end if @@ -908,7 +894,7 @@ contains do i = 1, sys_size r = (i - 1) + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = q_prims_buff_recv(r) + q_cons_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then print *, "Error", j, k, l, i @@ -930,7 +916,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = q_prims_buff_recv(r) + pb(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -945,7 +931,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = q_prims_buff_recv(r) + mv(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -962,7 +948,7 @@ contains r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = q_prims_buff_recv(r) + q_cons_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then print *, "Error", j, k, l, i @@ -985,7 +971,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = q_prims_buff_recv(r) + pb(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1001,7 +987,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & 
((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = q_prims_buff_recv(r) + mv(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1020,7 +1006,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = q_prims_buff_recv(r) + q_cons_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then print *, "Error", j, k, l, i @@ -1044,7 +1030,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = q_prims_buff_recv(r) + pb(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1061,7 +1047,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = q_prims_buff_recv(r) + mv(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1078,11 +1064,593 @@ contains end subroutine s_mpi_sendrecv_variables_buffers + subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) + + type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0 + + integer :: pack_offset, unpack_offset + real(wp), pointer :: p_send, p_recv + +#ifdef MFC_MPI + + nVars = num_dims + 1 + !$acc update device(nVars) + + buffer_counts = (/ & + buff_size*nVars*(n + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVars + r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) + end do + end do + end do + end do + + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) + end do + end do + end do + end do + + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) + end do + end do + end do + end do + #:endif + end if + #:endfor + + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + p_send => buff_send(0) + p_recv => buff_recv(0) + + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + #:else + !$acc update host(buff_send) + #:endif + + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + !$acc update device(buff_recv) + #:endif + end if + #:endfor + + ! Unpack Received Buffer + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVars + r = (i - 1) + nVars* & + (j + buff_size*((k + 1) + (n + 1)*l)) + c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + #:else + ! 
Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + #:endif + end if + #:endfor + +#endif + + end subroutine s_mpi_sendrecv_capilary_variables_buffers + + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain + +#ifdef MFC_MPI + + integer :: num_procs_x, num_procs_y, num_procs_z !< + !! Optimal number of processors in the x-, y- and z-directions + + real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions + + real(wp) :: fct_min !< + !! Processor factorization (fct) minimization parameter + + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator + + integer :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors + + integer :: i, j !< Generic loop iterators + + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if + + ! 3D Cartesian Processor Topology + if (n > 0) then + + if (p > 0) then + + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. + + ! Initial values of the processor factorization optimization + num_procs_x = 1 + num_procs_y = num_procs + num_procs_z = 1 + ierr = -1 + + ! Computing minimization variable for these initial values + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Searching for optimal computational domain distribution + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + + ! 
Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10._wp*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + num_procs_x = i + num_procs_y = j + num_procs_z = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + + end do + + end if + + end do + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & + num_procs_y, num_procs_z/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology + + ! Global Parameters for z-direction + + ! Number of remaining cells + rem_cells = mod(p + 1, num_procs_z) + + ! Optimal number of cells per processor + p = (p + 1)/num_procs_z - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%beg, ierr) + proc_coords(3) = proc_coords(3) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%end, ierr) + proc_coords(3) = proc_coords(3) - 1 + end if + + if (parallel_io) then + if (proc_coords(3) < rem_cells) then + start_idx(3) = (p + 1)*proc_coords(3) + else + start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + end if + end if + + ! 2D Cartesian Processor Topology + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. 
& + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & + num_procs_y/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + + end if + ! END: 2D Cartesian Processor Topology + + ! Global Parameters for y-direction + + ! Number of remaining cells + rem_cells = mod(n + 1, num_procs_y) + + ! Optimal number of cells per processor + n = (n + 1)/num_procs_y - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%beg, ierr) + proc_coords(2) = proc_coords(2) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%end, ierr) + proc_coords(2) = proc_coords(2) - 1 + end if + + if (parallel_io) then + if (proc_coords(2) < rem_cells) then + start_idx(2) = (n + 1)*proc_coords(2) + else + start_idx(2) = (n + 1)*proc_coords(2) + rem_cells + end if + end if + + ! 1D Cartesian Processor Topology + else + + ! Optimal processor topology + num_procs_x = num_procs + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + + ! Global Parameters for x-direction + + ! Number of remaining cells + rem_cells = mod(m + 1, num_procs_x) + + ! Optimal number of cells per processor + m = (m + 1)/num_procs_x - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) + proc_coords(1) = proc_coords(1) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. 
num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) + proc_coords(1) = proc_coords(1) - 1 + end if + + if (parallel_io) then + if (proc_coords(1) < rem_cells) then + start_idx(1) = (m + 1)*proc_coords(1) + else + start_idx(1) = (m + 1)*proc_coords(1) + rem_cells + end if + end if + +#endif + + end subroutine s_mpi_decompose_computational_domain + !> Module deallocation and/or disassociation procedures subroutine s_finalize_mpi_common_module #ifdef MFC_MPI - deallocate (q_prims_buff_send, q_prims_buff_recv) + deallocate (buff_send, buff_recv) #endif end subroutine s_finalize_mpi_common_module diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 5154d19102..db7c50d648 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -202,445 +202,7 @@ contains end subroutine s_mpi_bcast_user_inputs - !> This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that - !! is overseen by the local processor. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. 
& - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. bc_z%beg == -1) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(3) > 0 .and. format == 1) then - offset_z%beg = 2 - else - offset_z%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. bc_z%end == -1) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(3) < num_procs_z - 1 .and. 
format == 1) then - offset_z%end = 2 - else - offset_z%end = 0 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. bc_y%beg == -1) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(2) > 0 .and. format == 1) then - offset_y%beg = 2 - else - offset_y%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == -1) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then - offset_y%end = 2 - else - offset_y%end = 0 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Number of cells in undecomposed computational domain needed - ! 
for sub-domain reassembly during formatted data output - m_root = m - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. bc_x%beg == -1) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then - offset_x%beg = 2 - else - offset_x%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == -1) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then - offset_x%end = 2 - else - offset_x%end = 0 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> Communicates the buffer regions associated with the grid +!> Communicates the buffer regions associated with the grid !! variables with processors in charge of the neighboring !! sub-domains. Note that only cell-width spacings feature !! buffer regions so that no information relating to the diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index c9888e79ca..1428600fff 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -131,458 +131,4 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Description: This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that is - !! overseen by the local processor. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. 
parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! 
Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dz = (z_domain%end - z_domain%beg)/real(p + 1, wp) - end if - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(3) < rem_cells) then - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3)) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1) & - - (num_procs_z - rem_cells)) - else - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3) + rem_cells) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1)) - end if - end if - else - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - ! Finding corresponding Cartesian coordinates of the local - ! 
processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dy = (y_domain%end - y_domain%beg)/real(n + 1, wp) - end if - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(2) < rem_cells) then - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2)) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1) & - - (num_procs_y - rem_cells)) - else - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2) + rem_cells) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1)) - end if - end if - else - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dx = (x_domain%end - x_domain%beg)/real(m + 1, wp) - end if - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. 
num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(1) < rem_cells) then - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1)) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1) & - - (num_procs_x - rem_cells)) - else - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1) + rem_cells) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1)) - end if - end if - else - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - end module m_mpi_proxy diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 9dd922f36b..145bbbd23b 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -61,42 +61,32 @@ module m_mpi_proxy !> @} !$acc declare create(v_size) - integer :: nVars !< nVars for surface tension communication - !$acc declare create(nVars) - + contains - !> The computation of parameters, the allocation of memory, - !! the association of pointers and/or the execution of any - !! other procedures that are necessary to setup the module. - subroutine s_initialize_mpi_proxy_module + subroutine s_initialize_mpi_proxy() -#ifdef MFC_MPI - if (surface_tension) then - nVars = num_dims + 1 + if (ib) then if (n > 0) then if (p > 0) then - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1))) else - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & - & (max(m, n) + 2*buff_size + 1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1))) end if else - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) end if - - @:ALLOCATE(c_divs_buff_recv(0:ubound(c_divs_buff_send, 1))) + @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) end if - !$acc update device(v_size, nVars) -#endif - end subroutine s_initialize_mpi_proxy_module + end subroutine s_initialize_mpi_proxy - !> Since only the processor with rank 0 reads and verifies + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of !! this subroutine is to distribute the user inputs to the @@ -252,561 +242,199 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - subroutine s_mpi_decompose_computational_domain + subroutine s_mpi_sendrecv_ib_buffers(c_divs_vf, gp_layers) -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! 
Optimal number of processors in the x-, y- and z-directions - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(wp) :: fct_min !< - !! Processor factorization (fct) minimization parameter + type(integer_field), intent(inout) :: ib_markers + integer, intent(in) :: gp_layers - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors + integer :: i, j, k, l, r, q !< Generic loop iterators - integer :: i, j !< Generic loop iterators + integer :: buffer_counts(1:3), buffer_count - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag - ! 3D Cartesian Processor Topology - if (n > 0) then + logical :: beg_end_geq_0 - if (p > 0) then + integer :: pack_offset, unpack_offset + real(wp), pointer :: p_send, p_recv - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. +#ifdef MFC_MPI - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 + nVars = num_dims + 1 + !$acc update device(nVars) - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) + buffer_counts = (/ & + buff_size*nVars*(n + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) - ! Searching for optimal computational domain distribution - do i = 1, num_procs + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. 
& - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 + grid_dims = (/m, n, p/) - end if + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if - end if + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVars + r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) + end do + end do + end do end do - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) + end do end do - - end if - + end do end do - end if - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology - - ! Global Parameters for z-direction - - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! 
Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! 2D Cartesian Processor Topology - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology - - ! Global Parameters for y-direction - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! 1D Cartesian Processor Topology - else - - ! 
Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Global Parameters for x-direction - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - - integer :: dst_proc(1:3) - -#ifdef MFC_MPI - - ! MPI Communication in x-direction - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! 
Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) + end do + end do + end do + end do + #:endif end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + #:endfor - else ! PBC at the end only + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + p_send => buff_send(0) + p_recv => buff_recv(0) - ! Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + #:else + !$acc update host(buff_send) + #:endif - end if + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + !$acc update device(buff_recv) + #:endif end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! 
Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only + #:endfor - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Unpack Received Buffer + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVars + r = (i - 1) + nVars* & + (j + buff_size*((k + 1) + (n + 1)*l)) + c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + end do + end do + end do + end do - end if + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + end do + end do + end do + end do + #:else + ! Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + end do + end do + end do + end do + #:endif end if - - end if - ! END: MPI Communication in z-direction + #:endfor #endif - end subroutine s_mpi_sendrecv_grid_variables_buffers + end subroutine s_mpi_sendrecv_ib_buffers !> The goal of this procedure is to populate the buffers of !! the cell-average conservative variables by communicating @@ -820,23 +448,6 @@ contains integer, pointer, dimension(:) :: p_i_send, p_i_recv #ifdef MFC_MPI - - if (n > 0) then - if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) - end if - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) - end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) - !nCalls_time = nCalls_time + 1 ! 
MPI Communication in x-direction @@ -1657,217 +1268,6 @@ contains end subroutine s_mpi_sendrecv_ib_buffers - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) - - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then - p_send => c_divs_buff_send(0) - p_recv => c_divs_buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(c_divs_buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(c_divs_buff_recv) - #:endif - end if - #:endfor - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:endif - end if - #:endfor - -#endif - - end subroutine s_mpi_sendrecv_capilary_variables_buffers - subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn @@ -1876,15 +1276,12 @@ contains #endif end subroutine s_mpi_send_random_number - !> Module deallocation and/or disassociation procedures - subroutine s_finalize_mpi_proxy_module + subroutine s_finalize_mpi_proxy() -#ifdef MFC_MPI - if (surface_tension) then - @:DEALLOCATE(c_divs_buff_send, c_divs_buff_recv) + if (ib) then + @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if -#endif - end subroutine s_finalize_mpi_proxy_module + end subroutine s_initialize_mpi_proxy end module m_mpi_proxy From 2667687b7c0e42b2222b19c73d555986fc0a9160 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:37:21 -0400 Subject: [PATCH 06/62] bug fixes --- src/common/m_mpi_common.fpp | 207 ++++++++++++++++++++++++++++++--- src/simulation/m_mpi_proxy.fpp | 204 +------------------------------- 2 files changed, 198 insertions(+), 213 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp 
index da35420544..ebd66a4252 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -60,7 +60,7 @@ contains ! storage to hold the largest buffer in the computational domain. if (qbmm .and. .not. polytropic) then - v_size = vec_size + 2*nb*4 + v_size = sys_size + 2*nb*4 else v_size = sys_size end if @@ -701,7 +701,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p @@ -733,7 +732,6 @@ contains end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size @@ -749,7 +747,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -783,7 +780,6 @@ contains end do end do end if -#endif #:else !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size @@ -799,7 +795,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -833,7 +828,6 @@ contains end do end do end if -#endif #:endif end if #:endfor @@ -906,7 +900,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p @@ -938,7 +931,6 @@ contains end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size @@ -960,7 +952,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -994,7 +985,6 @@ contains end do end do end if -#endif #:else ! Unpacking buffer from bc_z%beg !$acc parallel loop collapse(4) gang vector default(present) private(r) @@ -1018,7 +1008,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -1054,12 +1043,10 @@ contains end do end do end if -#endif #:endif end if #:endfor call nvtxEndRange - #endif end subroutine s_mpi_sendrecv_variables_buffers @@ -1172,6 +1159,7 @@ contains end if #:endfor +#ifdef MFC_SIMULATION ! Send/Recv #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then @@ -1199,6 +1187,12 @@ contains #:endif end if #:endfor +#else + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif ! Unpack Received Buffer #:for mpi_dir in [1, 2, 3] @@ -1646,6 +1640,191 @@ contains end subroutine s_mpi_decompose_computational_domain + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! 
@param pbc_loc Processor boundary condition (PBC) location +#ifndef MFC_PRE_PROCESS + subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) + + integer, intent(in) :: mpi_dir + integer, intent(in) :: pbc_loc + +#ifdef MFC_MPI + + ! MPI Communication in x-direction + if (mpi_dir == 1) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_x%end/bc_x%beg + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_x%beg/bc_x%beg + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_x%end/bc_x%end + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction + elseif (mpi_dir == 2) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_y%beg/bc_y%beg + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_y%end/bc_y%end + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction + else + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_z%beg/bc_z%beg + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! 
PBC at the end + + if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_z%end/bc_z%end + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + + end if + ! END: MPI Communication in z-direction + +#endif + + end subroutine s_mpi_sendrecv_grid_variables_buffers +#endif + !> Module deallocation and/or disassociation procedures subroutine s_finalize_mpi_common_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 145bbbd23b..f37dea4e4a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -64,7 +64,7 @@ module m_mpi_proxy contains - subroutine s_initialize_mpi_proxy() + subroutine s_initialize_mpi_proxy_module() if (ib) then if (n > 0) then @@ -84,7 +84,7 @@ contains @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) end if - end subroutine s_initialize_mpi_proxy + end subroutine s_initialize_mpi_proxy_module !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not @@ -242,201 +242,7 @@ contains end subroutine s_mpi_bcast_user_inputs - subroutine s_mpi_sendrecv_ib_buffers(c_divs_vf, gp_layers) - - - type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! 
Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - p_send => buff_send(0) - p_recv => buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(buff_recv) - #:endif - end if - #:endfor - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) - end do - end do - end do - end do - - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) - end do - end do - end do - end do - #:endif - end if - #:endfor - -#endif - - end subroutine s_mpi_sendrecv_ib_buffers - - !> The goal of this procedure is to populate the buffers of + !> The goal of this procedure is to populate the buffers of !! the cell-average conservative variables by communicating !! with the neighboring processors. 
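! --- Editorial aside: illustrative sketch, not part of this patch ---
! The pack/unpack loops in the hunk above flatten a 4D halo region
! (variable index i plus cell indices j, k, l) into the 1D send buffer via
!     r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l))
! with i varying fastest. The following self-contained program (names and
! extents are arbitrary placeholders, not MFC parameters) checks that this
! mapping visits 0, 1, 2, ... contiguously and without gaps, which is what
! lets the flat buffer be handed to MPI_SENDRECV as a single message.

program halo_index_demo
    implicit none
    integer, parameter :: nVars = 4, buff_size = 2, n = 7, p = 5
    integer :: i, j, k, l, r, expected

    expected = 0
    do l = 0, p
        do k = 0, n
            do j = 0, buff_size - 1
                do i = 1, nVars
                    ! Same linearization formula as the packing loop above
                    r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l))
                    if (r /= expected) error stop 'non-contiguous halo index'
                    expected = expected + 1
                end do
            end do
        end do
    end do
    print *, 'packed ', expected, ' entries contiguously'
end program halo_index_demo
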
subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) @@ -1276,12 +1082,12 @@ contains #endif end subroutine s_mpi_send_random_number - subroutine s_finalize_mpi_proxy() + subroutine s_finalize_mpi_proxy_module() if (ib) then @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if - end subroutine s_initialize_mpi_proxy + end subroutine s_finalize_mpi_proxy_module end module m_mpi_proxy From 9814a7f803dcb19d3a7997df64cea99df3886a7e Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 30 Apr 2025 22:38:15 -0400 Subject: [PATCH 07/62] update mass calculation --- src/simulation/m_bubbles_EL.fpp | 16 ++++++++++++---- src/simulation/m_bubbles_EL_kernels.fpp | 10 +++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 39b4f7e8ed..a2ac3a7ae9 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -607,11 +607,15 @@ contains !$acc loop seq do l = 1, 3 if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& + intfc_rad(k,1), mtn_vel(k,l,1), & + gas_mg(k,1), gas_mv(k,1), & + cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -632,11 +636,15 @@ contains !$acc loop seq do l = 1, 3 if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& + intfc_rad(k,1), mtn_vel(k,l,1), & + gas_mg(k,1), gas_mv(k,1), & + cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 6aec813680..ccd5cbe0bc 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -498,13 +498,13 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,cell,i,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf real(wp) :: a, area, vol, mass, force, drag_force, vel - real(wp) :: pos, rad, dp, v_rel + real(wp) :: pos, rad, dp, v_rel, mg, mv if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -523,8 +523,6 @@ contains end if area = pi * rad**2._wp - vol = (4._wp/3._wp) * pi * rad**3._wp - force = -1._wp * area * dp if (lag_params%drag_model == 1) then ! 
Stokes drag @@ -532,9 +530,7 @@ contains force = force - (6._wp * pi * rad * v_rel) / fluid_pp(1)%Re(1) end if - mass = 1e-3 * vol - - a = force / mass + a = force / (mg + mv) end function f_get_acceleration From 2c5d7631b7fa0686ae79cfd8cfa801ffd7282841 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 1 May 2025 16:11:57 -0400 Subject: [PATCH 08/62] more refactoring --- examples/3D_TaylorGreenVortex/case.py | 4 +- src/common/m_boundary_conditions.fpp | 24 +- src/common/m_mpi_common.fpp | 348 +++++------------------ src/post_process/m_data_input.f90 | 14 +- src/post_process/m_global_parameters.fpp | 2 +- src/post_process/m_mpi_proxy.fpp | 200 ------------- src/post_process/m_start_up.f90 | 4 +- 7 files changed, 92 insertions(+), 504 deletions(-) diff --git a/examples/3D_TaylorGreenVortex/case.py b/examples/3D_TaylorGreenVortex/case.py index de440980cb..33889c7958 100644 --- a/examples/3D_TaylorGreenVortex/case.py +++ b/examples/3D_TaylorGreenVortex/case.py @@ -2,7 +2,7 @@ import math import json -N = 256 +N = 49 Re = 1600 L = 1 @@ -41,7 +41,7 @@ "p": N, "cyl_coord": "F", "dt": dt, - "t_step_start": 13529, + "t_step_start": 0, "t_step_stop": Nt, "t_step_save": int(Nt / 100), # Simulation Algorithm Parameters diff --git a/src/common/m_boundary_conditions.fpp b/src/common/m_boundary_conditions.fpp index 762ff6a44f..575eecad34 100644 --- a/src/common/m_boundary_conditions.fpp +++ b/src/common/m_boundary_conditions.fpp @@ -50,7 +50,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1) case default ! Processor BC at beginning call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 1, -1) + q_prim_vf, 1, -1, sys_size, pb, mv) end select select case (bc_x%end) @@ -66,7 +66,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1) case default ! Processor BC at end call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 1, 1) + q_prim_vf, 1, 1, sys_size, pb, mv) end select #ifdef MFC_SIMULATION @@ -110,7 +110,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1) case default ! Processor BC at beginning call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 2, -1) + q_prim_vf, 2, -1, sys_size, pb, mv) end select select case (bc_y%end) @@ -126,7 +126,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1) case default ! Processor BC at end call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 2, 1) + q_prim_vf, 2, 1, sys_size, pb, mv) end select #ifdef MFC_SIMULATION @@ -168,7 +168,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1) case default ! Processor BC at beginning call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 3, -1) + q_prim_vf, 3, -1, sys_size, pb, mv) end select select case (bc_z%end) @@ -184,7 +184,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1) case default ! 
Processor BC at end call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 3, 1) + q_prim_vf, 3, 1, sys_size, pb, mv) end select #ifdef MFC_SIMULATION @@ -1470,7 +1470,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) end if if (bc_x%end <= -3) then !< ghost-cell extrapolation @@ -1515,7 +1515,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) end if if (n == 0) then @@ -1562,7 +1562,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) end if if (bc_y%end <= -3) then !< ghost-cell extrapolation @@ -1607,7 +1607,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) end if if (p == 0) then @@ -1654,7 +1654,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) end if if (bc_z%end <= -3) then !< ghost-cell extrapolation @@ -1699,7 +1699,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) end if end subroutine s_populate_capillary_buffers diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index ebd66a4252..e5ff84f2ec 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -70,7 +70,7 @@ contains halo_size = NINT(-1._wp + 1._wp * buff_size*(v_size)* & & (m + 2*buff_size + 1)* & & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & + & (p + 2*buff_size + 1)/ & & (min(m, n, p) + 2*buff_size + 1)) else halo_size = -1 + buff_size*(v_size)* & @@ -608,13 +608,14 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param mpi_dir MPI communication coordinate direction !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, & - pb, mv, & + subroutine s_mpi_sendrecv_variables_buffers(q_comm, & mpi_dir, & - pbc_loc) + pbc_loc, & + nVar, & + pb, mv) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + type(scalar_field), dimension(1:), intent(inout) :: q_comm + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: mpi_dir, pbc_loc @@ -626,34 +627,36 @@ contains integer :: beg_end(1:2), grid_dims(1:3) integer :: dst_proc, src_proc, recv_tag, send_tag - logical :: beg_end_geq_0 + logical :: beg_end_geq_0, qbmm_comm - integer :: pack_offset, unpack_offset + integer :: pack_offset, unpack_offset, nVar real(wp), pointer :: p_send, p_recv #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") -!$acc update device(v_size) -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + qbmm_comm = .false. + + if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then + qbmm_comm = .true. 
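        ! Sizing note (assumes nVar == sys_size whenever pb and mv are passed):
        ! each of the nb bubble bins carries four pb entries and four mv entries
        ! per cell, so a packed cell record grows from nVar to nVar + 2*nb*4
        ! values, which is what the buffer_counts below account for.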
+ v_size = sys_size + 2*nb*4 buffer_counts = (/ & - buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*(nVar + 2*nb*4)*(n + 1)*(p + 1), & + buff_size*(nVar + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) else -#endif + v_size = nVar buffer_counts = (/ & - buff_size*sys_size*(n + 1)*(p + 1), & - buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVar*(n + 1)*(p + 1), & + buff_size*nVar*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) -#ifdef MFC_SIMULATION end if -#endif + + !$acc update device(v_size, nVar) buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) @@ -693,24 +696,24 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) end do end do end do end do - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) end do end do end do @@ -721,11 +724,11 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) end do end do end do @@ -734,22 +737,22 @@ contains end if #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) end do end do end do end do - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -757,7 +760,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) end do end do end do @@ -765,7 +768,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -773,7 +776,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) end do end do end do @@ -782,22 +785,22 @@ contains end if #:else !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) end do end do end do end do - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -805,7 +808,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) end do end do end do @@ -813,7 +816,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar+ 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -821,7 +824,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) end do end do end do @@ -885,12 +888,12 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -900,16 +903,16 @@ contains end do end do - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -920,11 +923,11 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -933,16 +936,16 @@ contains end if #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -952,9 +955,9 @@ contains end do end do - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -962,7 +965,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -970,7 +973,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -978,7 +981,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -988,7 +991,7 @@ contains #:else ! Unpacking buffer from bc_z%beg !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -996,9 +999,9 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -1008,9 +1011,9 @@ contains end do end do - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1019,7 +1022,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do @@ -1027,7 +1030,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1036,7 +1039,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do @@ -1051,224 +1054,6 @@ contains end subroutine s_mpi_sendrecv_variables_buffers - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) - - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! 
Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - -#ifdef MFC_SIMULATION - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - p_send => buff_send(0) - p_recv => buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(buff_recv) - #:endif - end if - #:endfor -#else - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) -#endif - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:else - ! 
Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:endif - end if - #:endfor - -#endif - - end subroutine s_mpi_sendrecv_capilary_variables_buffers - !> The purpose of this procedure is to optimally decompose !! the computational domain among the available processors. !! This is performed by attempting to award each processor, @@ -1819,7 +1604,6 @@ contains end if ! END: MPI Communication in z-direction - #endif end subroutine s_mpi_sendrecv_grid_variables_buffers diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index e4ddd8c18f..1ca507a2dc 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -17,6 +17,8 @@ module m_data_input use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common + use m_compile_specific use m_helper @@ -586,7 +588,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the beginning else - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'x') + call s_mpi_sendrecv_grid_variables_buffers(1, -1) end if @@ -622,7 +624,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the end else - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'x') + call s_mpi_sendrecv_grid_variables_buffers(1, 1) end if @@ -664,7 +666,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the beginning else - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'y') + call s_mpi_sendrecv_grid_variables_buffers(2, -1) end if @@ -700,7 +702,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the end else - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'y') + call s_mpi_sendrecv_grid_variables_buffers(2, 1) end if @@ -742,7 +744,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the beginning else - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'z') + call s_mpi_sendrecv_grid_variables_buffers(3, -1) end if @@ -778,7 +780,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the end else - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'z') + call s_mpi_sendrecv_grid_variables_buffers(3, 1) end if diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 00c5f0ec27..0a5089f247 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -469,7 +469,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs == 1 .and. n == 0) m_root = m + if (num_procs >1 .and. n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index db7c50d648..29ffbc134f 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -202,206 +202,6 @@ contains end subroutine s_mpi_bcast_user_inputs -!> Communicates the buffer regions associated with the grid - !! 
variables with processors in charge of the neighboring - !! sub-domains. Note that only cell-width spacings feature - !! buffer regions so that no information relating to the - !! cell-boundary locations is communicated. - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - subroutine s_mpi_sendrecv_grid_vars_buffer_regions(pbc_loc, sweep_coord) - - character(LEN=3), intent(in) :: pbc_loc - character, intent(in) :: sweep_coord - -#ifdef MFC_MPI - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Sending/receiving the data to/from bc_x%beg/bc_x%end - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%end/bc_x%end - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! 
Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_grid_vars_buffer_regions - !> Communicates buffer regions associated with conservative !! variables with processors in charge of the neighboring !! sub-domains diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 8fece2820b..a9484255c0 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -33,6 +33,8 @@ module m_start_up use m_checker_common + use m_boundary_conditions + use m_checker use m_thermochem, only: num_species, species_names @@ -177,7 +179,7 @@ subroutine s_perform_time_step(t_step) ! 
Populating the buffer regions of the conservative variables if (buff_size > 0) then - call s_populate_conservative_variables_buffer_regions() + call s_populate_variables_buffers(q_cons_vf) if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if From 97e4f0334f59c0bd6e2a90f6a3f3e51c34bf5519 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 5 May 2025 12:42:07 -0400 Subject: [PATCH 09/62] bug fix --- src/simulation/m_bubbles_EL.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index a2ac3a7ae9..7a5bb6e421 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -614,7 +614,7 @@ contains mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k,1), gas_mv(k,1), & + gas_mg(k), gas_mv(k), & cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp @@ -643,7 +643,7 @@ contains mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k,1), gas_mv(k,1), & + gas_mg(k), gas_mv(k), & cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp From 5644605ba16e7be744be513fe0b2229c0c205bc1 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 5 May 2025 15:59:19 -0400 Subject: [PATCH 10/62] more refactoring --- src/common/m_boundary_common.fpp | 314 ++++++++++++++++++++-- src/post_process/m_data_input.f90 | 40 +++ src/post_process/m_global_parameters.fpp | 4 + src/post_process/m_start_up.f90 | 18 +- src/pre_process/m_boundary_conditions.fpp | 144 +--------- src/pre_process/m_data_output.fpp | 2 +- src/pre_process/m_perturbation.fpp | 2 +- src/simulation/m_boundary_conditions.fpp | 129 --------- src/simulation/m_global_parameters.fpp | 4 - src/simulation/m_mpi_proxy.fpp | 2 - src/simulation/m_rhs.fpp | 2 +- src/simulation/m_sim_helpers.f90 | 25 +- src/simulation/m_time_steppers.fpp | 2 - toolchain/mfc/run/case_dicts.py | 3 +- 14 files changed, 359 insertions(+), 332 deletions(-) delete mode 100644 src/simulation/m_boundary_conditions.fpp diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 55ac2fec82..857eae09e9 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -17,6 +17,11 @@ module m_boundary_common use m_constants + use m_delay_file_access + + use m_compile_specific + + implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers @@ -32,6 +37,11 @@ module m_boundary_common s_populate_variables_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & + s_write_serial_boundary_condition_files, & + s_write_parallel_boundary_condition_files, & + s_read_serial_boundary_condition_files, & + s_read_parallel_boundary_condition_files, & + s_assign_default_bc_type, & s_finalize_boundary_common_module public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze @@ -48,7 +58,6 @@ contains @:ALLOCATE(bc_buffers(1:num_dims, -1:1)) -#ifndef MFC_POST_PROCESS if (bc_io) then @:ALLOCATE(bc_buffers(1, -1)%sf(1:sys_size, 0:n, 0:p)) @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) @@ -64,17 +73,16 @@ contains end if end if end if -#endif end subroutine s_initialize_boundary_common_module !> The purpose of this procedure is to populate the buffers !! of the primitive variables, depending on the selected !! boundary conditions. 
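    ! Intended call shapes after this reordering (both appear verbatim
    ! elsewhere in this patch; the second relies on pb and mv being optional):
    !
    !   call s_populate_variables_buffers(bc_type, q_prim_vf, pb%sf, mv%sf)
    !   call s_populate_variables_buffers(bc_type, q_cons_vf)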
- subroutine s_populate_variables_buffers(q_prim_vf, pb, mv, bc_type) + subroutine s_populate_variables_buffers(bc_type, q_prim_vf, pb, mv) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type integer :: bc_loc, bc_dir @@ -82,7 +90,7 @@ contains ! Population of Buffers in x-direction if (bcxb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -106,7 +114,7 @@ contains end if if (bcxe >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -134,7 +142,7 @@ contains if (n == 0) return if (bcyb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -160,7 +168,7 @@ contains end if if (bcye >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -188,7 +196,7 @@ contains if (p == 0) return if (bczb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -212,7 +220,7 @@ contains end if if (bcze >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1166,7 +1174,7 @@ contains !< x-direction if (bcxb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1184,7 +1192,7 @@ contains end if if (bcxe >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1205,7 +1213,7 @@ contains !< y-direction if (bcyb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1223,7 +1231,7 @@ contains end if if (bcye >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1244,7 +1252,7 @@ contains !< z-direction if (bczb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, 
num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1262,7 +1270,7 @@ contains end if if (bcze >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1511,9 +1519,279 @@ contains #endif end subroutine s_create_mpi_types + subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + logical :: old_grid + + character(LEN=*), intent(in) :: step_dirpath + + integer :: dir, loc, i + character(len=path_len) :: file_path + + character(len=10) :: status + + if (old_grid) then + status = 'old' + else + status = 'new' + end if + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_path = trim(step_dirpath)//'/bc_type.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_type(dir, loc)%sf + end do + end do + close (1) + + file_path = trim(step_dirpath)//'/bc_buffers.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_buffers(dir, loc)%sf + end do + end do + close (1) + + end subroutine s_write_serial_boundary_condition_files + + subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(file_loc)) + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Write bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + end do + end do + + ! 
Write bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_write_parallel_boundary_condition_files + + subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) + + character(LEN=*), intent(in) :: step_dirpath + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + logical :: file_exist + character(len=path_len) :: file_path + + character(len=10) :: status + + ! Read bc_types + file_path = trim(step_dirpath)//'/bc_type.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_type(dir, loc)%sf + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + close (1) + + ! Read bc_buffers + file_path = trim(step_dirpath)//'/bc_buffers.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_buffers(dir, loc)%sf + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + close (1) + + end subroutine s_read_serial_boundary_condition_files + + subroutine s_read_parallel_boundary_condition_files(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Read bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + + ! 
Read bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_read_parallel_boundary_condition_files + + subroutine s_pack_boundary_condition_buffers(q_prim_vf) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + integer :: i, j, k + + do k = 0, p + do j = 0, n + do i = 1, sys_size + bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) + bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) + end do + end do + end do + + if (n > 0) then + do k = 0, p + do j = 1, sys_size + do i = 0, m + bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) + bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) + end do + end do + end do + + if (p > 0) then + do k = 1, sys_size + do j = 0, n + do i = 0, m + bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) + bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) + end do + end do + end do + end if + end if + + end subroutine s_pack_boundary_condition_buffers + + subroutine s_assign_default_bc_type(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + bc_type(1, -1)%sf(:, :, :) = bc_x%beg + bc_type(1, 1)%sf(:, :, :) = bc_x%end + !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) + + if (n > 0) then + bc_type(2, -1)%sf(:, :, :) = bc_y%beg + bc_type(2, 1)%sf(:, :, :) = bc_y%end + !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) + + if (p > 0) then + bc_type(3, -1)%sf(:, :, :) = bc_z%beg + bc_type(3, 1)%sf(:, :, :) = bc_z%end + !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) + end if + end if + + end subroutine s_assign_default_bc_type + subroutine s_finalize_boundary_common_module() -#ifndef MFC_POST_PROCESS if (bc_io) then deallocate (bc_buffers(1, -1)%sf) deallocate (bc_buffers(1, 1)%sf) @@ -1526,7 +1804,7 @@ contains end if end if end if -#endif + deallocate (bc_buffers) end subroutine s_finalize_boundary_common_module diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 826ae7f572..6986d76ee2 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -21,6 +21,8 @@ module m_data_input use m_compile_specific + use m_boundary_common + use m_helper implicit none @@ -56,6 +58,9 @@ end subroutine s_read_abstract_data_files type(scalar_field), allocatable, dimension(:), public :: q_particle !< !! Lagrangian solver (particle void fraction) + type(integer_field), allocatable, dimension(:, :), public :: bc_type !< + !! Boundary condition identifiers + type(scalar_field), public :: q_T_sf !< !! Temperature field @@ -127,6 +132,11 @@ subroutine s_read_serial_data_files(t_step) ' is missing. Exiting.') end if + if (bc_io) then + call s_read_serial_boundary_condition_files(t_step_dir, bc_type) + else + call s_assign_default_bc_type(bc_type) + end if ! Reading the Grid Data File for the x-direction ! 
Checking whether x_cb.dat exists @@ -546,6 +556,12 @@ subroutine s_read_parallel_data_files(t_step) deallocate (x_cb_glb, y_cb_glb, z_cb_glb) + if (bc_io) then + call s_read_parallel_boundary_condition_files(bc_type) + else + call s_assign_default_bc_type(bc_type) + end if + #endif end subroutine s_read_parallel_data_files @@ -1411,6 +1427,20 @@ subroutine s_initialize_data_input_module end if + ! Allocating arrays to store the bc types + allocate(bc_type(1:num_dims,-1:1)) + + allocate(bc_type(1,-1)%sf(0:0,0:n,0:p)) + allocate(bc_type(1,1)%sf(0:0,0:n,0:p)) + if (n > 0) then + allocate(bc_type(2,-1)%sf(-buff_size:m+buff_size,0:0,0:p)) + allocate(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) + if (p > 0) then + allocate(bc_type(3,-1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + allocate(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + end if + end if + if (parallel_io .neqv. .true.) then s_read_data_files => s_read_serial_data_files else @@ -1446,6 +1476,16 @@ subroutine s_finalize_data_input_module deallocate (q_T_sf%sf) end if + deallocate(bc_type(1,-1)%sf, bc_type(1,1)%sf) + if (n > 0) then + deallocate(bc_type(2,-1)%sf, bc_type(2, 1)%sf) + if (p > 0) then + deallocate(bc_type(3,-1)%sf, bc_type(3,1)%sf) + end if + end if + + deallocate(bc_type) + s_read_data_files => null() end subroutine s_finalize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 0a5089f247..3847ce0f73 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -147,6 +147,8 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: num_bc_patches + logical :: bc_io !> @name Boundary conditions in the x-, y- and z-coordinate directions !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -370,6 +372,8 @@ contains bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int bc_z%beg = dflt_int; bc_z%end = dflt_int + bc_io = .false. + num_bc_patches = dflt_int #:for DIM in ['x', 'y', 'z'] #:for DIR in [1, 2, 3] diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index a9484255c0..3acf713d84 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -16,6 +16,10 @@ module m_start_up use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common !< Common MPI subroutines + + use m_boundary_common !< Common boundary conditions subroutines + use m_variables_conversion !< Subroutines to change the state variables from !! one form to another @@ -33,8 +37,6 @@ module m_start_up use m_checker_common - use m_boundary_conditions - use m_checker use m_thermochem, only: num_species, species_names @@ -86,7 +88,8 @@ subroutine s_read_input_file relax_model, cf_wrt, sigma, adv_n, ib, num_ibs, & cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, rkck_adap_dt, & - sim_data, hyperelasticity, Bx0, relativity, cont_damage + sim_data, hyperelasticity, Bx0, relativity, cont_damage, & + num_bc_patches ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -117,6 +120,11 @@ subroutine s_read_input_file if (cfl_adap_dt .or. cfl_const_dt .or. rkck_adap_dt) cfl_dt = .true. + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + num_bc_patches > 0) then + bc_io = .true. + endif + else call s_mpi_abort('File post_process.inp is missing. 
Exiting.') end if @@ -179,8 +187,8 @@ subroutine s_perform_time_step(t_step) ! Populating the buffer regions of the conservative variables if (buff_size > 0) then - call s_populate_variables_buffers(q_cons_vf) - if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) + call s_populate_variables_buffers(bc_type, q_cons_vf) + !if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if ! Initialize the Temperature cache. diff --git a/src/pre_process/m_boundary_conditions.fpp b/src/pre_process/m_boundary_conditions.fpp index 5bc18ad68c..75452459bb 100644 --- a/src/pre_process/m_boundary_conditions.fpp +++ b/src/pre_process/m_boundary_conditions.fpp @@ -24,9 +24,7 @@ module m_boundary_conditions real(wp) :: radius type(bounds_info) :: x_boundary, y_boundary, z_boundary !< - private; public :: s_apply_boundary_patches, & - s_write_serial_boundary_condition_files, & - s_write_parallel_boundary_condition_files + private; public :: s_apply_boundary_patches contains subroutine s_line_segment_bc(patch_id, q_prim_vf, bc_type) @@ -267,144 +265,4 @@ contains end subroutine s_apply_boundary_patches - subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - character(LEN=*), intent(in) :: step_dirpath - - integer :: dir, loc, i - character(len=path_len) :: file_path - - character(len=10) :: status - - if (old_grid) then - status = 'old' - else - status = 'new' - end if - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_path = trim(step_dirpath)//'/bc_type.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_type(dir, loc)%sf - end do - end do - close (1) - - file_path = trim(step_dirpath)//'/bc_buffers.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_buffers(dir, loc)%sf - end do - end do - close (1) - - end subroutine s_write_serial_boundary_condition_files - - subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - - character(len=10) :: status - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(file_loc)) - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! 
Write bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - end do - end do - - ! Write bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_write_parallel_boundary_condition_files - - subroutine s_pack_boundary_condition_buffers(q_prim_vf) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - integer :: i, j, k - - do k = 0, p - do j = 0, n - do i = 1, sys_size - bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) - bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) - end do - end do - end do - - if (n > 0) then - do k = 0, p - do j = 1, sys_size - do i = 0, m - bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) - bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) - end do - end do - end do - - if (p > 0) then - do k = 1, sys_size - do j = 0, n - do i = 0, m - bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) - bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) - end do - end do - end do - end if - end if - - end subroutine s_pack_boundary_condition_buffers - end module m_boundary_conditions diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 7ae637f034..95363db1e3 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -162,7 +162,7 @@ contains end if if (bc_io) then - call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir) + call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir, old_grid) end if ! x-coordinate direction diff --git a/src/pre_process/m_perturbation.fpp b/src/pre_process/m_perturbation.fpp index d1b7536fdb..e1d9ed7efc 100644 --- a/src/pre_process/m_perturbation.fpp +++ b/src/pre_process/m_perturbation.fpp @@ -624,7 +624,7 @@ contains do q = 1, elliptic_smoothing_iters ! Communication of buffer regions and apply boundary conditions - call s_populate_variables_buffers(q_prim_vf, pb%sf, mv%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb%sf, mv%sf) ! Perform smoothing and store in temp array if (n == 0) then diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp deleted file mode 100644 index 0d6d22f8af..0000000000 --- a/src/simulation/m_boundary_conditions.fpp +++ /dev/null @@ -1,129 +0,0 @@ -! @file m_boundary_conditions.fpp -! 
@brief Contains module m_boundary_conditions - -!> @brief This module contains -module m_boundary_conditions - - use m_derived_types - - use m_global_parameters -#ifdef MFC_MPI - use mpi -#endif - use m_delay_file_access - - use m_compile_specific - - use m_boundary_common - -contains - - subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) - - character(LEN=*), intent(in) :: step_dirpath - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - logical :: file_exist - character(len=path_len) :: file_path - - character(len=10) :: status - - ! Read bc_types - file_path = trim(step_dirpath)//'/bc_type.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_type(dir, loc)%sf - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - close (1) - - ! Read bc_buffers - file_path = trim(step_dirpath)//'/bc_buffers.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_buffers(dir, loc)%sf - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - close (1) - - end subroutine s_read_serial_boundary_condition_files - - subroutine s_read_parallel_boundary_condition_files(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - - character(len=10) :: status - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! Read bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - - ! 
Read bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_read_parallel_boundary_condition_files - -end module m_boundary_conditions diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 1bdc9f1d97..217d14937a 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -190,8 +190,6 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io - integer :: BC_RIEMANN_EXTRAPOLATION - integer :: BC_GHOST_EXTRAPOLATION !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -586,8 +584,6 @@ contains num_bc_patches = 0 bc_io = .false. - BC_RIEMANN_EXTRAPOLATION = -4 - BC_GHOST_EXTRAPOLATION = -3 bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index ffabb9e216..9edd2f3121 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -72,8 +72,6 @@ contains end subroutine s_initialize_mpi_proxy_module -contains - !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 6930f3caa7..0c8470dff4 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -671,7 +671,7 @@ contains call nvtxEndRange call nvtxStartRange("RHS-COMMUNICATION") - call s_populate_variables_buffers(q_prim_qp%vf, pb, mv, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_qp%vf, pb, mv) call nvtxEndRange call nvtxStartRange("RHS-ELASTIC") diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index 4d525cf5d9..88d37f9674 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,8 +10,7 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl, & - s_assign_default_bc_type + s_compute_dt_from_cfl contains @@ -268,26 +267,4 @@ subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl - subroutine s_assign_default_bc_type(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type - - bc_type(1, -1)%sf(:, :, :) = bc_x%beg - bc_type(1, 1)%sf(:, :, :) = bc_x%end - !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) - - if (n > 0) then - bc_type(2, -1)%sf(:, :, :) = bc_y%beg - bc_type(2, 1)%sf(:, :, :) = bc_y%end - !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) - - if (p > 0) then - bc_type(3, -1)%sf(:, :, :) = bc_z%beg - bc_type(3, 1)%sf(:, :, :) = bc_z%end - !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) - end if - end if - - end subroutine s_assign_default_bc_type - end module m_sim_helpers diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index f8cdb7a7ac..385b846058 100644 --- a/src/simulation/m_time_steppers.fpp +++ 
b/src/simulation/m_time_steppers.fpp @@ -32,8 +32,6 @@ module m_time_steppers use m_boundary_common - use m_boundary_conditions - use m_helper use m_sim_helpers diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index a2d2ca559b..0b82fbb6ea 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -61,6 +61,7 @@ def analytic(self): 'Bx0': ParamType.REAL, 'relativity': ParamType.LOG, 'cont_damage': ParamType.LOG, + 'num_bc_patches': ParamType.INT, } PRE_PROCESS = COMMON.copy() @@ -98,7 +99,6 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'elliptic_smoothing': ParamType.LOG, 'elliptic_smoothing_iters': ParamType.INT, - 'num_bc_patches': ParamType.INT, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, }) @@ -292,7 +292,6 @@ def analytic(self): 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, 'rkck_tolerance': ParamType.REAL, - 'num_bc_patches': ParamType.INT, 'powell': ParamType.LOG, 'tau_star': ParamType.REAL, 'cont_damage_s': ParamType.REAL, From 78043ab5645ca1b765cf18d7075a75e3fd12c7a0 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 5 May 2025 17:25:16 -0400 Subject: [PATCH 11/62] more refactoring --- src/common/m_mpi_common.fpp | 10 +- src/common/m_variables_conversion.fpp | 4 + src/post_process/m_data_input.f90 | 603 +-------------- src/post_process/m_global_parameters.fpp | 6 + src/post_process/m_mpi_proxy.fpp | 651 ---------------- src/post_process/m_start_up.f90 | 9 +- src/simulation/m_ibm.fpp | 17 +- src/simulation/m_mpi_proxy.fpp | 946 ++++------------------- 8 files changed, 191 insertions(+), 2055 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b6dd6ad36b..6d670b1237 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -636,17 +636,17 @@ contains if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then qbmm_comm = .true. 
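            ! Editorial note, a sketch added for clarity and not part of the original
            ! patch: when QBMM is active with a non-polytropic gas, each exchanged cell
            ! carries the nVar conserved fields plus the additional pb and mv fields
            ! (2*nb*4 of them), hence v_size = nVar + 2*nb*4 below. For illustration
            ! only, assuming nVar = 12 and nb = 3 gives v_size = 12 + 2*3*4 = 36 values
            ! per halo cell; each directional buffer count is then v_size times the
            ! number of cells in the buff_size-thick slab being exchanged.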
- v_size = sys_size + 2*nb*4 + v_size = nVar + 2*nb*4 buffer_counts = (/ & - buff_size*(nVar + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(nVar + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) else v_size = nVar buffer_counts = (/ & - buff_size*nVar*(n + 1)*(p + 1), & - buff_size*nVar*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) end if diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index dbbf4457e1..a71bc4875b 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1152,6 +1152,10 @@ contains if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) +#ifdef MFC_POST_PROCESS + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) +#endif + end do end do end do diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 6986d76ee2..2db034b4b5 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -32,7 +32,6 @@ module m_data_input s_read_serial_data_files, & s_read_parallel_data_files, & s_populate_grid_variables_buffer_regions, & - s_populate_conservative_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -55,9 +54,6 @@ end subroutine s_read_abstract_data_files type(scalar_field), allocatable, dimension(:), public :: q_prim_vf !< !! Primitive variables - type(scalar_field), allocatable, dimension(:), public :: q_particle !< - !! Lagrangian solver (particle void fraction) - type(integer_field), allocatable, dimension(:, :), public :: bc_type !< !! Boundary condition identifiers @@ -250,29 +246,6 @@ subroutine s_read_serial_data_files(t_step) end if end if - if (bubbles_lagrange) then !Lagrangian solver - - ! Checking whether the data file associated with the variable - ! position of currently manipulated conservative variable exists - write (file_num, '(I0)') sys_size + 1 - file_loc = trim(t_step_dir)//'/q_cons_vf'// & - trim(file_num)//'.dat' - inquire (FILE=trim(file_loc), EXIST=file_check) - - ! Reading the data file if it exists, exiting otherwise - if (file_check) then - open (1, FILE=trim(file_loc), FORM='unformatted', & - STATUS='old', ACTION='read') - read (1) q_particle(1)%sf(0:m, 0:n, 0:p) - close (1) - else - print '(A)', 'File q_cons_vf'//trim(file_num)// & - '.dat is missing in '//trim(t_step_dir)// & - '. Exiting.' - call s_mpi_abort() - end if - end if - end subroutine s_read_serial_data_files !> This subroutine is called at each time-step that has to @@ -304,14 +277,6 @@ subroutine s_read_parallel_data_files(t_step) integer :: i - integer :: alt_sys !Altered sys_size for lagrangian solver - - if (bubbles_lagrange) then - alt_sys = sys_size + 1 - else - alt_sys = sys_size - end if - allocate (x_cb_glb(-1:m_glb)) allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) @@ -467,8 +432,6 @@ subroutine s_read_parallel_data_files(t_step) ! 
Initialize MPI data I/O if (ib) then call s_initialize_mpi_data(q_cons_vf, ib_markers) - elseif (bubbles_lagrange) then - call s_initialize_mpi_data(q_cons_vf, beta=q_particle(1)) else call s_initialize_mpi_data(q_cons_vf) end if @@ -483,46 +446,20 @@ subroutine s_read_parallel_data_files(t_step) WP_MOK = int(8._wp, MPI_OFFSET_KIND) MOK = int(1._wp, MPI_OFFSET_KIND) str_MOK = int(name_len, MPI_OFFSET_KIND) - NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) + NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) ! Read the data for each variable - if (bubbles_euler .or. elasticity) then - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - else - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - end if - - if (bubbles_lagrange) then !Lagrangian solver - var_MOK = int(sys_size + 1, MPI_OFFSET_KIND) + do i = 1, sys_size + var_MOK = int(i, MPI_OFFSET_KIND) ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(sys_size + 1), & + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_DATA%var(sys_size + 1)%sf, data_size, & - mpi_p, status, ierr) - end if + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do call s_mpi_barrier() @@ -816,510 +753,6 @@ subroutine s_populate_grid_variables_buffer_regions end subroutine s_populate_grid_variables_buffer_regions - !> The purpose of this procedure is to populate the buffers - !! of the cell-average conservative variables, depending on - !! the boundary conditions. - subroutine s_populate_conservative_variables_buffer_regions(q_particle) - - type(scalar_field), intent(inout), optional :: q_particle - - integer :: i, j, k !< Generic loop iterators - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(0, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = q_cons_vf(i)%sf(0, 0:n, 0:p) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(-j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf(j - 1, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! 
energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(m, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(m, 0:n, 0:p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(m + j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf((m + 1) - j, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x') - end if - - end if - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = q_particle%sf(:, 0, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = q_cons_vf(i)%sf(:, 0, 0:p) - end do - end if - end do - - ! 
Axis BC at the beginning - elseif (bc_y%beg == BC_AXIS) then - - do j = 1, buff_size - do k = 0, p - if (z_cc(k) < pi) then - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k + ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k + ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k + ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - end if - else - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k - ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k - ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k - ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - end if - end if - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, n, 0:p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, n + j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, (n + 1) - j, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! 
energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y') - end if - - end if - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = q_particle%sf(:, :, 0) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = q_cons_vf(i)%sf(:, :, 0) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, j - 1) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, -j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, j - 1) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, (p + 1) - j) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - end do - - ! Processor BC at the beginning - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, (p + 1) - j) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, p + j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, (p + 1) - j) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - - end do - - ! 
Perodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, j - 1) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z') - end if - - end if - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_conservative_variables_buffer_regions - !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module subroutine s_initialize_data_input_module @@ -1331,7 +764,6 @@ subroutine s_initialize_data_input_module ! the simulation allocate (q_cons_vf(1:sys_size)) allocate (q_prim_vf(1:sys_size)) - if (bubbles_lagrange) allocate (q_particle(1)) ! Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of the @@ -1358,12 +790,6 @@ subroutine s_initialize_data_input_module -buff_size:p + buff_size)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1388,12 +814,6 @@ subroutine s_initialize_data_input_module 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1417,10 +837,6 @@ subroutine s_initialize_data_input_module allocate (ib_markers%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, 0:0, 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if @@ -1467,11 +883,6 @@ subroutine s_finalize_data_input_module deallocate (ib_markers%sf) end if - if (bubbles_lagrange) then - deallocate (q_particle(1)%sf) - deallocate (q_particle) - end if - if (chemistry) then deallocate (q_T_sf%sf) end if diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 3847ce0f73..c1cb89d614 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -124,6 +124,7 @@ module m_global_parameters type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns. integer :: E_idx !< Index of energy equation integer :: n_idx !< Index of number density + integer :: beta_idx !< Index of lagrange bubbles beta type(int_bounds_info) :: adv_idx !< Indexes of first & last advection eqns. type(int_bounds_info) :: internalEnergies_idx !< Indexes of first & last internal energy eqns. type(bub_bounds_info) :: bub_idx !< Indexes of first & last bubble variable eqns. 
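    ! Editorial sketch, not part of the original patch: with bubbles_lagrange the
    ! post-process state vector now carries the Eulerian projection of the particle
    ! void fraction as one extra field appended after the indices defined so far,
    ! e.g. (layout illustrative only)
    !   q_cons_vf(1:sys_size-1) -> usual conservative fields
    !   q_cons_vf(beta_idx)     -> lagrange beta, with void fraction = 1 - beta
    ! This replaces the separate q_particle array and the alt_sys = sys_size + 1
    ! bookkeeping removed from m_data_input.f90 above; the hunk below grows
    ! sys_size accordingly.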
@@ -591,6 +592,11 @@ contains end if + if (bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + if (mhd) then B_idx%beg = sys_size + 1 if (n == 0) then diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 29ffbc134f..2df1025c21 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -202,657 +202,6 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Communicates buffer regions associated with conservative - !! variables with processors in charge of the neighboring - !! sub-domains - !! @param q_cons_vf Conservative variables - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - !! @param q_particle Projection of the lagrangian particles in the Eulerian framework - subroutine s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, pbc_loc, & - sweep_coord, q_particle) - - type(scalar_field), & - dimension(sys_size), & - intent(inout) :: q_cons_vf - - character(LEN=3), intent(in) :: pbc_loc - - character, intent(in) :: sweep_coord - - type(scalar_field), & - intent(inout), optional :: q_particle - -#ifdef MFC_MPI - - integer :: i, j, k, l, r !< Generic loop iterators - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%beg - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! 
Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%end - do l = 0, p - do k = 0, n - do j = m + 1, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j - m - 1) & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! 
Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_y%beg - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! 
Unpacking the data received form bc_y%end - do l = 0, p - do k = n + 1, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n - 1) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data from bc_z%beg - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l + buff_size) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! 
Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_z%end - do l = p + 1, p + buff_size - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l - p - 1) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_cons_vars_buffer_regions - !> This subroutine gathers the Silo database metadata for !! the spatial extents in order to boost the performance of !! the multidimensional visualization. diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 3acf713d84..fe78f01da9 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -180,15 +180,10 @@ subroutine s_perform_time_step(t_step) ! Populating the grid and conservative variables call s_read_data_files(t_step) - ! Populating the buffer regions of the grid variables + ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then call s_populate_grid_variables_buffer_regions() - end if - - ! 
Populating the buffer regions of the conservative variables - if (buff_size > 0) then call s_populate_variables_buffers(bc_type, q_cons_vf) - !if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if ! Initialize the Temperature cache. @@ -674,7 +669,7 @@ subroutine s_save_data(t_step, varname, pres, c, H) ! Adding the lagrangian subgrid variables to the formatted database file if (bubbles_lagrange) then !! Void fraction field - q_sf = 1._wp - q_particle(1)%sf( & + q_sf = 1._wp - q_cons_vf(beta_idx)%sf( & -offset_x%beg:m + offset_x%end, & -offset_y%beg:n + offset_y%end, & -offset_z%beg:p + offset_z%end) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 0c18d17327..f43fcb00ba 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! Get neighboring IB variables from other processors - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + call s_populate_ib_buffers(ib_markers, gp_layers) !$acc update host(ib_markers%sf) @@ -108,6 +108,21 @@ contains end subroutine s_ibm_setup + subroutine s_populate_ib_buffers(ib_markers, gp_layers) + + type(integer_field), intent(inout) :: ib_markers + integer, intent(in) :: gp_layers + + #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] + #:for LOCC, LOCI in [('beg', -1), ('end', 1)] + if (bc_${DIRC}$%${LOCC}$ > 0) then + call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, ${DIRI}$, ${LOCI}$) + end if + #:endfor + #:endfor + + end subroutine s_populate_ib_buffers + !> Subroutine that updates the conservative variables at the ghost points !! @param q_cons_vf Conservative Variables !! @param q_prim_vf Primitive variables diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 9edd2f3121..aa59159c59 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,31 +44,37 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, v_size + integer, private :: err_code, ierr, i_halo_size !> @} - !$acc declare create(v_size) + !$acc declare create(i_halo_size) contains subroutine s_initialize_mpi_proxy_module() +#ifdef MFC_MPI if (ib) then if (n > 0) then if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & + i_halo_size = -1 + gp_layers * & & (m + 2*gp_layers + 1)* & & (n + 2*gp_layers + 1)* & & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) + & (min(m, n, p) + 2*gp_layers + 1) else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) + i_halo_size = -1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1) end if else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) + i_halo_size = -1 + gp_layers end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) + + !$acc declare create(i_halo_size) + + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) + end if +#endif end subroutine s_initialize_mpi_proxy_module @@ -229,834 +235,184 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. 
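    !! (Editorial sketch, not part of the original patch: the refactored routine below
    !! exchanges the immersed-boundary markers one coordinate direction and one domain
    !! side per call, mirroring the s_populate_ib_buffers dispatch added in m_ibm.fpp.)
    !! @param mpi_dir Coordinate direction of the exchange (1: x, 2: y, 3: z)
    !! @param pbc_loc Domain side of the exchange (-1: beginning, 1: end)
    !! An illustrative call filling the x-direction beginning halo:
    !!   call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, 1, -1)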
- subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, mpi_dir, pbc_loc) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - integer :: i, j, k, l, r !< Generic loop iterators - integer, pointer, dimension(:) :: p_i_send, p_i_recv + integer, intent(in) :: gp_layers, mpi_dir, pbc_loc -#ifdef MFC_MPI - !nCalls_time = nCalls_time + 1 - - ! MPI Communication in x-direction - if (bc_x%beg >= 0) then ! PBC at the beginning + integer :: i, j, k, l, r, q !< Generic loop iterators - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do + integer :: buffer_counts(1:3), buffer_count - !call MPI_Barrier(MPI_COMM_WORLD, ierr) + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv + logical :: beg_end_geq_0, qbmm_comm - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) + integer :: pack_offset, unpack_offset - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + integer, pointer :: p_i_send, p_i_recv - !$acc end host_data - !$acc end data - !$acc wait - else -#endif +#ifdef MFC_MPI - !$acc update host(ib_buff_send, ib_buff_send) + call nvtxStartRange("RHS-COMM-PACKBUF") - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & + buffer_counts = (/ & gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + gp_layers*(m + 2*buff_size + 1)*(p + 1), & + gp_layers*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) -#if defined(MFC_OpenACC) - end if -#endif + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - else ! PBC at the beginning only + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do + send_tag = f_logical_to_int(.not. 
f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) - !call MPI_Barrier(MPI_COMM_WORLD, ierr) + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do + grid_dims = (/m, n, p/) + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - gp_layers + 1 end if - if (bc_x%end >= 0) then ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - !$acc parallel loop collapse(3) gang vector default(present) private(r) - ! Packing buffer to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! 
Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if - - ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m + 1, m + gp_layers - r = ((j - m - 1) + gp_layers*(k + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - - if (bc_y%beg >= 0) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers * (k + (n + 1) * l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_y%end >= 0) then ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, gp_layers - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + #:endif end if -#endif - - ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n + 1, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n - 1) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - if (bc_z%beg >= 0) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! 
Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - + #:endfor + call nvtxEndRange ! Packbuf + + p_i_send => ib_buff_send(0) + p_i_recv => ib_buff_recv(0) + + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(buff_send) + call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_i_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + p_i_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi !$acc end host_data !$acc end data !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(buff_recv) + call nvtxEndRange + #:endif end if -#endif + #:endfor - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) + ! Unpack Received Buffer + call nvtxStartRange("RHS-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers * ((k + 1) + (n + 1) * l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do + end do end do - end do - end do - - end if - - if (bc_z%end >= 0) then ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! 
Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + ! Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = -gp_layers, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + #:endif end if -#endif - - ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p + 1, p + gp_layers - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p - 1))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - ! END: MPI Communication in z-direction - + #:endfor + call nvtxEndRange #endif end subroutine s_mpi_sendrecv_ib_buffers From f2e5692c4558a766a4402964b4c40a77dfe69cde Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 5 May 2025 22:57:08 -0400 Subject: [PATCH 12/62] bug fixes and IBM halo exchange fix --- src/simulation/m_ibm.fpp | 9 ++++---- src/simulation/m_mpi_proxy.fpp | 40 ++++++++++++++++++---------------- src/simulation/m_start_up.fpp | 2 ++ src/simulation/m_viscous.fpp | 12 +++++----- src/simulation/m_weno.fpp | 8 +++---- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index f43fcb00ba..08d1893c1e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! 
Get neighboring IB variables from other processors - call s_populate_ib_buffers(ib_markers, gp_layers) + call s_populate_ib_buffers(ib_markers) !$acc update host(ib_markers%sf) @@ -108,15 +108,14 @@ contains end subroutine s_ibm_setup - subroutine s_populate_ib_buffers(ib_markers, gp_layers) + subroutine s_populate_ib_buffers(ib_markers) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in [('beg', -1), ('end', 1)] - if (bc_${DIRC}$%${LOCC}$ > 0) then - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, ${DIRI}$, ${LOCI}$) + if (bc_${DIRC}$%${LOCC}$ >= 0) then + call s_mpi_sendrecv_ib_buffers(ib_markers, ${DIRI}$, ${LOCI}$) end if #:endfor #:endfor diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index aa59159c59..1ff0252fb4 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,8 +44,10 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, i_halo_size + integer, private :: err_code, ierr !> @} + + integer :: i_halo_size !$acc declare create(i_halo_size) contains @@ -70,9 +72,7 @@ contains end if !$acc declare create(i_halo_size) - @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) - end if #endif @@ -235,11 +235,11 @@ contains end subroutine s_mpi_bcast_user_inputs - subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, mpi_dir, pbc_loc) + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers, mpi_dir, pbc_loc + integer, intent(in) :: mpi_dir, pbc_loc integer :: i, j, k, l, r, q !< Generic loop iterators @@ -261,8 +261,8 @@ contains buffer_counts = (/ & gp_layers*(n + 1)*(p + 1), & - gp_layers*(m + 2*buff_size + 1)*(p + 1), & - gp_layers*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & /) buffer_count = buffer_counts(mpi_dir) @@ -312,8 +312,8 @@ contains !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, gp_layers - 1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers+ 1)* & (k + gp_layers*l)) ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) end do @@ -322,10 +322,10 @@ contains #:else !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, gp_layers - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) end do end do @@ -389,8 +389,8 @@ contains !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = -gp_layers, -1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & ((k + gp_layers) + gp_layers*l)) ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) end do @@ -400,10 +400,10 @@ contains ! 
Unpacking buffer from bc_z%beg !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = -gp_layers, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & (l + gp_layers))) ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) end do @@ -420,9 +420,11 @@ contains subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn + #ifdef MFC_MPI call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif + end subroutine s_mpi_send_random_number subroutine s_finalize_mpi_proxy_module() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index a45346673c..8997b3f51a 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1480,6 +1480,7 @@ contains call s_initialize_mpi_common_module() + call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() if (grid_geometry == 3) call s_initialize_fftw_module() call s_initialize_riemann_solvers_module() @@ -1686,6 +1687,7 @@ contains if (viscous) then call s_finalize_viscous_module() end if + call s_finalize_mpi_proxy_module() if (surface_tension) call s_finalize_surface_tension_module() if (bodyForces) call s_finalize_body_forces_module() diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index a56143a1aa..8f8f7b450b 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1392,7 +1392,7 @@ contains end if end if - if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1401,7 +1401,7 @@ contains end do end do end if - if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1411,7 +1411,7 @@ contains end do end if if (n > 0) then - if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then + if (bc_y%beg <= BC_GHOST_EXTRAP .and. 
bc_y%beg /= BC_NULL) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1420,7 +1420,7 @@ contains end do end do end if - if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_y%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1430,7 +1430,7 @@ contains end do end if if (p > 0) then - if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1440,7 +1440,7 @@ contains end do end do end if - if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 2071b3a0f0..6747af2e1a 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -272,12 +272,12 @@ contains ! any contributions from outside of the physical domain during ! the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s) = 0._wp; d_cbR_${XYZ}$ (1, s) = 1._wp d_cbL_${XYZ}$ (0, s) = 0._wp; d_cbL_${XYZ}$ (1, s) = 1._wp end if @@ -418,14 +418,14 @@ contains ! any contributions from outside of the physical domain during ! the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1:2, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1:2, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp d_cbR_${XYZ}$ (2, 1) = 0._wp; d_cbR_${XYZ}$ (:, 1) = d_cbR_${XYZ}$ (:, 1)/sum(d_cbR_${XYZ}$ (:, 1)) d_cbL_${XYZ}$ (2, 1) = 0._wp; d_cbL_${XYZ}$ (:, 1) = d_cbL_${XYZ}$ (:, 1)/sum(d_cbL_${XYZ}$ (:, 1)) end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s - 1) = 0._wp; d_cbR_${XYZ}$ (:, s - 1) = d_cbR_${XYZ}$ (:, s - 1)/sum(d_cbR_${XYZ}$ (:, s - 1)) d_cbL_${XYZ}$ (0, s - 1) = 0._wp; d_cbL_${XYZ}$ (:, s - 1) = d_cbL_${XYZ}$ (:, s - 1)/sum(d_cbL_${XYZ}$ (:, s - 1)) d_cbR_${XYZ}$ (0:1, s) = 0._wp; d_cbR_${XYZ}$ (2, s) = 1._wp From 5e53d533730825f6edcf549e7485d4fe88dfd278 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 6 May 2025 15:59:06 -0400 Subject: [PATCH 13/62] gpu fixes --- src/simulation/m_mpi_proxy.fpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 1ff0252fb4..82dede13ab 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -71,7 +71,7 @@ contains i_halo_size = -1 + gp_layers end if - !$acc declare create(i_halo_size) + !$acc update device(i_halo_size) @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) end if #endif @@ -342,12 +342,12 @@ contains #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) + !$acc data attach(p_i_send, p_i_recv) + !$acc host_data use_device(p_i_send, p_i_recv) call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) + !$acc update host(ib_buff_send) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -365,7 +365,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) + !$acc update device(ib_buff_recv) call nvtxEndRange #:endif end if From 697a0ef20deca1ad8a99da331d2ab8529778ab84 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 8 May 2025 13:41:28 -0400 Subject: [PATCH 14/62] additional code shortening --- src/common/m_mpi_common.fpp | 25 +++------- src/post_process/m_mpi_proxy.fpp | 85 ++------------------------------ src/pre_process/m_mpi_proxy.fpp | 16 +----- src/simulation/m_mpi_proxy.fpp | 2 +- 4 files changed, 13 insertions(+), 115 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 6d670b1237..bf2260ec5b 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -24,7 +24,7 @@ module m_mpi_common implicit none - integer, private :: err_code, ierr, v_size !< + integer, private :: ierr, v_size !< !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors @@ -123,24 +123,11 @@ contains !! @param beta Eulerian void fraction from lagrangian bubbles subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) - type(scalar_field), & - dimension(sys_size), & - intent(in) :: q_cons_vf - - type(integer_field), & - optional, & - intent(in) :: ib_markers - - type(levelset_field), & - optional, & - intent(IN) :: levelset - - type(levelset_norm_field), & - optional, & - intent(IN) :: levelset_norm - - type(scalar_field), & - intent(in), optional :: beta + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(integer_field), optional, intent(in) :: ib_markers + type(levelset_field), optional, intent(IN) :: levelset + type(levelset_norm_field), optional, intent(IN) :: levelset_norm + type(scalar_field), intent(in), optional :: beta integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 2df1025c21..cbc6ba361c 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -23,14 +23,6 @@ module m_mpi_proxy implicit none - !> @name Buffers of the conservative variables received/sent from/to neighboring - !! processors. Note that these variables are structured as vectors rather - !! than arrays. - !> @{ - real(wp), allocatable, dimension(:) :: q_cons_buffer_in - real(wp), allocatable, dimension(:) :: q_cons_buffer_out - !> @} - !> @name Receive counts and displacement vector variables, respectively, used in !! enabling MPI to gather varying amounts of data from all processes to the !! root process @@ -41,7 +33,7 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr + integer, private :: ierr !> @} contains @@ -54,61 +46,6 @@ contains integer :: i !< Generic loop iterator - ! Allocating vectorized buffer regions of conservative variables. - ! 
The length of buffer vectors are set according to the size of the - ! largest buffer region in the sub-domain. - if (buff_size > 0) then - - ! Simulation is at least 2D - if (n > 0) then - - ! Simulation is 3D - if (p > 0) then - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - - ! Simulation is 2D - else - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - - end if - - ! Simulation is 1D - else - - allocate (q_cons_buffer_in(0:buff_size*sys_size - 1)) - allocate (q_cons_buffer_out(0:buff_size*sys_size - 1)) - - end if - - ! Initially zeroing out the vectorized buffer region variables - ! to avoid possible underflow from any unused allocated memory - q_cons_buffer_in = 0._wp - q_cons_buffer_out = 0._wp - - end if - ! Allocating and configuring the receive counts and the displacement ! vector variables used in variable-gather communication procedures. ! Note that these are only needed for either multidimensional runs @@ -366,10 +303,7 @@ contains subroutine s_mpi_gather_data_extents(q_sf, data_extents) real(wp), dimension(:, :, :), intent(in) :: q_sf - - real(wp), & - dimension(1:2, 0:num_procs - 1), & - intent(inout) :: data_extents + real(wp), dimension(1:2, 0:num_procs - 1), intent(inout) :: data_extents #ifdef MFC_MPI @@ -395,13 +329,8 @@ contains !! @param q_root_sf Flow variable defined on the entire computational domain subroutine s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) - real(wp), & - dimension(0:m), & - intent(in) :: q_sf - - real(wp), & - dimension(0:m), & - intent(inout) :: q_root_sf + real(wp), dimension(0:m), intent(in) :: q_sf + real(wp), dimension(0:m), intent(inout) :: q_root_sf #ifdef MFC_MPI @@ -421,12 +350,6 @@ contains #ifdef MFC_MPI - ! Deallocating the conservative variables buffer vectors - if (buff_size > 0) then - deallocate (q_cons_buffer_in) - deallocate (q_cons_buffer_out) - end if - ! Deallocating the receive counts and the displacement vector ! variables used in variable-gather communication procedures if ((format == 1 .and. n > 0) .or. n == 0) then diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 6d16839eeb..abc47cdb65 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -23,20 +23,8 @@ module m_mpi_proxy implicit none - integer, private :: err_code, ierr, v_size !< - !! Generic flags used to identify and report MPI errors - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! primitive variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_recv !< - !! q_prims_buff_recv is utilized to receive and unpack the buffer of the cell- - !! average primitive variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - ! integer :: halo_size + integer, private :: ierr !< + !! 
Generic flag used to identify and report MPI errors contains !> Since only processor with rank 0 is in charge of reading diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 82dede13ab..de42e53f5c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,7 +44,7 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr + integer, private :: ierr !> @} integer :: i_halo_size From 1ff32f0450ce5f1271a3f9fd198f65829c361027 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 8 May 2025 17:00:06 -0400 Subject: [PATCH 15/62] remove even more code --- src/common/m_boundary_common.fpp | 220 ++++++++++++++++++-- src/post_process/m_data_input.f90 | 251 ----------------------- src/post_process/m_global_parameters.fpp | 40 ++-- src/post_process/m_start_up.f90 | 17 +- src/pre_process/m_perturbation.fpp | 2 - src/simulation/m_start_up.fpp | 215 +------------------ 6 files changed, 228 insertions(+), 517 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 857eae09e9..2fa160c5f5 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -27,8 +27,6 @@ module m_boundary_common type(scalar_field), dimension(:, :), allocatable :: bc_buffers !$acc declare create(bc_buffers) - real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze - #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE #endif @@ -42,9 +40,10 @@ module m_boundary_common s_read_serial_boundary_condition_files, & s_read_parallel_boundary_condition_files, & s_assign_default_bc_type, & + s_populate_grid_variables_buffers, & s_finalize_boundary_common_module - public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze + public :: bc_buffers #ifdef MFC_MPI public :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -54,8 +53,6 @@ contains subroutine s_initialize_boundary_common_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - @:ALLOCATE(bc_buffers(1:num_dims, -1:1)) if (bc_io) then @@ -89,7 +86,7 @@ contains integer :: k, l ! 
Population of Buffers in x-direction - if (bcxb >= 0) then + if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -113,7 +110,7 @@ contains end do end if - if (bcxe >= 0) then + if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -141,7 +138,7 @@ contains if (n == 0) return - if (bcyb >= 0) then + if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -167,7 +164,7 @@ contains end do end if - if (bcye >= 0) then + if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -195,7 +192,7 @@ contains if (p == 0) return - if (bczb >= 0) then + if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -219,7 +216,7 @@ contains end do end if - if (bcze >= 0) then + if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1173,7 +1170,7 @@ contains integer :: i, j, k, l !< x-direction - if (bcxb >= 0) then + if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1191,7 +1188,7 @@ contains end do end if - if (bcxe >= 0) then + if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1212,7 +1209,7 @@ contains if (n == 0) return !< y-direction - if (bcyb >= 0) then + if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1230,7 +1227,7 @@ contains end do end if - if (bcye >= 0) then + if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1251,7 +1248,7 @@ contains if (p == 0) return !< z-direction - if (bczb >= 0) then + if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1269,7 +1266,7 @@ contains end do end if - if (bcze >= 0) then + if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1790,6 +1787,195 @@ contains end subroutine s_assign_default_bc_type +#ifndef MFC_PRE_PROCESS + !> The purpose of this subroutine is to populate the buffers + !! of the grid variables, which are constituted of the cell- + !! boundary locations and cell-width distributions, based on + !! the boundary conditions. + subroutine s_populate_grid_variables_buffers + + integer :: i !< Generic loop iterator + +#ifndef MFC_POST_PROCESS + ! Required for compatibiliy between codes + type(int_bounds_info) :: offset_x, offset_y, offset_z + offset_x%beg = buff_size; offset_x%end = buff_size + offset_y%beg = buff_size; offset_y%end = buff_size + offset_z%beg = buff_size; offset_z%end = buff_size +#endif + + ! Population of Buffers in x-direction + + ! 
Populating cell-width distribution buffer at bc_x%beg + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, -1) + elseif (bc_x%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(-i) = dx(0) + end do + elseif (bc_x%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dx(-i) = dx(i - 1) + end do + elseif (bc_x%beg == BC_PERIODIC) then + do i = 1, buff_size + dx(-i) = dx(m - (i - 1)) + end do + endif + + ! Computing the cell-boundary and center locations buffer at bc_x%beg + do i = 1, offset_x%beg + x_cb(-1 - i) = x_cb(-i) - dx(-i) + enddo + + do i = 1, buff_size + x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp + end do + + ! Populating the cell-width distribution buffer at bc_x%end + if (bc_x%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, 1) + elseif (bc_x%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(m + i) = dx(m) + end do + elseif (bc_x%end == BC_REFLECTIVE) then + do i = 1, buff_size + dx(m + i) = dx(m - (i - 1)) + end do + elseif (bc_x%end == BC_PERIODIC) then + do i = 1, buff_size + dx(m + i) = dx(i - 1) + end do + endif + + ! Populating the cell-boundary and center locations buffer at bc_x%end + do i = 1, offset_x%end + x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) + end do + + do i = 1, buff_size + x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp + enddo + ! END: Population of Buffers in x-direction + + ! Population of Buffers in y-direction + + ! Populating cell-width distribution buffer at bc_y%beg + if (n == 0) then + return + elseif (bc_y%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, -1) + elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(0) + end do + elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(i - 1) + end do + elseif (bc_y%beg == BC_PERIODIC) then + do i = 1, buff_size + dy(-i) = dy(n - (i - 1)) + end do + endif + + ! Computing the cell-boundary and center locations buffer at bc_y%beg + do i = 1, offset_y%beg + y_cb(-1 - i) = y_cb(-i) - dy(-i) + enddo + + do i = 1, buff_size + y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp + enddo + + ! Populating the cell-width distribution buffer at bc_y%end + if (bc_y%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, 1) + elseif (bc_y%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dy(n + i) = dy(n) + end do + elseif (bc_y%end == BC_REFLECTIVE) then + do i = 1, buff_size + dy(n + i) = dy(n - (i - 1)) + end do + elseif (bc_y%end == BC_PERIODIC) then + do i = 1, buff_size + dy(n + i) = dy(i - 1) + end do + endif + + ! Populating the cell-boundary and center locations buffer at bc_y%end + do i = 1, offset_y%end + y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) + enddo + + do i = 1, buff_size + y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp + end do + ! END: Population of Buffers in y-direction + + ! Population of Buffers in z-direction + + ! Populating cell-width distribution buffer at bc_z%beg + if (p == 0) then + return + elseif (Bc_z%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, -1) + elseif (bc_z%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(-i) = dz(0) + end do + elseif (bc_z%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dz(-i) = dz(i - 1) + end do + elseif (bc_z%beg == BC_PERIODIC) then + do i = 1, buff_size + dz(-i) = dz(p - (i - 1)) + end do + endif + + ! 
Computing the cell-boundary and center locations buffer at bc_z%beg + do i = 1, offset_z%beg + z_cb(-1 - i) = z_cb(-i) - dz(-i) + enddo + + do i = 1, buff_size + z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp + enddo + + ! Populating the cell-width distribution buffer at bc_z%end + if (bc_z%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, 1) + elseif (bc_z%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(p + i) = dz(p) + end do + elseif (bc_z%end == BC_REFLECTIVE) then + do i = 1, buff_size + dz(p + i) = dz(p - (i - 1)) + end do + elseif (bc_z%end == BC_PERIODIC) then + do i = 1, buff_size + dz(p + i) = dz(i - 1) + end do + endif + + ! Populating the cell-boundary and center locations buffer at bc_z%end + do i = 1, buff_size + z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) + enddo + + do i = 1, buff_size + z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp + enddo + ! END: Population of Buffers in z-direction + + end subroutine s_populate_grid_variables_buffers +#endif + subroutine s_finalize_boundary_common_module() if (bc_io) then diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 2db034b4b5..1b8d91e260 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -31,7 +31,6 @@ module m_data_input s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -503,256 +502,6 @@ subroutine s_read_parallel_data_files(t_step) end subroutine s_read_parallel_data_files - !> The following subroutine populates the buffer regions of - !! the cell-width spacings, the cell-boundary locations and - !! the cell-center locations. Note that the buffer regions - !! of the last two variables should be interpreted slightly - !! differently than usual. They are really ghost zones that - !! are used in aiding the multidimensional visualization of - !! Silo database files, in VisIt, when processor boundary - !! conditions are present. - subroutine s_populate_grid_variables_buffer_regions - - integer :: i !< Generic loop iterator - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(-i) = dx(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do i = 1, buff_size - dx(-i) = dx((m + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - - end if - - do i = 1, offset_x%beg - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(m + i) = dx(m) - end do - - ! Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(m + i) = dx((m + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - - ! 
Processor BC at the end - else - - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - - end if - - do i = 1, offset_x%end - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do i = 1, buff_size - dy(-i) = dy((n + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - - end if - - do i = 1, offset_y%beg - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dy(n + i) = dy(n) - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dy(n + i) = dy((n + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - - end if - - do i = 1, offset_y%end - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(-i) = dz(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do i = 1, buff_size - dz(-i) = dz((p + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - - end if - - do i = 1, offset_z%beg - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(p + i) = dz(p) - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(p + i) = dz((p + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - - end if - - do i = 1, offset_z%end - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_grid_variables_buffer_regions - !> Computation of parameters, allocation procedures, and/or !! 
any other tasks needed to properly setup the module subroutine s_initialize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index c1cb89d614..ba36a8be19 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -474,7 +474,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs >1 .and. n == 0) m_root = m_glb + if (num_procs > 1 .and. n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then @@ -773,21 +773,12 @@ contains chemxe = species_idx%end #ifdef MFC_MPI - if (bubbles_lagrange) then - allocate (MPI_IO_DATA%view(1:sys_size + 1)) - allocate (MPI_IO_DATA%var(1:sys_size + 1)) - do i = 1, sys_size + 1 - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - else - allocate (MPI_IO_DATA%view(1:sys_size)) - allocate (MPI_IO_DATA%var(1:sys_size)) - do i = 1, sys_size - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - end if + allocate (MPI_IO_DATA%view(1:sys_size)) + allocate (MPI_IO_DATA%var(1:sys_size)) + do i = 1, sys_size + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) #endif @@ -935,21 +926,18 @@ contains integer :: i ! Deallocating the grid variables for the x-coordinate direction - deallocate (x_cb, x_cc, dx) + deallocate (x_cc, x_cb, dx) ! Deallocating grid variables for the y- and z-coordinate directions if (n > 0) then - - deallocate (y_cb, y_cc, dy) - - if (p > 0) deallocate (z_cb, z_cc, dz) - + deallocate (y_cc, y_cb, dy) + if (p > 0) then + deallocate (z_cc, z_cb, dz) + end if + else ! Deallocating the grid variables, only used for the 1D simulations, ! and containing the defragmented computational domain grid data - else - deallocate (x_root_cb, x_root_cc) - end if deallocate (proc_coords) @@ -964,8 +952,6 @@ contains MPI_IO_DATA%var(i)%sf => null() end do - if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() - deallocate (MPI_IO_DATA%var) deallocate (MPI_IO_DATA%view) end if diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index fe78f01da9..e457c42722 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -182,7 +182,7 @@ subroutine s_perform_time_step(t_step) ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then - call s_populate_grid_variables_buffer_regions() + call s_populate_grid_variables_buffers() call s_populate_variables_buffers(bc_type, q_cons_vf) end if @@ -694,13 +694,15 @@ subroutine s_initialize_modules ! Computation of parameters, allocation procedures, and/or any other tasks ! needed to properly setup the modules call s_initialize_global_parameters_module() - if (bubbles_euler .and. nb > 1) then - call s_simpson - end if + if (bubbles_euler .and. nb > 1) call s_simpson if (bubbles_euler .and. .not. 
polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) call s_initialize_mpi_proxy_module() + if (num_procs > 1) then + call s_initialize_mpi_proxy_module() + call s_initialize_mpi_common_module() + endif + call s_initialize_boundary_common_module() call s_initialize_variables_conversion_module() call s_initialize_data_input_module() call s_initialize_derived_variables_module() @@ -753,7 +755,10 @@ subroutine s_finalize_modules call s_finalize_derived_variables_module() call s_finalize_data_input_module() call s_finalize_variables_conversion_module() - if (num_procs > 1) call s_finalize_mpi_proxy_module() + if (num_procs > 1) then + call s_finalize_mpi_proxy_module() + call s_finalize_mpi_common_module() + endif call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/pre_process/m_perturbation.fpp b/src/pre_process/m_perturbation.fpp index e1d9ed7efc..7afb4f9c4b 100644 --- a/src/pre_process/m_perturbation.fpp +++ b/src/pre_process/m_perturbation.fpp @@ -32,8 +32,6 @@ contains subroutine s_initialize_perturbation_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - if (mixlayer_perturb) then mixlayer_bc_fd = 2 nbp = n + 2 diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 8997b3f51a..fdceaecc43 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -96,7 +96,6 @@ module m_start_up s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffers, & s_initialize_internal_energy_equations, & s_initialize_modules, s_initialize_gpu_vars, & s_initialize_mpi_domain, s_finalize_modules, & @@ -959,219 +958,7 @@ contains end subroutine s_read_parallel_data_files - !> The purpose of this subroutine is to populate the buffers - !! of the grid variables, which are constituted of the cell- - !! boundary locations and cell-width distributions, based on - !! the boundary conditions. - subroutine s_populate_grid_variables_buffers - - integer :: i !< Generic loop iterator - - ! Population of Buffers in x-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_x%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(-i) = dx(0) - end do - elseif (bc_x%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - elseif (bc_x%beg == BC_PERIODIC) then - do i = 1, buff_size - dx(-i) = dx(m - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
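        ! For illustration: a minimal, self-contained sketch of the fill-then-accumulate
        ! pattern described in the comment above, assuming a uniform grid of width h,
        ! m = 4 and buff_size = 2 (all toy values), and using only the ghost-cell
        ! extrapolation rule at the end of the x-direction. It just shows that the
        ! ghost boundaries and centers continue the interior spacing; the symmetry,
        ! periodic, and processor cases differ only in how dx(m + i) is filled.
        program grid_buffer_sketch
            implicit none
            integer, parameter :: wp = kind(1.d0), m = 4, buff_size = 2
            real(wp) :: dx(-buff_size:m + buff_size), x_cb(-1 - buff_size:m + buff_size), &
                        x_cc(-buff_size:m + buff_size), h
            integer :: i
            h = 0.1_wp
            dx(0:m) = h
            x_cb(-1) = 0._wp
            do i = 0, m
                x_cb(i) = x_cb(i - 1) + dx(i)       ! interior cell boundaries
                x_cc(i) = x_cb(i) - dx(i)/2._wp     ! interior cell centers
            end do
            do i = 1, buff_size
                dx(m + i) = dx(m)                                 ! ghost-cell extrapolation fill
                x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i)       ! boundary-location buffer
                x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp
            end do
            print *, x_cb(m:m + buff_size)   ! 0.5  0.6  0.7  -> spacing h continues
            print *, x_cc(m:m + buff_size)   ! 0.45 0.55 0.65
        end program grid_buffer_sketch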
- if (bc_x%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(m + i) = dx(m) - end do - elseif (bc_x%end == BC_REFLECTIVE) then - do i = 1, buff_size - dx(m + i) = dx(m - (i - 1)) - end do - elseif (bc_x%end == BC_PERIODIC) then - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Population of Buffers in x-direction - - ! Population of Buffers in y-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (n == 0) then - return - elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(0) - end do - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - elseif (bc_y%beg == BC_PERIODIC) then - do i = 1, buff_size - dy(-i) = dy(n - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_y%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dy(n + i) = dy(n) - end do - elseif (bc_y%end == BC_REFLECTIVE) then - do i = 1, buff_size - dy(n + i) = dy(n - (i - 1)) - end do - elseif (bc_y%end == BC_PERIODIC) then - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Population of Buffers in y-direction - - ! Population of Buffers in z-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
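        ! A quick worked check of the index bookkeeping in the duplicated fills being
        ! removed: the post-process copy (m_data_input.f90) writes the wrap/mirror
        ! offset as (m + 1) - i, while the consolidated routine in m_boundary_common.fpp
        ! writes m - (i - 1). These are the same map, since (m + 1) - i = m - (i - 1).
        ! For the periodic fill at a "beg" face, for example,
        !     dx(-i) = dx((m + 1) - i)      ! removed form
        !     dx(-i) = dx(m - (i - 1))      ! consolidated form
        ! both take i = 1 -> dx(m), i = 2 -> dx(m - 1), ..., i = buff_size ->
        ! dx(m - buff_size + 1): the last buff_size interior widths, wrapped around,
        ! as expected for a periodic boundary.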
- if (p == 0) then - return - elseif (bc_z%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(-i) = dz(0) - end do - elseif (bc_z%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - elseif (bc_z%beg == BC_PERIODIC) then - do i = 1, buff_size - dz(-i) = dz(p - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_z%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(p + i) = dz(p) - end do - elseif (bc_z%end == BC_REFLECTIVE) then - do i = 1, buff_size - dz(p + i) = dz(p - (i - 1)) - end do - elseif (bc_z%end == BC_PERIODIC) then - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - ! END: Population of Buffers in z-direction - - end subroutine s_populate_grid_variables_buffers - - !> The purpose of this procedure is to initialize the + !> The purpose of this procedure is to initialize the !! values of the internal-energy equations of each phase !! from the mass of each phase, the mixture momentum and !! mixture-total-energy equations. From 85273cd9d136280dee7fb35668a885854a2e57d7 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 9 May 2025 12:17:33 -0400 Subject: [PATCH 16/62] format and bug fixes --- "\\" | 1603 +++++++++++++++++++++++++++++ src/common/m_boundary_common.fpp | 42 +- src/common/m_mpi_common.fpp | 18 +- src/post_process/m_data_input.f90 | 22 +- src/post_process/m_start_up.f90 | 8 +- src/simulation/m_ibm.fpp | 6 +- src/simulation/m_mpi_proxy.fpp | 12 +- 7 files changed, 1655 insertions(+), 56 deletions(-) create mode 100644 "\\" diff --git "a/\\" "b/\\" new file mode 100644 index 0000000000..6262662a46 --- /dev/null +++ "b/\\" @@ -0,0 +1,1603 @@ + +#:include 'macros.fpp' + +!> @brief The module serves as a proxy to the parameters and subroutines +!! available in the MPI implementation's MPI module. Specifically, +!! the purpose of the proxy is to harness basic MPI commands into +!! more complicated procedures as to accomplish the communication +!! goals for the simulation. 
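! For illustration, a worked sizing example for the send/receive halo buffers
! allocated in s_initialize_mpi_common_module below; the sample values
! m = n = p = 99, buff_size = 4, v_size = 10 are hypothetical and only show how
! the 3D branch of the formula behaves:
!     halo_size = -1 + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)
!                      *(p + 2*buff_size + 1)/(min(m, n, p) + 2*buff_size + 1)
!               = -1 + 4*10*(108*108*108)/108 = -1 + 40*108*108 = 466559
! so buff_send(0:halo_size) holds 4*10*108*108 = 466560 values: one slab of depth
! buff_size spanning the ghost-extended transverse plane, for every packed
! variable (v_size of them). Dividing by the smallest extended dimension is what
! reduces the volume product to the largest face of the halo.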
+module m_mpi_common + +#ifdef MFC_MPI + use mpi !< Message passing interface (MPI) module +#endif + + use m_derived_types !< Definitions of the derived types + + use m_global_parameters !< Definitions of the global parameters + + use m_helper + + use ieee_arithmetic + + use m_nvtx + + implicit none + + integer, private :: ierr, v_size !< + !$acc declare create(v_size) + !! Generic flags used to identify and report MPI errors + + real(wp), private, allocatable, dimension(:), target :: buff_send !< + !! This variable is utilized to pack and send the buffer of the cell-average + !! primitive variables, for a single computational domain boundary at the + !! time, to the relevant neighboring processor. + + real(wp), private, allocatable, dimension(:), target :: buff_recv !< + !! buff_recv is utilized to receive and unpack the buffer of the cell- + !! average primitive variables, for a single computational domain boundary + !! at the time, from the relevant neighboring processor. + + !$acc declare create(buff_send, buff_recv) + + integer :: halo_size, nVars + !$acc declare create(halo_size, nVars) + + +contains + + !> The computation of parameters, the allocation of memory, + !! the association of pointers and/or the execution of any + !! other procedures that are necessary to setup the module. + subroutine s_initialize_mpi_common_module + +#ifdef MFC_MPI + ! Allocating buff_send/recv and. Please note that for the sake of + ! simplicity, both variables are provided sufficient storage to hold + ! the largest buffer in the computational domain. + + if (qbmm .and. .not. polytropic) then + v_size = sys_size + 2*nb*4 + else + v_size = sys_size + end if + + if (n > 0) then + if (p > 0) then + halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1)) + else + halo_size = -1 + buff_size*(v_size)* & + & (max(m, n) + 2*buff_size + 1) + end if + else + halo_size = -1 + buff_size*(v_size) + end if + + !$acc update device(halo_size, v_size) + + @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) +#endif + + end subroutine s_initialize_mpi_common_module + + !> The subroutine initializes the MPI execution environment + !! and queries both the number of processors which will be + !! available for the job and the local processor rank. + subroutine s_mpi_initialize + +#ifndef MFC_MPI + + ! Serial run only has 1 processor + num_procs = 1 + ! Local processor rank is 0 + proc_rank = 0 + +#else + + ! Initializing the MPI environment + call MPI_INIT(ierr) + + ! Checking whether the MPI environment has been properly initialized + if (ierr /= MPI_SUCCESS) then + print '(A)', 'Unable to initialize MPI environment. Exiting.' + call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) + end if + + ! Querying the number of processors available for the job + call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr) + + ! Querying the rank of the local processor + call MPI_COMM_RANK(MPI_COMM_WORLD, proc_rank, ierr) + +#endif + + end subroutine s_mpi_initialize + + !! @param q_cons_vf Conservative variables + !! @param ib_markers track if a cell is within the immersed boundary + !! @param levelset closest distance from every cell to the IB + !! @param levelset_norm normalized vector from every cell to the closest point to the IB + !! 
@param beta Eulerian void fraction from lagrangian bubbles + subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) + + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(integer_field), optional, intent(in) :: ib_markers + type(levelset_field), optional, intent(IN) :: levelset + type(levelset_norm_field), optional, intent(IN) :: levelset_norm + type(scalar_field), intent(in), optional :: beta + + integer, dimension(num_dims) :: sizes_glb, sizes_loc + integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start + +#ifdef MFC_MPI + + ! Generic loop iterator + integer :: i, j, q, k, l + + !Altered system size for the lagrangian subgrid bubble model + integer :: alt_sys + + if (present(beta)) then + alt_sys = sys_size + 1 + else + alt_sys = sys_size + end if + + do i = 1, sys_size + MPI_IO_DATA%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p) + end do + + if (present(beta)) then + MPI_IO_DATA%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p) + end if + + !Additional variables pb and mv for non-polytropic qbmm +#ifdef MFC_PRE_PROCESS + if (qbmm .and. .not. polytropic) then + do i = 1, nb + do j = 1, nnode + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m, 0:n, 0:p, j, i) + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m, 0:n, 0:p, j, i) + end do + end do + end if +#endif + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. polytropic) then + do i = 1, nb + do j = 1, nnode + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m, 0:n, 0:p, j, i) + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m, 0:n, 0:p, j, i) + end do + end do + end if +#endif + ! Define global(g) and local(l) sizes for flow variables + sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1 + if (n > 0) then + sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1 + if (p > 0) then + sizes_glb(3) = p_glb + 1; sizes_loc(3) = p + 1 + end if + end if + + ! Define the view for each variable + do i = 1, alt_sys + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) + call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) + end do + +#ifndef MFC_POST_PROCESS + if (qbmm .and. .not. 
polytropic) then + do i = sys_size + 1, sys_size + 2*nb*4 + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) + call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) + + end do + end if +#endif + + if (present(ib_markers)) then + +#ifdef MFC_PRE_PROCESS + MPI_IO_IB_DATA%var%sf => ib_markers%sf + MPI_IO_levelset_DATA%var%sf => levelset%sf + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf +#else + MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p) + +#ifndef MFC_POST_PROCESS + MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) +#endif + +#endif + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, MPI_INTEGER, MPI_IO_IB_DATA%view, ierr) + call MPI_TYPE_COMMIT(MPI_IO_IB_DATA%view, ierr) + +#ifndef MFC_POST_PROCESS + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelset_DATA%view, ierr) + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelsetnorm_DATA%view, ierr) + + call MPI_TYPE_COMMIT(MPI_IO_levelset_DATA%view, ierr) + call MPI_TYPE_COMMIT(MPI_IO_levelsetnorm_DATA%view, ierr) +#endif + end if + +#ifndef MFC_POST_PROCESS + if (present(ib_markers)) then + do j = 1, num_ibs + if (patch_ib(j)%c > 0) then + +#ifdef MFC_PRE_PROCESS + allocate (MPI_IO_airfoil_IB_DATA%var(1:2*Np)) +#endif + + airfoil_glb(1) = 3*Np*num_procs + airfoil_loc(1) = 3*Np + airfoil_start(1) = 3*proc_rank*Np + +#ifdef MFC_PRE_PROCESS + do i = 1, Np + MPI_IO_airfoil_IB_DATA%var(i)%x = airfoil_grid_l(i)%x + MPI_IO_airfoil_IB_DATA%var(i)%y = airfoil_grid_l(i)%y + end do +#endif + + call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(1), ierr) + call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(1), ierr) + +#ifdef MFC_PRE_PROCESS + do i = 1, Np + MPI_IO_airfoil_IB_DATA%var(Np + i)%x = airfoil_grid_u(i)%x + MPI_IO_airfoil_IB_DATA%var(Np + i)%y = airfoil_grid_u(i)%y + end do +#endif + call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(2), ierr) + call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(2), ierr) + + end if + end do + + end if +#endif + +#endif + + end subroutine s_initialize_mpi_data + + subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root) + + integer, intent(in) :: counts ! Array of vector lengths for each process + real(wp), intent(in), dimension(counts) :: my_vector ! Input vector on each process + integer, intent(in) :: root ! Rank of the root process + real(wp), allocatable, intent(out) :: gathered_vector(:) ! 
Gathered vector on the root process + + integer :: i, offset, ierr + integer, allocatable :: recounts(:), displs(:) + +#ifdef MFC_MPI + + allocate (recounts(num_procs)) + + call MPI_GATHER(counts, 1, MPI_INTEGER, recounts, 1, MPI_INTEGER, root, & + MPI_COMM_WORLD, ierr) + + allocate (displs(size(recounts))) + + displs(1) = 0 + + do i = 2, size(recounts) + displs(i) = displs(i - 1) + recounts(i - 1) + end do + + allocate (gathered_vector(sum(recounts))) + call MPI_GATHERV(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, & + root, MPI_COMM_WORLD, ierr) +#endif + end subroutine s_mpi_gather_data + + subroutine mpi_bcast_time_step_values(proc_time, time_avg) + + real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time + real(wp), intent(inout) :: time_avg + +#ifdef MFC_MPI + + call MPI_GATHER(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine mpi_bcast_time_step_values + + subroutine s_prohibit_abort(condition, message) + character(len=*), intent(in) :: condition, message + + print *, "" + print *, "CASE FILE ERROR" + print *, " - Prohibited condition: ", trim(condition) + if (len_trim(message) > 0) then + print *, " - Note: ", trim(message) + end if + print *, "" + call s_mpi_abort(code=CASE_FILE_ERROR_CODE) + end subroutine s_prohibit_abort + + !> The goal of this subroutine is to determine the global + !! extrema of the stability criteria in the computational + !! domain. This is performed by sifting through the local + !! extrema of each stability criterion. Note that each of + !! the local extrema is from a single process, within its + !! assigned section of the computational domain. Finally, + !! note that the global extrema values are only bookkeept + !! on the rank 0 processor. + !! @param icfl_max_loc Local maximum ICFL stability criterion + !! @param vcfl_max_loc Local maximum VCFL stability criterion + !! @param Rc_min_loc Local minimum Rc stability criterion + !! @param icfl_max_glb Global maximum ICFL stability criterion + !! @param vcfl_max_glb Global maximum VCFL stability criterion + !! @param Rc_min_glb Global minimum Rc stability criterion + subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & + vcfl_max_loc, & + ccfl_max_loc, & + Rc_min_loc, & + icfl_max_glb, & + vcfl_max_glb, & + ccfl_max_glb, & + Rc_min_glb) + + real(wp), intent(in) :: icfl_max_loc + real(wp), intent(in) :: vcfl_max_loc + real(wp), intent(in) :: ccfl_max_loc + real(wp), intent(in) :: Rc_min_loc + + real(wp), intent(out) :: icfl_max_glb + real(wp), intent(out) :: vcfl_max_glb + real(wp), intent(out) :: ccfl_max_glb + real(wp), intent(out) :: Rc_min_glb + +#ifdef MFC_SIMULATION +#ifdef MFC_MPI + + ! Reducing local extrema of ICFL, VCFL, CCFL and Rc numbers to their + ! global extrema and bookkeeping the results on the rank 0 processor + call MPI_REDUCE(icfl_max_loc, icfl_max_glb, 1, & + mpi_p, MPI_MAX, 0, & + MPI_COMM_WORLD, ierr) + + if (viscous) then + call MPI_REDUCE(vcfl_max_loc, vcfl_max_glb, 1, & + mpi_p, MPI_MAX, 0, & + MPI_COMM_WORLD, ierr) + call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & + mpi_p, MPI_MIN, 0, & + MPI_COMM_WORLD, ierr) + end if + +#else + + icfl_max_glb = icfl_max_loc + + if (viscous) then + vcfl_max_glb = vcfl_max_loc + Rc_min_glb = Rc_min_loc + end if + +#endif +#endif + + end subroutine s_mpi_reduce_stability_criteria_extrema + + !> The following subroutine takes the input local variable + !! from all processors and reduces to the sum of all + !! values. 
The reduced variable is recorded back onto the + !! original local variable on each processor. + !! @param var_loc Some variable containing the local value which should be + !! reduced amongst all the processors in the communicator. + !! @param var_glb The globally reduced value + subroutine s_mpi_allreduce_sum(var_loc, var_glb) + + real(wp), intent(in) :: var_loc + real(wp), intent(out) :: var_glb + +#ifdef MFC_MPI + + ! Performing the reduction procedure + call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_SUM, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_allreduce_sum + + !> The following subroutine takes the input local variable + !! from all processors and reduces to the minimum of all + !! values. The reduced variable is recorded back onto the + !! original local variable on each processor. + !! @param var_loc Some variable containing the local value which should be + !! reduced amongst all the processors in the communicator. + !! @param var_glb The globally reduced value + subroutine s_mpi_allreduce_min(var_loc, var_glb) + + real(wp), intent(in) :: var_loc + real(wp), intent(out) :: var_glb + +#ifdef MFC_MPI + + ! Performing the reduction procedure + call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_MIN, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_allreduce_min + + !> The following subroutine takes the input local variable + !! from all processors and reduces to the maximum of all + !! values. The reduced variable is recorded back onto the + !! original local variable on each processor. + !! @param var_loc Some variable containing the local value which should be + !! reduced amongst all the processors in the communicator. + !! @param var_glb The globally reduced value + subroutine s_mpi_allreduce_max(var_loc, var_glb) + + real(wp), intent(in) :: var_loc + real(wp), intent(out) :: var_glb + +#ifdef MFC_MPI + + ! Performing the reduction procedure + call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_MAX, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_allreduce_max + + !> The following subroutine takes the inputted variable and + !! determines its minimum value on the entire computational + !! domain. The result is stored back into inputted variable. + !! @param var_loc holds the local value to be reduced among + !! all the processors in communicator. On output, the variable holds + !! the minimum value, reduced amongst all of the local values. + subroutine s_mpi_reduce_min(var_loc) + + real(wp), intent(inout) :: var_loc + +#ifdef MFC_MPI + + ! Temporary storage variable that holds the reduced minimum value + real(wp) :: var_glb + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_MIN, 0, MPI_COMM_WORLD, ierr) + + call MPI_BCAST(var_glb, 1, mpi_p, & + 0, MPI_COMM_WORLD, ierr) + + var_loc = var_glb + +#endif + + end subroutine s_mpi_reduce_min + + !> The following subroutine takes the first element of the + !! 2-element inputted variable and determines its maximum + !! value on the entire computational domain. The result is + !! stored back into the first element of the variable while + !! the rank of the processor that is in charge of the sub- + !! domain containing the maximum is stored into the second + !! element of the variable. + !! @param var_loc On input, this variable holds the local value and processor rank, + !! which are to be reduced among all the processors in communicator. + !! 
On output, this variable holds the maximum value, reduced amongst + !! all of the local values, and the process rank to which the value + !! belongs. + subroutine s_mpi_reduce_maxloc(var_loc) + + real(wp), dimension(2), intent(inout) :: var_loc + +#ifdef MFC_MPI + + real(wp), dimension(2) :: var_glb !< + !! Temporary storage variable that holds the reduced maximum value + !! and the rank of the processor with which the value is associated + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, var_glb, 1, mpi_2p, & + MPI_MAXLOC, 0, MPI_COMM_WORLD, ierr) + + call MPI_BCAST(var_glb, 1, mpi_2p, & + 0, MPI_COMM_WORLD, ierr) + + var_loc = var_glb + +#endif + + end subroutine s_mpi_reduce_maxloc + + !> The subroutine terminates the MPI execution environment. + !! @param prnt error message to be printed + subroutine s_mpi_abort(prnt, code) + + character(len=*), intent(in), optional :: prnt + integer, intent(in), optional :: code + + if (present(prnt)) then + print *, prnt + call flush (6) + + end if + +#ifndef MFC_MPI + if (present(code)) then + stop code + else + stop 1 + end if +#else + ! Terminating the MPI environment + if (present(code)) then + call MPI_ABORT(MPI_COMM_WORLD, code, ierr) + else + call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) + end if +#endif + + end subroutine s_mpi_abort + + !>Halts all processes until all have reached barrier. + subroutine s_mpi_barrier + +#ifdef MFC_MPI + + ! Calling MPI_BARRIER + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_barrier + + !> The subroutine finalizes the MPI execution environment. + subroutine s_mpi_finalize + +#ifdef MFC_MPI + + ! Finalizing the MPI environment + call MPI_FINALIZE(ierr) + +#endif + + end subroutine s_mpi_finalize + + !> The goal of this procedure is to populate the buffers of + !! the cell-average conservative variables by communicating + !! with the neighboring processors. + !! @param q_cons_vf Cell-average conservative variables + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location + subroutine s_mpi_sendrecv_variables_buffers(q_comm, & + mpi_dir, & + pbc_loc, & + nVar, & + pb, mv) + + type(scalar_field), dimension(1:), intent(inout) :: q_comm + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + integer, intent(in) :: mpi_dir, pbc_loc, nVar + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0, qbmm_comm + + integer :: pack_offset, unpack_offset + + real(wp), pointer :: p_send, p_recv + +#ifdef MFC_MPI + + call nvtxStartRange("RHS-COMM-PACKBUF") + + qbmm_comm = .false. + + if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then + qbmm_comm = .true. 
+ v_size = nVar + 2*nb*4 + buffer_counts = (/ & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + else + v_size = nVar + buffer_counts = (/ & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + end if + + !$acc update device(v_size, nVar) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) + end do + end do + end do + end do + end do + end if + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 
1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) + end do + end do + end do + end do + end do + end if + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) + end do + end do + end do + end do + end do + end if + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + p_send => buff_send(0) + p_recv => buff_recv(0) + + ! Send/Recv +#ifdef MFC_SIMULATION + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(buff_send) + call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(buff_recv) + call nvtxEndRange + #:endif + end if + #:endfor +#else + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif + + ! 
Unpack Received Buffer + call nvtxStartRange("RHS-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + end if + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + end if + #:else + ! 
Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + end if + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_sendrecv_variables_buffers + + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain + +#ifdef MFC_MPI + + integer :: num_procs_x, num_procs_y, num_procs_z !< + !! Optimal number of processors in the x-, y- and z-directions + + real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions + + real(wp) :: fct_min !< + !! Processor factorization (fct) minimization parameter + + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator + + integer :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors + + integer :: i, j !< Generic loop iterators + + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if + + ! 3D Cartesian Processor Topology + if (n > 0) then + + if (p > 0) then + + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. + + ! Initial values of the processor factorization optimization + num_procs_x = 1 + num_procs_y = num_procs + num_procs_z = 1 + ierr = -1 + + ! Computing minimization variable for these initial values + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! 
Searching for optimal computational domain distribution + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10._wp*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + num_procs_x = i + num_procs_y = j + num_procs_z = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + + end do + + end if + + end do + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & + num_procs_y, num_procs_z/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology + + ! Global Parameters for z-direction + + ! Number of remaining cells + rem_cells = mod(p + 1, num_procs_z) + + ! Optimal number of cells per processor + p = (p + 1)/num_procs_z - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%beg, ierr) + proc_coords(3) = proc_coords(3) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. 
num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%end, ierr) + proc_coords(3) = proc_coords(3) - 1 + end if + + if (parallel_io) then + if (proc_coords(3) < rem_cells) then + start_idx(3) = (p + 1)*proc_coords(3) + else + start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + end if + end if + + ! 2D Cartesian Processor Topology + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & + num_procs_y/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + + end if + ! END: 2D Cartesian Processor Topology + + ! Global Parameters for y-direction + + ! Number of remaining cells + rem_cells = mod(n + 1, num_procs_y) + + ! Optimal number of cells per processor + n = (n + 1)/num_procs_y - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%beg, ierr) + proc_coords(2) = proc_coords(2) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%end, ierr) + proc_coords(2) = proc_coords(2) - 1 + end if + + if (parallel_io) then + if (proc_coords(2) < rem_cells) then + start_idx(2) = (n + 1)*proc_coords(2) + else + start_idx(2) = (n + 1)*proc_coords(2) + rem_cells + end if + end if + + ! 1D Cartesian Processor Topology + else + + ! Optimal processor topology + num_procs_x = num_procs + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + + ! Global Parameters for x-direction + + ! Number of remaining cells + rem_cells = mod(m + 1, num_procs_x) + + ! 
Optimal number of cells per processor + m = (m + 1)/num_procs_x - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) + proc_coords(1) = proc_coords(1) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) + proc_coords(1) = proc_coords(1) - 1 + end if + + if (parallel_io) then + if (proc_coords(1) < rem_cells) then + start_idx(1) = (m + 1)*proc_coords(1) + else + start_idx(1) = (m + 1)*proc_coords(1) + rem_cells + end if + end if + +#endif + + end subroutine s_mpi_decompose_computational_domain + + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location +#ifndef MFC_PRE_PROCESS + subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) + + integer, intent(in) :: mpi_dir + integer, intent(in) :: pbc_loc + +#ifdef MFC_MPI + + ! MPI Communication in x-direction + if (mpi_dir == 1) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_x%end/bc_x%beg + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_x%beg/bc_x%beg + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_x%end/bc_x%end + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction + elseif (mpi_dir == 2) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! 
Send/receive buffer to/from bc_y%beg/bc_y%beg + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_y%end/bc_y%end + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction + else + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_z%beg/bc_z%beg + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_z%end/bc_z%end + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + + end if + ! END: MPI Communication in z-direction +#endif + + end subroutine s_mpi_sendrecv_grid_variables_buffers +#endif + + !> Module deallocation and/or disassociation procedures + subroutine s_finalize_mpi_common_module + +#ifdef MFC_MPI + deallocate (buff_send, buff_recv) +#endif + + end subroutine s_finalize_mpi_common_module + +end module m_mpi_common diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 2fa160c5f5..bdfb163908 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -21,11 +21,10 @@ module m_boundary_common use m_compile_specific - implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - !$acc declare create(bc_buffers) +!$acc declare create(bc_buffers) #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -1787,7 +1786,6 @@ contains end subroutine s_assign_default_bc_type -#ifndef MFC_PRE_PROCESS !> The purpose of this subroutine is to populate the buffers !! of the grid variables, which are constituted of the cell- !! boundary locations and cell-width distributions, based on @@ -1796,14 +1794,15 @@ contains integer :: i !< Generic loop iterator -#ifndef MFC_POST_PROCESS - ! Required for compatibiliy between codes +#ifdef MFC_SIMULATION + ! 
Required for compatibility between codes type(int_bounds_info) :: offset_x, offset_y, offset_z offset_x%beg = buff_size; offset_x%end = buff_size offset_y%beg = buff_size; offset_y%end = buff_size offset_z%beg = buff_size; offset_z%end = buff_size #endif +#ifndef MFC_PRE_PROCESS ! Population of Buffers in x-direction ! Populating cell-width distribution buffer at bc_x%beg @@ -1821,12 +1820,12 @@ contains do i = 1, buff_size dx(-i) = dx(m - (i - 1)) end do - endif + end if ! Computing the cell-boundary and center locations buffer at bc_x%beg do i = 1, offset_x%beg x_cb(-1 - i) = x_cb(-i) - dx(-i) - enddo + end do do i = 1, buff_size x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp @@ -1847,7 +1846,7 @@ contains do i = 1, buff_size dx(m + i) = dx(i - 1) end do - endif + end if ! Populating the cell-boundary and center locations buffer at bc_x%end do i = 1, offset_x%end @@ -1856,7 +1855,7 @@ contains do i = 1, buff_size x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - enddo + end do ! END: Population of Buffers in x-direction ! Population of Buffers in y-direction @@ -1878,16 +1877,16 @@ contains do i = 1, buff_size dy(-i) = dy(n - (i - 1)) end do - endif + end if ! Computing the cell-boundary and center locations buffer at bc_y%beg do i = 1, offset_y%beg y_cb(-1 - i) = y_cb(-i) - dy(-i) - enddo + end do do i = 1, buff_size y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - enddo + end do ! Populating the cell-width distribution buffer at bc_y%end if (bc_y%end >= 0) then @@ -1904,12 +1903,12 @@ contains do i = 1, buff_size dy(n + i) = dy(i - 1) end do - endif + end if ! Populating the cell-boundary and center locations buffer at bc_y%end do i = 1, offset_y%end y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - enddo + end do do i = 1, buff_size y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp @@ -1935,16 +1934,16 @@ contains do i = 1, buff_size dz(-i) = dz(p - (i - 1)) end do - endif + end if ! Computing the cell-boundary and center locations buffer at bc_z%beg do i = 1, offset_z%beg z_cb(-1 - i) = z_cb(-i) - dz(-i) - enddo + end do do i = 1, buff_size z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - enddo + end do ! Populating the cell-width distribution buffer at bc_z%end if (bc_z%end >= 0) then @@ -1961,21 +1960,22 @@ contains do i = 1, buff_size dz(p + i) = dz(i - 1) end do - endif + end if ! Populating the cell-boundary and center locations buffer at bc_z%end do i = 1, buff_size z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - enddo + end do do i = 1, buff_size z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - enddo + end do ! END: Population of Buffers in z-direction - end subroutine s_populate_grid_variables_buffers #endif + end subroutine s_populate_grid_variables_buffers + subroutine s_finalize_boundary_common_module() if (bc_io) then diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index bf2260ec5b..760f1ae5ac 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -40,8 +40,8 @@ module m_mpi_common !$acc declare create(buff_send, buff_recv) - integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) + integer :: halo_size + !$acc declare create(halo_size) contains @@ -51,8 +51,8 @@ contains subroutine s_initialize_mpi_common_module #ifdef MFC_MPI - ! Allocating buff_send/recv and. Please note that for the sake of - ! simplicity, both variables are provided sufficient storage to hold + ! Allocating buff_send/recv and. Please note that for the sake of + ! 
simplicity, both variables are provided sufficient storage to hold ! the largest buffer in the computational domain. if (qbmm .and. .not. polytropic) then @@ -63,7 +63,7 @@ contains if (n > 0) then if (p > 0) then - halo_size = NINT(-1._wp + 1._wp * buff_size*(v_size)* & + halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & & (m + 2*buff_size + 1)* & & (n + 2*buff_size + 1)* & & (p + 2*buff_size + 1)/ & @@ -598,8 +598,7 @@ contains type(scalar_field), dimension(1:), intent(inout) :: q_comm real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv - - integer, intent(in) :: mpi_dir, pbc_loc + integer, intent(in) :: mpi_dir, pbc_loc, nVar integer :: i, j, k, l, r, q !< Generic loop iterators @@ -611,7 +610,7 @@ contains logical :: beg_end_geq_0, qbmm_comm - integer :: pack_offset, unpack_offset, nVar + integer :: pack_offset, unpack_offset real(wp), pointer :: p_send, p_recv @@ -798,7 +797,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar+ 1, nVar + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1591,7 +1590,6 @@ contains end subroutine s_mpi_sendrecv_grid_variables_buffers #endif - !> Module deallocation and/or disassociation procedures subroutine s_finalize_mpi_common_module diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 1b8d91e260..73f6b2367f 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -593,16 +593,16 @@ subroutine s_initialize_data_input_module end if ! Allocating arrays to store the bc types - allocate(bc_type(1:num_dims,-1:1)) + allocate (bc_type(1:num_dims, -1:1)) - allocate(bc_type(1,-1)%sf(0:0,0:n,0:p)) - allocate(bc_type(1,1)%sf(0:0,0:n,0:p)) + allocate (bc_type(1, -1)%sf(0:0, 0:n, 0:p)) + allocate (bc_type(1, 1)%sf(0:0, 0:n, 0:p)) if (n > 0) then - allocate(bc_type(2,-1)%sf(-buff_size:m+buff_size,0:0,0:p)) - allocate(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) + allocate (bc_type(2, -1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) + allocate (bc_type(2, 1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) if (p > 0) then - allocate(bc_type(3,-1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) - allocate(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + allocate (bc_type(3, -1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) + allocate (bc_type(3, 1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) end if end if @@ -636,15 +636,15 @@ subroutine s_finalize_data_input_module deallocate (q_T_sf%sf) end if - deallocate(bc_type(1,-1)%sf, bc_type(1,1)%sf) + deallocate (bc_type(1, -1)%sf, bc_type(1, 1)%sf) if (n > 0) then - deallocate(bc_type(2,-1)%sf, bc_type(2, 1)%sf) + deallocate (bc_type(2, -1)%sf, bc_type(2, 1)%sf) if (p > 0) then - deallocate(bc_type(3,-1)%sf, bc_type(3,1)%sf) + deallocate (bc_type(3, -1)%sf, bc_type(3, 1)%sf) end if end if - deallocate(bc_type) + deallocate (bc_type) s_read_data_files => null() diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index e457c42722..218da370ae 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -123,7 +123,7 @@ subroutine s_read_input_file if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & num_bc_patches > 0) then bc_io = .true. - endif + end if else call s_mpi_abort('File post_process.inp is missing. 
Exiting.') @@ -698,10 +698,10 @@ subroutine s_initialize_modules if (bubbles_euler .and. .not. polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) then + if (num_procs > 1) then call s_initialize_mpi_proxy_module() call s_initialize_mpi_common_module() - endif + end if call s_initialize_boundary_common_module() call s_initialize_variables_conversion_module() call s_initialize_data_input_module() @@ -758,7 +758,7 @@ subroutine s_finalize_modules if (num_procs > 1) then call s_finalize_mpi_proxy_module() call s_finalize_mpi_common_module() - endif + end if call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 08d1893c1e..746e67573e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! Get neighboring IB variables from other processors - call s_populate_ib_buffers(ib_markers) + call s_populate_ib_buffers() !$acc update host(ib_markers%sf) @@ -108,9 +108,7 @@ contains end subroutine s_ibm_setup - subroutine s_populate_ib_buffers(ib_markers) - - type(integer_field), intent(inout) :: ib_markers + subroutine s_populate_ib_buffers() #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in [('beg', -1), ('end', 1)] diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index de42e53f5c..c1aaa8f4b3 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -58,7 +58,7 @@ contains if (ib) then if (n > 0) then if (p > 0) then - i_halo_size = -1 + gp_layers * & + i_halo_size = -1 + gp_layers* & & (m + 2*gp_layers + 1)* & & (n + 2*gp_layers + 1)* & & (p + 2*gp_layers + 1)/ & @@ -303,7 +303,7 @@ contains do l = 0, p do k = 0, n do j = 0, gp_layers - 1 - r = (j + gp_layers * (k + (n + 1) * l)) + r = (j + gp_layers*(k + (n + 1)*l)) ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) end do end do @@ -312,8 +312,8 @@ contains !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers+ 1)* & + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & (k + gp_layers*l)) ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) end do @@ -380,7 +380,7 @@ contains do l = 0, p do k = 0, n do j = -gp_layers, -1 - r = (j + gp_layers * ((k + 1) + (n + 1) * l)) + r = (j + gp_layers*((k + 1) + (n + 1)*l)) ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) end do end do @@ -404,7 +404,7 @@ contains do j = -gp_layers, m + gp_layers r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) + (l + gp_layers))) ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) end do end do From 5bb1288b87b22208fb14be5593c0aa608885ed28 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 9 May 2025 12:17:52 -0400 Subject: [PATCH 17/62] garbage cleanup --- "\\" | 1603 ---------------------------------------------------------- 1 file changed, 1603 deletions(-) delete mode 100644 "\\" diff --git "a/\\" "b/\\" deleted file mode 100644 index 6262662a46..0000000000 --- "a/\\" +++ /dev/null @@ -1,1603 +0,0 @@ - -#:include 'macros.fpp' - -!> @brief The module serves as a proxy to the parameters and subroutines -!! available in the MPI implementation's MPI module. Specifically, -!! the purpose of the proxy is to harness basic MPI commands into -!! 
more complicated procedures as to accomplish the communication -!! goals for the simulation. -module m_mpi_common - -#ifdef MFC_MPI - use mpi !< Message passing interface (MPI) module -#endif - - use m_derived_types !< Definitions of the derived types - - use m_global_parameters !< Definitions of the global parameters - - use m_helper - - use ieee_arithmetic - - use m_nvtx - - implicit none - - integer, private :: ierr, v_size !< - !$acc declare create(v_size) - !! Generic flags used to identify and report MPI errors - - real(wp), private, allocatable, dimension(:), target :: buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! primitive variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - real(wp), private, allocatable, dimension(:), target :: buff_recv !< - !! buff_recv is utilized to receive and unpack the buffer of the cell- - !! average primitive variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - !$acc declare create(buff_send, buff_recv) - - integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) - - -contains - - !> The computation of parameters, the allocation of memory, - !! the association of pointers and/or the execution of any - !! other procedures that are necessary to setup the module. - subroutine s_initialize_mpi_common_module - -#ifdef MFC_MPI - ! Allocating buff_send/recv and. Please note that for the sake of - ! simplicity, both variables are provided sufficient storage to hold - ! the largest buffer in the computational domain. - - if (qbmm .and. .not. polytropic) then - v_size = sys_size + 2*nb*4 - else - v_size = sys_size - end if - - if (n > 0) then - if (p > 0) then - halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1)) - else - halo_size = -1 + buff_size*(v_size)* & - & (max(m, n) + 2*buff_size + 1) - end if - else - halo_size = -1 + buff_size*(v_size) - end if - - !$acc update device(halo_size, v_size) - - @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) -#endif - - end subroutine s_initialize_mpi_common_module - - !> The subroutine initializes the MPI execution environment - !! and queries both the number of processors which will be - !! available for the job and the local processor rank. - subroutine s_mpi_initialize - -#ifndef MFC_MPI - - ! Serial run only has 1 processor - num_procs = 1 - ! Local processor rank is 0 - proc_rank = 0 - -#else - - ! Initializing the MPI environment - call MPI_INIT(ierr) - - ! Checking whether the MPI environment has been properly initialized - if (ierr /= MPI_SUCCESS) then - print '(A)', 'Unable to initialize MPI environment. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Querying the number of processors available for the job - call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr) - - ! Querying the rank of the local processor - call MPI_COMM_RANK(MPI_COMM_WORLD, proc_rank, ierr) - -#endif - - end subroutine s_mpi_initialize - - !! @param q_cons_vf Conservative variables - !! @param ib_markers track if a cell is within the immersed boundary - !! @param levelset closest distance from every cell to the IB - !! @param levelset_norm normalized vector from every cell to the closest point to the IB - !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) - - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - type(integer_field), optional, intent(in) :: ib_markers - type(levelset_field), optional, intent(IN) :: levelset - type(levelset_norm_field), optional, intent(IN) :: levelset_norm - type(scalar_field), intent(in), optional :: beta - - integer, dimension(num_dims) :: sizes_glb, sizes_loc - integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start - -#ifdef MFC_MPI - - ! Generic loop iterator - integer :: i, j, q, k, l - - !Altered system size for the lagrangian subgrid bubble model - integer :: alt_sys - - if (present(beta)) then - alt_sys = sys_size + 1 - else - alt_sys = sys_size - end if - - do i = 1, sys_size - MPI_IO_DATA%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p) - end do - - if (present(beta)) then - MPI_IO_DATA%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p) - end if - - !Additional variables pb and mv for non-polytropic qbmm -#ifdef MFC_PRE_PROCESS - if (qbmm .and. .not. polytropic) then - do i = 1, nb - do j = 1, nnode - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m, 0:n, 0:p, j, i) - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m, 0:n, 0:p, j, i) - end do - end do - end if -#endif - -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then - do i = 1, nb - do j = 1, nnode - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m, 0:n, 0:p, j, i) - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m, 0:n, 0:p, j, i) - end do - end do - end if -#endif - ! Define global(g) and local(l) sizes for flow variables - sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1 - if (n > 0) then - sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1 - if (p > 0) then - sizes_glb(3) = p_glb + 1; sizes_loc(3) = p + 1 - end if - end if - - ! Define the view for each variable - do i = 1, alt_sys - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) - call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) - end do - -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. 
polytropic) then - do i = sys_size + 1, sys_size + 2*nb*4 - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) - call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) - - end do - end if -#endif - - if (present(ib_markers)) then - -#ifdef MFC_PRE_PROCESS - MPI_IO_IB_DATA%var%sf => ib_markers%sf - MPI_IO_levelset_DATA%var%sf => levelset%sf - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf -#else - MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p) - -#ifndef MFC_POST_PROCESS - MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) -#endif - -#endif - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, MPI_INTEGER, MPI_IO_IB_DATA%view, ierr) - call MPI_TYPE_COMMIT(MPI_IO_IB_DATA%view, ierr) - -#ifndef MFC_POST_PROCESS - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelset_DATA%view, ierr) - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelsetnorm_DATA%view, ierr) - - call MPI_TYPE_COMMIT(MPI_IO_levelset_DATA%view, ierr) - call MPI_TYPE_COMMIT(MPI_IO_levelsetnorm_DATA%view, ierr) -#endif - end if - -#ifndef MFC_POST_PROCESS - if (present(ib_markers)) then - do j = 1, num_ibs - if (patch_ib(j)%c > 0) then - -#ifdef MFC_PRE_PROCESS - allocate (MPI_IO_airfoil_IB_DATA%var(1:2*Np)) -#endif - - airfoil_glb(1) = 3*Np*num_procs - airfoil_loc(1) = 3*Np - airfoil_start(1) = 3*proc_rank*Np - -#ifdef MFC_PRE_PROCESS - do i = 1, Np - MPI_IO_airfoil_IB_DATA%var(i)%x = airfoil_grid_l(i)%x - MPI_IO_airfoil_IB_DATA%var(i)%y = airfoil_grid_l(i)%y - end do -#endif - - call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(1), ierr) - call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(1), ierr) - -#ifdef MFC_PRE_PROCESS - do i = 1, Np - MPI_IO_airfoil_IB_DATA%var(Np + i)%x = airfoil_grid_u(i)%x - MPI_IO_airfoil_IB_DATA%var(Np + i)%y = airfoil_grid_u(i)%y - end do -#endif - call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(2), ierr) - call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(2), ierr) - - end if - end do - - end if -#endif - -#endif - - end subroutine s_initialize_mpi_data - - subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root) - - integer, intent(in) :: counts ! Array of vector lengths for each process - real(wp), intent(in), dimension(counts) :: my_vector ! Input vector on each process - integer, intent(in) :: root ! Rank of the root process - real(wp), allocatable, intent(out) :: gathered_vector(:) ! 
Gathered vector on the root process - - integer :: i, offset, ierr - integer, allocatable :: recounts(:), displs(:) - -#ifdef MFC_MPI - - allocate (recounts(num_procs)) - - call MPI_GATHER(counts, 1, MPI_INTEGER, recounts, 1, MPI_INTEGER, root, & - MPI_COMM_WORLD, ierr) - - allocate (displs(size(recounts))) - - displs(1) = 0 - - do i = 2, size(recounts) - displs(i) = displs(i - 1) + recounts(i - 1) - end do - - allocate (gathered_vector(sum(recounts))) - call MPI_GATHERV(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, & - root, MPI_COMM_WORLD, ierr) -#endif - end subroutine s_mpi_gather_data - - subroutine mpi_bcast_time_step_values(proc_time, time_avg) - - real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time - real(wp), intent(inout) :: time_avg - -#ifdef MFC_MPI - - call MPI_GATHER(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine mpi_bcast_time_step_values - - subroutine s_prohibit_abort(condition, message) - character(len=*), intent(in) :: condition, message - - print *, "" - print *, "CASE FILE ERROR" - print *, " - Prohibited condition: ", trim(condition) - if (len_trim(message) > 0) then - print *, " - Note: ", trim(message) - end if - print *, "" - call s_mpi_abort(code=CASE_FILE_ERROR_CODE) - end subroutine s_prohibit_abort - - !> The goal of this subroutine is to determine the global - !! extrema of the stability criteria in the computational - !! domain. This is performed by sifting through the local - !! extrema of each stability criterion. Note that each of - !! the local extrema is from a single process, within its - !! assigned section of the computational domain. Finally, - !! note that the global extrema values are only bookkeept - !! on the rank 0 processor. - !! @param icfl_max_loc Local maximum ICFL stability criterion - !! @param vcfl_max_loc Local maximum VCFL stability criterion - !! @param Rc_min_loc Local minimum Rc stability criterion - !! @param icfl_max_glb Global maximum ICFL stability criterion - !! @param vcfl_max_glb Global maximum VCFL stability criterion - !! @param Rc_min_glb Global minimum Rc stability criterion - subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & - vcfl_max_loc, & - ccfl_max_loc, & - Rc_min_loc, & - icfl_max_glb, & - vcfl_max_glb, & - ccfl_max_glb, & - Rc_min_glb) - - real(wp), intent(in) :: icfl_max_loc - real(wp), intent(in) :: vcfl_max_loc - real(wp), intent(in) :: ccfl_max_loc - real(wp), intent(in) :: Rc_min_loc - - real(wp), intent(out) :: icfl_max_glb - real(wp), intent(out) :: vcfl_max_glb - real(wp), intent(out) :: ccfl_max_glb - real(wp), intent(out) :: Rc_min_glb - -#ifdef MFC_SIMULATION -#ifdef MFC_MPI - - ! Reducing local extrema of ICFL, VCFL, CCFL and Rc numbers to their - ! global extrema and bookkeeping the results on the rank 0 processor - call MPI_REDUCE(icfl_max_loc, icfl_max_glb, 1, & - mpi_p, MPI_MAX, 0, & - MPI_COMM_WORLD, ierr) - - if (viscous) then - call MPI_REDUCE(vcfl_max_loc, vcfl_max_glb, 1, & - mpi_p, MPI_MAX, 0, & - MPI_COMM_WORLD, ierr) - call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & - mpi_p, MPI_MIN, 0, & - MPI_COMM_WORLD, ierr) - end if - -#else - - icfl_max_glb = icfl_max_loc - - if (viscous) then - vcfl_max_glb = vcfl_max_loc - Rc_min_glb = Rc_min_loc - end if - -#endif -#endif - - end subroutine s_mpi_reduce_stability_criteria_extrema - - !> The following subroutine takes the input local variable - !! from all processors and reduces to the sum of all - !! values. 
The reduced variable is recorded back onto the - !! original local variable on each processor. - !! @param var_loc Some variable containing the local value which should be - !! reduced amongst all the processors in the communicator. - !! @param var_glb The globally reduced value - subroutine s_mpi_allreduce_sum(var_loc, var_glb) - - real(wp), intent(in) :: var_loc - real(wp), intent(out) :: var_glb - -#ifdef MFC_MPI - - ! Performing the reduction procedure - call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_SUM, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_allreduce_sum - - !> The following subroutine takes the input local variable - !! from all processors and reduces to the minimum of all - !! values. The reduced variable is recorded back onto the - !! original local variable on each processor. - !! @param var_loc Some variable containing the local value which should be - !! reduced amongst all the processors in the communicator. - !! @param var_glb The globally reduced value - subroutine s_mpi_allreduce_min(var_loc, var_glb) - - real(wp), intent(in) :: var_loc - real(wp), intent(out) :: var_glb - -#ifdef MFC_MPI - - ! Performing the reduction procedure - call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_MIN, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_allreduce_min - - !> The following subroutine takes the input local variable - !! from all processors and reduces to the maximum of all - !! values. The reduced variable is recorded back onto the - !! original local variable on each processor. - !! @param var_loc Some variable containing the local value which should be - !! reduced amongst all the processors in the communicator. - !! @param var_glb The globally reduced value - subroutine s_mpi_allreduce_max(var_loc, var_glb) - - real(wp), intent(in) :: var_loc - real(wp), intent(out) :: var_glb - -#ifdef MFC_MPI - - ! Performing the reduction procedure - call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_MAX, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_allreduce_max - - !> The following subroutine takes the inputted variable and - !! determines its minimum value on the entire computational - !! domain. The result is stored back into inputted variable. - !! @param var_loc holds the local value to be reduced among - !! all the processors in communicator. On output, the variable holds - !! the minimum value, reduced amongst all of the local values. - subroutine s_mpi_reduce_min(var_loc) - - real(wp), intent(inout) :: var_loc - -#ifdef MFC_MPI - - ! Temporary storage variable that holds the reduced minimum value - real(wp) :: var_glb - - ! Performing reduction procedure and eventually storing its result - ! into the variable that was initially inputted into the subroutine - call MPI_REDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_MIN, 0, MPI_COMM_WORLD, ierr) - - call MPI_BCAST(var_glb, 1, mpi_p, & - 0, MPI_COMM_WORLD, ierr) - - var_loc = var_glb - -#endif - - end subroutine s_mpi_reduce_min - - !> The following subroutine takes the first element of the - !! 2-element inputted variable and determines its maximum - !! value on the entire computational domain. The result is - !! stored back into the first element of the variable while - !! the rank of the processor that is in charge of the sub- - !! domain containing the maximum is stored into the second - !! element of the variable. - !! @param var_loc On input, this variable holds the local value and processor rank, - !! which are to be reduced among all the processors in communicator. - !! 
On output, this variable holds the maximum value, reduced amongst - !! all of the local values, and the process rank to which the value - !! belongs. - subroutine s_mpi_reduce_maxloc(var_loc) - - real(wp), dimension(2), intent(inout) :: var_loc - -#ifdef MFC_MPI - - real(wp), dimension(2) :: var_glb !< - !! Temporary storage variable that holds the reduced maximum value - !! and the rank of the processor with which the value is associated - - ! Performing reduction procedure and eventually storing its result - ! into the variable that was initially inputted into the subroutine - call MPI_REDUCE(var_loc, var_glb, 1, mpi_2p, & - MPI_MAXLOC, 0, MPI_COMM_WORLD, ierr) - - call MPI_BCAST(var_glb, 1, mpi_2p, & - 0, MPI_COMM_WORLD, ierr) - - var_loc = var_glb - -#endif - - end subroutine s_mpi_reduce_maxloc - - !> The subroutine terminates the MPI execution environment. - !! @param prnt error message to be printed - subroutine s_mpi_abort(prnt, code) - - character(len=*), intent(in), optional :: prnt - integer, intent(in), optional :: code - - if (present(prnt)) then - print *, prnt - call flush (6) - - end if - -#ifndef MFC_MPI - if (present(code)) then - stop code - else - stop 1 - end if -#else - ! Terminating the MPI environment - if (present(code)) then - call MPI_ABORT(MPI_COMM_WORLD, code, ierr) - else - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if -#endif - - end subroutine s_mpi_abort - - !>Halts all processes until all have reached barrier. - subroutine s_mpi_barrier - -#ifdef MFC_MPI - - ! Calling MPI_BARRIER - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_barrier - - !> The subroutine finalizes the MPI execution environment. - subroutine s_mpi_finalize - -#ifdef MFC_MPI - - ! Finalizing the MPI environment - call MPI_FINALIZE(ierr) - -#endif - - end subroutine s_mpi_finalize - - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. - !! @param q_cons_vf Cell-average conservative variables - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_comm, & - mpi_dir, & - pbc_loc, & - nVar, & - pb, mv) - - type(scalar_field), dimension(1:), intent(inout) :: q_comm - real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: mpi_dir, pbc_loc, nVar - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0, qbmm_comm - - integer :: pack_offset, unpack_offset - - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - call nvtxStartRange("RHS-COMM-PACKBUF") - - qbmm_comm = .false. - - if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then - qbmm_comm = .true. 
- v_size = nVar + 2*nb*4 - buffer_counts = (/ & - buff_size*v_size*(n + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - else - v_size = nVar - buffer_counts = (/ & - buff_size*v_size*(n + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - end if - - !$acc update device(v_size, nVar) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) - end do - end do - end do - end do - end do - end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 
1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) - end do - end do - end do - end do - end do - end if - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) - end do - end do - end do - end do - end do - end if - #:endif - end if - #:endfor - call nvtxEndRange ! Packbuf - - p_send => buff_send(0) - p_recv => buff_recv(0) - - ! Send/Recv -#ifdef MFC_SIMULATION - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) - call nvtxEndRange - call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) - call nvtxEndRange - #:endif - end if - #:endfor -#else - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) -#endif - - ! 
Unpack Received Buffer - call nvtxStartRange("RHS-COMM-UNPACKBUF") - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - end if - #:else - ! 
Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - end if - #:endif - end if - #:endfor - call nvtxEndRange -#endif - - end subroutine s_mpi_sendrecv_variables_buffers - - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! Optimal number of processors in the x-, y- and z-directions - - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(wp) :: fct_min !< - !! Processor factorization (fct) minimization parameter - - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors - - integer :: i, j !< Generic loop iterators - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! 3D Cartesian Processor Topology - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! 
Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology - - ! Global Parameters for z-direction - - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. 
num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! 2D Cartesian Processor Topology - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology - - ! Global Parameters for y-direction - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! 1D Cartesian Processor Topology - else - - ! Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Global Parameters for x-direction - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! 
Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location -#ifndef MFC_PRE_PROCESS - subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - -#ifdef MFC_MPI - - ! MPI Communication in x-direction - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! 
Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - - end if - ! 
END: MPI Communication in z-direction -#endif - - end subroutine s_mpi_sendrecv_grid_variables_buffers -#endif - - !> Module deallocation and/or disassociation procedures - subroutine s_finalize_mpi_common_module - -#ifdef MFC_MPI - deallocate (buff_send, buff_recv) -#endif - - end subroutine s_finalize_mpi_common_module - -end module m_mpi_common From 9adc5f8a15249aa8c84b281ff08bdc3ef23de10c Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 13 May 2025 22:42:47 -0400 Subject: [PATCH 18/62] bug fixes --- examples/3D_TaylorGreenVortex/case.py | 4 +- src/common/m_boundary_common.fpp | 6 +- src/common/m_mpi_common.fpp | 121 ++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 12 deletions(-) diff --git a/examples/3D_TaylorGreenVortex/case.py b/examples/3D_TaylorGreenVortex/case.py index 33889c7958..de440980cb 100644 --- a/examples/3D_TaylorGreenVortex/case.py +++ b/examples/3D_TaylorGreenVortex/case.py @@ -2,7 +2,7 @@ import math import json -N = 49 +N = 256 Re = 1600 L = 1 @@ -41,7 +41,7 @@ "p": N, "cyl_coord": "F", "dt": dt, - "t_step_start": 0, + "t_step_start": 13529, "t_step_stop": Nt, "t_step_save": int(Nt / 100), # Simulation Algorithm Parameters diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index bdfb163908..4810ee1223 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1029,9 +1029,7 @@ contains integer :: j, i, q -#ifdef MFC_PRE_PROCESS - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) -#else +#ifdef MFC_SIMULATION if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -1081,6 +1079,8 @@ contains end do end if end if +#else + call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) #endif end subroutine s_dirichlet diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 760f1ae5ac..c167f07ec8 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1222,7 +1222,7 @@ contains end do ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then + if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then proc_coords(3) = proc_coords(3) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%beg, ierr) @@ -1230,19 +1230,55 @@ contains end if ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then proc_coords(3) = proc_coords(3) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%end, ierr) proc_coords(3) = proc_coords(3) - 1 end if +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(3) > 0 .and. format == 1) then + offset_z%beg = 2 + else + offset_z%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then + offset_z%end = 2 + else + offset_z%end = 0 + end if +#endif + + ! Beginning and end sub-domain boundary locations if (parallel_io) then if (proc_coords(3) < rem_cells) then start_idx(3) = (p + 1)*proc_coords(3) else start_idx(3) = (p + 1)*proc_coords(3) + rem_cells end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) 
then + dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp) + + if (proc_coords(3) < rem_cells) then + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3)) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1) & + - (num_procs_z - rem_cells)) + else + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3) + rem_cells) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1)) + end if + end if +#endif end if ! 2D Cartesian Processor Topology @@ -1325,7 +1361,7 @@ contains end do ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then + if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. num_procs_y > 1)) then proc_coords(2) = proc_coords(2) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%beg, ierr) @@ -1333,19 +1369,55 @@ contains end if ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then proc_coords(2) = proc_coords(2) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%end, ierr) proc_coords(2) = proc_coords(2) - 1 end if +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(2) > 0 .and. format == 1) then + offset_y%beg = 2 + else + offset_y%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then + offset_y%end = 2 + else + offset_y%end = 0 + end if +#endif + + ! Beginning and end sub-domain boundary locations if (parallel_io) then if (proc_coords(2) < rem_cells) then start_idx(2) = (n + 1)*proc_coords(2) else start_idx(2) = (n + 1)*proc_coords(2) + rem_cells end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp) + + if (proc_coords(2) < rem_cells) then + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2)) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1) & + - (num_procs_y - rem_cells)) + else + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2) + rem_cells) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1)) + end if + end if +#endif end if ! 1D Cartesian Processor Topology @@ -1381,27 +1453,62 @@ contains end do ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then + if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then proc_coords(1) = proc_coords(1) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) proc_coords(1) = proc_coords(1) + 1 end if ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. num_procs_x > 1)) then proc_coords(1) = proc_coords(1) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) proc_coords(1) = proc_coords(1) - 1 end if +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then + offset_x%beg = 2 + else + offset_x%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then + offset_x%end = 2 + else + offset_x%end = 0 + end if +#endif + + ! 
Beginning and end sub-domain boundary locations if (parallel_io) then if (proc_coords(1) < rem_cells) then start_idx(1) = (m + 1)*proc_coords(1) else start_idx(1) = (m + 1)*proc_coords(1) + rem_cells end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp) + + if (proc_coords(1) < rem_cells) then + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1)) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1) & + - (num_procs_x - rem_cells)) + else + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1) + rem_cells) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1)) + end if + end if +#endif end if - #endif end subroutine s_mpi_decompose_computational_domain From 6448b3701fa44700b99955107ddb27f961ce5749 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 13 May 2025 23:46:34 -0400 Subject: [PATCH 19/62] fix no mpi test --- src/post_process/m_global_parameters.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index ba36a8be19..70882a7ff4 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -474,7 +474,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs > 1 .and. n == 0) m_root = m_glb + if (n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index c1aaa8f4b3..be8c822921 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -429,9 +429,11 @@ contains subroutine s_finalize_mpi_proxy_module() +#ifdef MFC_MPI if (ib) then @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if +#endif end subroutine s_finalize_mpi_proxy_module From a96f4c6eb9c5c372c2e70927ca0416ce3729d399 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 19 May 2025 13:32:26 -0400 Subject: [PATCH 20/62] bug fixes --- "\\" | 439 +++++++++++++++++++++++++++++++ src/common/m_boundary_common.fpp | 6 + src/common/m_mpi_common.fpp | 2 +- temp.txt | 225 ++++++++++++++++ 4 files changed, 671 insertions(+), 1 deletion(-) create mode 100644 "\\" create mode 100644 temp.txt diff --git "a/\\" "b/\\" new file mode 100644 index 0000000000..5d2a6a0944 --- /dev/null +++ "b/\\" @@ -0,0 +1,439 @@ +!> +!! @file m_mpi_proxy.f90 +!! @brief Contains module m_mpi_proxy + +#:include 'case.fpp' +#:include 'macros.fpp' + +!> @brief The module serves as a proxy to the parameters and subroutines +!! available in the MPI implementation's MPI module. Specifically, +!! the purpose of the proxy is to harness basic MPI commands into +!! more complicated procedures as to accomplish the communication +!! goals for the simulation. +module m_mpi_proxy + +#ifdef MFC_MPI + use mpi !< Message passing interface (MPI) module +#endif + + use m_helper_basic !< Functions to compare floating point numbers + + use m_helper + + use m_derived_types !< Definitions of the derived types + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_common + + use m_nvtx + + use ieee_arithmetic + + implicit none + + integer, private, allocatable, dimension(:), target :: ib_buff_send !< + !! This variable is utilized to pack and send the buffer of the immersed + !! 
boundary markers, for a single computational domain boundary at the + !! time, to the relevant neighboring processor. + + integer, private, allocatable, dimension(:), target :: ib_buff_recv !< + !! q_cons_buff_recv is utilized to receive and unpack the buffer of the + !! immersed boundary markers, for a single computational domain boundary + !! at the time, from the relevant neighboring processor. + + !> @name Generic flags used to identify and report MPI errors + !> @{ + integer, private :: ierr + !> @} + + integer :: i_halo_size + !$acc declare create(i_halo_size) + +contains + + subroutine s_initialize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + if (n > 0) then + if (p > 0) then + i_halo_size = -1 + gp_layers* & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1) + else + i_halo_size = -1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1) + end if + else + i_halo_size = -1 + gp_layers + end if + + !$acc update device(i_halo_size) + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) + end if +#endif + + end subroutine s_initialize_mpi_proxy_module + + !> Since only the processor with rank 0 reads and verifies + !! the consistency of user inputs, these are initially not + !! available to the other processors. Then, the purpose of + !! this subroutine is to distribute the user inputs to the + !! remaining processors in the communicator. + subroutine s_mpi_bcast_user_inputs() + +#ifdef MFC_MPI + + integer :: i, j !< Generic loop iterator + + call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + + #:for VAR in ['k_x', 'k_y', 'k_z', 'w_x', 'w_y', 'w_z', 'p_x', 'p_y', & + & 'p_z', 'g_x', 'g_y', 'g_z'] + call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['t_step_old', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & + & 't_step_start','t_step_stop','t_step_save','t_step_print', & + & 'model_eqns','time_stepper', 'riemann_solver', 'low_Mach', & + & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & + & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & + & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & + & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & + & 'num_bc_patches'] + call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', & + & 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', & + & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', & + & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', & + & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', & + & 'prim_vars_wrt', 'weno_avg', 'file_per_process', 'relax', & + & 'adv_n', 'adap_dt', 'ib', 'bodyForces', 'bf_x', 'bf_y', 'bf_z', & + & 'bc_x%grcbc_in', 'bc_x%grcbc_out', 'bc_x%grcbc_vel_out', & + & 'bc_y%grcbc_in', 'bc_y%grcbc_out', 'bc_y%grcbc_vel_out', & + & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & + & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & + & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & + & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage' ] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + if (chemistry) then + #:for VAR in [ 'diffusion', 'reactions' ] + call MPI_BCAST(chem_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'gamma_method' ] + call 
MPI_BCAST(chem_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + + if (bubbles_lagrange) then + #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & + & 'write_bubbles', 'write_bubbles_stats'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'c0', 'rho0', 'T0', 'x0', 'diffcoefvap', 'epsilonb','charwidth', & + & 'valmaxvoid', 'Thost'] + call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + + #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & + & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', & + & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', & + & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & + & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & + & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & + & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & + & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & + & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & + & 'tau_star', 'cont_damage_s', 'alpha_bar' ] + call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + do i = 1, 3 + #:for VAR in [ 'bc_x%vel_in', 'bc_x%vel_out', 'bc_y%vel_in', 'bc_y%vel_out', & + & 'bc_z%vel_in', 'bc_z%vel_out'] + call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + + #:if not MFC_CASE_OPTIMIZATION + call MPI_BCAST(mapped_weno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(wenoz, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(teno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(nb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(num_fluids, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(wenoz_q, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(mhd, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(relativity, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endif + + do i = 1, num_fluids_max + #:for VAR in [ 'gamma','pi_inf','mul0','ss','pv','gamma_v','M_v', & + & 'mu_v','k_v', 'cp_v','G', 'cv', 'qv', 'qvp' ] + call MPI_BCAST(fluid_pp(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr) + end do + + do i = 1, num_fluids_max + #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in','bc_z%alpha_in'] + call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + + do i = 1, num_ibs + #:for VAR in [ 'radius', 'length_x', 'length_y', & + & 'x_centroid', 'y_centroid', 'c', 'm', 'p', 't', 'theta', 'slip' ] + call MPI_BCAST(patch_ib(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + call MPI_BCAST(patch_ib(i)%geometry, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + end do + + do j = 1, num_probes_max + do i = 1, 3 + call MPI_BCAST(acoustic(j)%loc(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + end do + + call MPI_BCAST(acoustic(j)%dipole, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + + #:for VAR in [ 'pulse', 'support', 'num_elements', 'element_on', 'bb_num_freq' ] + 
call MPI_BCAST(acoustic(j)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'mag', 'length', 'height', & + 'wavelength', 'frequency', 'gauss_sigma_dist', 'gauss_sigma_time', & + 'npulse', 'dir', 'delay', 'foc_length', 'aperture', & + 'element_spacing_angle', 'element_polygon_ratio', 'rotate_angle', & + 'bb_bandwidth', 'bb_lowest_freq' ] + call MPI_BCAST(acoustic(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'x','y','z' ] + call MPI_BCAST(probe(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'xmin', 'xmax', 'ymin', 'ymax', 'zmin', 'zmax' ] + call MPI_BCAST(integral(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + +#endif + + end subroutine s_mpi_bcast_user_inputs + + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) + + type(integer_field), intent(inout) :: ib_markers + + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0, qbmm_comm + + integer :: pack_offset, unpack_offset + + integer, pointer :: p_i_send, p_i_recv + +#ifdef MFC_MPI + + call nvtxStartRange("IBM-COMM-PACKBUF") + + buffer_counts = (/ & + gp_layers*(n + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - gp_layers + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 + end if + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do + end do + end do + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do + end do + end do + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, gp_layers - 1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do + end do + end do + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + p_i_send => ib_buff_send(0) + p_i_recv => ib_buff_recv(0) + + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_i_send, p_i_recv) + !$acc host_data use_device(p_i_send, p_i_recv) + call nvtxStartRange("IBM-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("IBM-COMM-DEV2HOST") + !$acc update host(ib_buff_send) + call nvtxEndRange + call nvtxStartRange("IBM-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_i_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + p_i_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("IBM-COMM-HOST2DEV") + !$acc update device(ib_buff_recv) + call nvtxEndRange + #:endif + end if + #:endfor + + ! 
Unpack Received Buffer + call nvtxStartRange("IBM-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do + end do + end do + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do + end do + end do + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = -gp_layers, -1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do + end do + end do + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_sendrecv_ib_buffers + + subroutine s_mpi_send_random_number(phi_rn, num_freq) + integer, intent(in) :: num_freq + real(wp), intent(inout), dimension(1:num_freq) :: phi_rn + +#ifdef MFC_MPI + call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) +#endif + + end subroutine s_mpi_send_random_number + + subroutine s_finalize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + @:DEALLOCATE(ib_buff_send, ib_buff_recv) + end if +#endif + + end subroutine s_finalize_mpi_proxy_module + +end module m_mpi_proxy diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4810ee1223..b47f4b592c 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -255,9 +255,11 @@ contains integer :: j, q, i +#ifndef MFC_POST_PROCESS if (qbmm .and. .not. polytropic) then call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) end if +#endif if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg @@ -795,9 +797,11 @@ contains integer :: j, q, i +#ifndef MFC_POST_PROCESS if (qbmm .and. .not. polytropic) then call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) end if +#endif if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg @@ -894,9 +898,11 @@ contains integer :: j, q, i +#ifndef MFC_POST_PROCESS if (qbmm .and. .not. 
polytropic) then call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) end if +#endif if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index c167f07ec8..185a84892e 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -637,7 +637,7 @@ contains /) end if - !$acc update device(v_size, nVar) + !$acc update device(v_size) buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) diff --git a/temp.txt b/temp.txt new file mode 100644 index 0000000000..cc44ee4e57 --- /dev/null +++ b/temp.txt @@ -0,0 +1,225 @@ + 2 74 224 + 0 32 96 + 1 32 97 + 2 32 98 + 0 33 99 + 1 33 100 + 2 33 101 + 0 34 102 + 1 34 103 + 2 34 104 + 0 35 105 + 1 35 106 + 2 35 107 + 0 36 108 + 1 36 109 + 2 36 110 + 0 37 111 + 1 37 112 + 2 37 113 + 0 38 114 + 1 38 115 + 2 38 116 + 0 39 117 + 1 39 118 + 2 39 119 + 0 40 120 + 1 40 121 + 2 40 122 + 0 41 123 + 1 41 124 + 2 41 125 + 0 42 126 + 1 42 127 + 2 10 32 + 0 11 33 + 1 11 34 + 2 11 35 + 0 12 36 + 1 12 37 + 2 12 38 + 0 13 39 + 1 13 40 + 2 13 41 + 0 14 42 + 1 14 43 + 2 14 44 + 0 15 45 + 1 15 46 + 2 15 47 + 0 16 48 + 1 16 49 + 2 16 50 + 0 17 51 + 1 17 52 + 2 17 53 + 0 18 54 + 1 18 55 + 2 18 56 + 0 19 57 + 1 19 58 + 2 19 59 + 0 20 60 + 1 20 61 + 2 20 62 + 0 21 63 + 0 0 0 + 1 0 1 + 2 0 2 + 0 1 3 + 1 1 4 + 2 1 5 + 0 2 6 + 1 2 7 + 2 2 8 + 0 3 9 + 1 3 10 + 2 3 11 + 0 4 12 + 1 4 13 + 2 4 14 + 0 5 15 + 1 5 16 + 2 5 17 + 0 6 18 + 1 6 19 + 2 6 20 + 0 7 21 + 1 7 22 + 2 7 23 + 0 8 24 + 1 8 25 + 2 8 26 + 0 9 27 + 1 9 28 + 2 9 29 + 0 10 30 + 1 10 31 + 1 53 160 + 2 53 161 + 0 54 162 + 1 54 163 + 2 54 164 + 0 55 165 + 1 55 166 + 2 55 167 + 0 56 168 + 1 56 169 + 2 56 170 + 0 57 171 + 1 57 172 + 2 57 173 + 0 58 174 + 1 58 175 + 2 58 176 + 0 59 177 + 1 59 178 + 2 59 179 + 0 60 180 + 1 60 181 + 2 60 182 + 0 61 183 + 1 61 184 + 2 61 185 + 0 62 186 + 1 62 187 + 2 62 188 + 0 63 189 + 1 63 190 + 2 63 191 + 0 64 192 + 1 64 193 + 2 64 194 + 0 65 195 + 1 65 196 + 2 65 197 + 0 66 198 + 1 66 199 + 2 66 200 + 0 67 201 + 1 67 202 + 2 67 203 + 0 68 204 + 1 68 205 + 2 68 206 + 0 69 207 + 1 69 208 + 2 69 209 + 0 70 210 + 1 70 211 + 2 70 212 + 0 71 213 + 1 71 214 + 2 71 215 + 0 72 216 + 1 72 217 + 2 72 218 + 0 73 219 + 1 73 220 + 2 73 221 + 0 74 222 + 1 74 223 + 2 42 128 + 0 43 129 + 1 43 130 + 2 43 131 + 0 44 132 + 1 44 133 + 2 44 134 + 0 45 135 + 1 45 136 + 2 45 137 + 0 46 138 + 1 46 139 + 2 46 140 + 0 47 141 + 1 47 142 + 2 47 143 + 0 48 144 + 1 48 145 + 2 48 146 + 0 49 147 + 1 49 148 + 2 49 149 + 0 50 150 + 1 50 151 + 2 50 152 + 0 51 153 + 1 51 154 + 2 51 155 + 0 52 156 + 1 52 157 + 2 52 158 + 0 53 159 + 1 21 64 + 2 21 65 + 0 22 66 + 1 22 67 + 2 22 68 + 0 23 69 + 1 23 70 + 2 23 71 + 0 24 72 + 1 24 73 + 2 24 74 + 0 25 75 + 1 25 76 + 2 25 77 + 0 26 78 + 1 26 79 + 2 26 80 + 0 27 81 + 1 27 82 + 2 27 83 + 0 28 84 + 1 28 85 + 2 28 86 + 0 29 87 + 1 29 88 + 2 29 89 + 0 30 90 + 1 30 91 + 2 30 92 + 0 31 93 + 1 31 94 + 2 31 95 From b817ef15f002c1a682d321c18f4a736251706de6 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 19 May 2025 13:32:46 -0400 Subject: [PATCH 21/62] remove extra files --- "\\" | 439 ------------------------------------------------------- temp.txt | 225 ---------------------------- 2 files changed, 664 deletions(-) delete mode 100644 "\\" delete mode 100644 temp.txt diff --git "a/\\" "b/\\" deleted file mode 100644 index 5d2a6a0944..0000000000 --- "a/\\" +++ /dev/null @@ -1,439 +0,0 @@ -!> -!! @file m_mpi_proxy.f90 -!! 
@brief Contains module m_mpi_proxy - -#:include 'case.fpp' -#:include 'macros.fpp' - -!> @brief The module serves as a proxy to the parameters and subroutines -!! available in the MPI implementation's MPI module. Specifically, -!! the purpose of the proxy is to harness basic MPI commands into -!! more complicated procedures as to accomplish the communication -!! goals for the simulation. -module m_mpi_proxy - -#ifdef MFC_MPI - use mpi !< Message passing interface (MPI) module -#endif - - use m_helper_basic !< Functions to compare floating point numbers - - use m_helper - - use m_derived_types !< Definitions of the derived types - - use m_global_parameters !< Definitions of the global parameters - - use m_mpi_common - - use m_nvtx - - use ieee_arithmetic - - implicit none - - integer, private, allocatable, dimension(:), target :: ib_buff_send !< - !! This variable is utilized to pack and send the buffer of the immersed - !! boundary markers, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - integer, private, allocatable, dimension(:), target :: ib_buff_recv !< - !! q_cons_buff_recv is utilized to receive and unpack the buffer of the - !! immersed boundary markers, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - !> @name Generic flags used to identify and report MPI errors - !> @{ - integer, private :: ierr - !> @} - - integer :: i_halo_size - !$acc declare create(i_halo_size) - -contains - - subroutine s_initialize_mpi_proxy_module() - -#ifdef MFC_MPI - if (ib) then - if (n > 0) then - if (p > 0) then - i_halo_size = -1 + gp_layers* & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1) - else - i_halo_size = -1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1) - end if - else - i_halo_size = -1 + gp_layers - end if - - !$acc update device(i_halo_size) - @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) - end if -#endif - - end subroutine s_initialize_mpi_proxy_module - - !> Since only the processor with rank 0 reads and verifies - !! the consistency of user inputs, these are initially not - !! available to the other processors. Then, the purpose of - !! this subroutine is to distribute the user inputs to the - !! remaining processors in the communicator. 
- subroutine s_mpi_bcast_user_inputs() - -#ifdef MFC_MPI - - integer :: i, j !< Generic loop iterator - - call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) - - #:for VAR in ['k_x', 'k_y', 'k_z', 'w_x', 'w_y', 'w_z', 'p_x', 'p_y', & - & 'p_z', 'g_x', 'g_y', 'g_z'] - call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in ['t_step_old', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & - & 't_step_start','t_step_stop','t_step_save','t_step_print', & - & 'model_eqns','time_stepper', 'riemann_solver', 'low_Mach', & - & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & - & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & - & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & - & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches'] - call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', & - & 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', & - & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', & - & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', & - & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', & - & 'prim_vars_wrt', 'weno_avg', 'file_per_process', 'relax', & - & 'adv_n', 'adap_dt', 'ib', 'bodyForces', 'bf_x', 'bf_y', 'bf_z', & - & 'bc_x%grcbc_in', 'bc_x%grcbc_out', 'bc_x%grcbc_vel_out', & - & 'bc_y%grcbc_in', 'bc_y%grcbc_out', 'bc_y%grcbc_vel_out', & - & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & - & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & - & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage' ] - call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - if (chemistry) then - #:for VAR in [ 'diffusion', 'reactions' ] - call MPI_BCAST(chem_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'gamma_method' ] - call MPI_BCAST(chem_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - if (bubbles_lagrange) then - #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] - call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] - call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'c0', 'rho0', 'T0', 'x0', 'diffcoefvap', 'epsilonb','charwidth', & - & 'valmaxvoid', 'Thost'] - call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & - & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', & - & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', & - & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & - & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & - & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & - & 'tau_star', 'cont_damage_s', 'alpha_bar' ] - 
call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - do i = 1, 3 - #:for VAR in [ 'bc_x%vel_in', 'bc_x%vel_out', 'bc_y%vel_in', 'bc_y%vel_out', & - & 'bc_z%vel_in', 'bc_z%vel_out'] - call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end do - - #:if not MFC_CASE_OPTIMIZATION - call MPI_BCAST(mapped_weno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(wenoz, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(teno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(nb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(num_fluids, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(wenoz_q, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mhd, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(relativity, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endif - - do i = 1, num_fluids_max - #:for VAR in [ 'gamma','pi_inf','mul0','ss','pv','gamma_v','M_v', & - & 'mu_v','k_v', 'cp_v','G', 'cv', 'qv', 'qvp' ] - call MPI_BCAST(fluid_pp(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr) - end do - - do i = 1, num_fluids_max - #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in','bc_z%alpha_in'] - call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end do - - do i = 1, num_ibs - #:for VAR in [ 'radius', 'length_x', 'length_y', & - & 'x_centroid', 'y_centroid', 'c', 'm', 'p', 't', 'theta', 'slip' ] - call MPI_BCAST(patch_ib(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - call MPI_BCAST(patch_ib(i)%geometry, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - end do - - do j = 1, num_probes_max - do i = 1, 3 - call MPI_BCAST(acoustic(j)%loc(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - end do - - call MPI_BCAST(acoustic(j)%dipole, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - - #:for VAR in [ 'pulse', 'support', 'num_elements', 'element_on', 'bb_num_freq' ] - call MPI_BCAST(acoustic(j)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'mag', 'length', 'height', & - 'wavelength', 'frequency', 'gauss_sigma_dist', 'gauss_sigma_time', & - 'npulse', 'dir', 'delay', 'foc_length', 'aperture', & - 'element_spacing_angle', 'element_polygon_ratio', 'rotate_angle', & - 'bb_bandwidth', 'bb_lowest_freq' ] - call MPI_BCAST(acoustic(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'x','y','z' ] - call MPI_BCAST(probe(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'xmin', 'xmax', 'ymin', 'ymax', 'zmin', 'zmax' ] - call MPI_BCAST(integral(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end do - -#endif - - end subroutine s_mpi_bcast_user_inputs - - subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) - - type(integer_field), intent(inout) :: ib_markers - - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0, qbmm_comm - - integer :: pack_offset, unpack_offset - - integer, pointer :: p_i_send, p_i_recv - -#ifdef MFC_MPI - - call nvtxStartRange("IBM-COMM-PACKBUF") - - buffer_counts = (/ & - gp_layers*(n + 1)*(p + 1), & - gp_layers*(m + 
2*gp_layers + 1)*(p + 1), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - gp_layers + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) - end do - end do - end do - #:elif mpi_dir == 2 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) - end do - end do - end do - #:else - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) - end do - end do - end do - #:endif - end if - #:endfor - call nvtxEndRange ! Packbuf - - p_i_send => ib_buff_send(0) - p_i_recv => ib_buff_recv(0) - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - call nvtxStartRange("IBM-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("IBM-COMM-DEV2HOST") - !$acc update host(ib_buff_send) - call nvtxEndRange - call nvtxStartRange("IBM-COMM-SENDRECV-NO-RMDA") - #:endif - - call MPI_SENDRECV( & - p_i_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & - p_i_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - call nvtxStartRange("IBM-COMM-HOST2DEV") - !$acc update device(ib_buff_recv) - call nvtxEndRange - #:endif - end if - #:endfor - - ! 
Unpack Received Buffer - call nvtxStartRange("IBM-COMM-UNPACKBUF") - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) - end do - end do - end do - #:elif mpi_dir == 2 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) - end do - end do - end do - #:else - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) - end do - end do - end do - #:endif - end if - #:endfor - call nvtxEndRange -#endif - - end subroutine s_mpi_sendrecv_ib_buffers - - subroutine s_mpi_send_random_number(phi_rn, num_freq) - integer, intent(in) :: num_freq - real(wp), intent(inout), dimension(1:num_freq) :: phi_rn - -#ifdef MFC_MPI - call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) -#endif - - end subroutine s_mpi_send_random_number - - subroutine s_finalize_mpi_proxy_module() - -#ifdef MFC_MPI - if (ib) then - @:DEALLOCATE(ib_buff_send, ib_buff_recv) - end if -#endif - - end subroutine s_finalize_mpi_proxy_module - -end module m_mpi_proxy diff --git a/temp.txt b/temp.txt deleted file mode 100644 index cc44ee4e57..0000000000 --- a/temp.txt +++ /dev/null @@ -1,225 +0,0 @@ - 2 74 224 - 0 32 96 - 1 32 97 - 2 32 98 - 0 33 99 - 1 33 100 - 2 33 101 - 0 34 102 - 1 34 103 - 2 34 104 - 0 35 105 - 1 35 106 - 2 35 107 - 0 36 108 - 1 36 109 - 2 36 110 - 0 37 111 - 1 37 112 - 2 37 113 - 0 38 114 - 1 38 115 - 2 38 116 - 0 39 117 - 1 39 118 - 2 39 119 - 0 40 120 - 1 40 121 - 2 40 122 - 0 41 123 - 1 41 124 - 2 41 125 - 0 42 126 - 1 42 127 - 2 10 32 - 0 11 33 - 1 11 34 - 2 11 35 - 0 12 36 - 1 12 37 - 2 12 38 - 0 13 39 - 1 13 40 - 2 13 41 - 0 14 42 - 1 14 43 - 2 14 44 - 0 15 45 - 1 15 46 - 2 15 47 - 0 16 48 - 1 16 49 - 2 16 50 - 0 17 51 - 1 17 52 - 2 17 53 - 0 18 54 - 1 18 55 - 2 18 56 - 0 19 57 - 1 19 58 - 2 19 59 - 0 20 60 - 1 20 61 - 2 20 62 - 0 21 63 - 0 0 0 - 1 0 1 - 2 0 2 - 0 1 3 - 1 1 4 - 2 1 5 - 0 2 6 - 1 2 7 - 2 2 8 - 0 3 9 - 1 3 10 - 2 3 11 - 0 4 12 - 1 4 13 - 2 4 14 - 0 5 15 - 1 5 16 - 2 5 17 - 0 6 18 - 1 6 19 - 2 6 20 - 0 7 21 - 1 7 22 - 2 7 23 - 0 8 24 - 1 8 25 - 2 8 26 - 0 9 27 - 1 9 28 - 2 9 29 - 0 10 30 - 1 10 31 - 1 53 160 - 2 53 161 - 0 54 162 - 1 54 163 - 2 54 164 - 0 55 165 - 1 55 166 - 2 55 167 - 0 56 168 - 1 56 169 - 2 56 170 - 0 57 171 - 1 57 172 - 2 57 173 - 0 58 174 - 1 58 175 - 2 58 176 - 0 59 177 - 1 59 178 - 2 59 179 - 0 60 180 - 1 60 181 - 2 60 182 - 0 61 183 - 1 61 184 - 2 61 185 - 0 62 186 - 1 62 187 - 2 62 188 - 0 63 189 - 1 63 190 - 2 63 191 - 0 64 192 - 1 64 193 - 2 64 194 - 0 65 195 - 1 65 196 - 2 65 197 - 0 66 198 - 1 66 199 - 2 66 200 - 0 67 201 - 1 67 202 - 2 67 203 - 0 68 204 - 1 68 205 - 2 68 206 - 0 69 207 - 1 69 208 - 2 69 209 - 0 70 210 - 1 70 211 - 2 70 212 - 0 71 213 - 1 71 214 - 2 71 215 - 0 72 216 - 1 72 217 - 2 72 218 - 0 73 219 - 1 73 220 - 2 73 221 - 0 74 222 - 1 74 223 - 2 42 128 - 0 43 129 - 1 43 
130 - 2 43 131 - 0 44 132 - 1 44 133 - 2 44 134 - 0 45 135 - 1 45 136 - 2 45 137 - 0 46 138 - 1 46 139 - 2 46 140 - 0 47 141 - 1 47 142 - 2 47 143 - 0 48 144 - 1 48 145 - 2 48 146 - 0 49 147 - 1 49 148 - 2 49 149 - 0 50 150 - 1 50 151 - 2 50 152 - 0 51 153 - 1 51 154 - 2 51 155 - 0 52 156 - 1 52 157 - 2 52 158 - 0 53 159 - 1 21 64 - 2 21 65 - 0 22 66 - 1 22 67 - 2 22 68 - 0 23 69 - 1 23 70 - 2 23 71 - 0 24 72 - 1 24 73 - 2 24 74 - 0 25 75 - 1 25 76 - 2 25 77 - 0 26 78 - 1 26 79 - 2 26 80 - 0 27 81 - 1 27 82 - 2 27 83 - 0 28 84 - 1 28 85 - 2 28 86 - 0 29 87 - 1 29 88 - 2 29 89 - 0 30 90 - 1 30 91 - 2 30 92 - 0 31 93 - 1 31 94 - 2 31 95 From 5cf0fef62e56820a702af3724fdaba83e8495cd9 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 19 May 2025 14:12:40 -0400 Subject: [PATCH 22/62] fix build + toolchain --- src/simulation/m_time_steppers.fpp | 2 +- toolchain/mfc/run/case_dicts.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 20def0ae5e..5af09f5ed6 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -938,7 +938,7 @@ contains elseif (bubbles_lagrange) then - call s_populate_variables_buffers(q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) call s_compute_bubble_EL_dynamics(q_cons_ts(1)%vf, q_prim_vf, t_step, rhs_vf, stage) call s_transfer_data_to_tmp() call s_smear_voidfraction() diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 44e195221e..e50858b3b4 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -291,11 +291,7 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, -<<<<<<< HEAD - 'rkck_tolerance': ParamType.REAL, -======= 'num_bc_patches': ParamType.INT, ->>>>>>> upstream/master 'powell': ParamType.LOG, 'tau_star': ParamType.REAL, 'cont_damage_s': ParamType.REAL, From 645f2cf64e9983188500740291284145582fdec1 Mon Sep 17 00:00:00 2001 From: wilfonba Date: Thu, 22 May 2025 13:32:59 -0400 Subject: [PATCH 23/62] fix boundary_common on Frontier --- src/common/m_boundary_common.fpp | 171 +++++++++++++++---------------- toolchain/modules | 6 +- 2 files changed, 85 insertions(+), 92 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index b47f4b592c..97134421fa 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -93,18 +93,23 @@ contains do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, -1, k, l) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, -1, k, l) + call s_periodic(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, -1, k, l) + call s_dirichlet(q_prim_vf, 1, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb, mv) + end if end do end do end if @@ -117,18 +122,23 @@ contains do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, 1, k, l) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, 1, k, l) + call s_periodic(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, 1, k, l) + call s_dirichlet(q_prim_vf, 1, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb, mv) + end if end do end do end if @@ -145,20 +155,26 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) case (BC_AXIS) - call s_axis(q_prim_vf, pb, mv, 2, -1, k, l) + call s_axis(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, -1, k, l) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, -1, k, l) + call s_periodic(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, -1, k, l) + call s_dirichlet(q_prim_vf, 2, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, -1)%sf(0, k, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb, mv) + end if end do end do end if @@ -171,18 +187,23 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, 1, k, l) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, 1, k, l) + call s_periodic(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, 1, k, l) + call s_dirichlet(q_prim_vf, 2, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(2, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb, mv) + end if end do end do end if @@ -199,18 +220,23 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, -1, k, l) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, -1, k, l) + call s_periodic(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, -1, k, l) + call s_dirichlet(q_prim_vf, 3, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb, mv) + end if end do end do end if @@ -223,18 +249,23 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, 1, k, l) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, 1, k, l) + call s_periodic(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, 1, k, l) + call s_dirichlet(q_prim_vf, 3, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb, mv) + end if end do end do end if @@ -242,25 +273,18 @@ contains end subroutine s_populate_variables_buffers - subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if -#endif - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -313,14 +337,10 @@ contains end subroutine s_ghost_cell_extrapolation - subroutine s_symmetry(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_symmetry -#else + subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -577,14 +597,10 @@ contains end subroutine s_symmetry - subroutine s_periodic(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_periodic -#else + subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -720,14 +736,10 @@ contains end subroutine s_periodic - subroutine s_axis(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_axis -#else + subroutine s_axis(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -784,25 +796,18 @@ contains end subroutine s_axis - subroutine s_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if -#endif - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -885,25 +890,18 @@ contains end subroutine s_slip_wall - subroutine s_no_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if -#endif - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -1022,14 +1020,13 @@ contains end subroutine s_no_slip_wall - subroutine s_dirichlet(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1086,18 +1083,14 @@ contains end if end if #else - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #endif end subroutine s_dirichlet - subroutine s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_qbmm_extrapolation -#else + subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1969,7 +1962,7 @@ contains end if ! Populating the cell-boundary and center locations buffer at bc_z%end - do i = 1, buff_size + do i = 1, offset_z%end z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) end do diff --git a/toolchain/modules b/toolchain/modules index 8221e77aaa..023039a915 100644 --- a/toolchain/modules +++ b/toolchain/modules @@ -48,9 +48,9 @@ p-gpu nvhpc/24.5 hpcx/2.19-cuda cuda/12.1.1 p-gpu MFC_CUDA_CC=70,75,80,89,90 NVHPC_CUDA_HOME=$CUDA_HOME CC=nvc CXX=nvc++ FC=nvfortran f OLCF Frontier -f-all cce/18.0.0 cpe/24.07 rocm/6.1.3 cray-mpich/8.1.28 -f-all cray-fftw cray-hdf5 cray-python omniperf -f-gpu craype-accel-amd-gfx90a +f-all cpe/25.03 rocm/6.3.1 +f-all cray-fftw cray-hdf5 cray-python +f-gpu craype-accel-amd-gfx90a rocprofiler-compute/3.0.0 d NCSA Delta d-all python/3.11.6 From b3c8e8a72ab887416b004e7a4f90cce9716c7cab Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 26 May 2025 14:47:24 -0400 Subject: [PATCH 24/62] wall BCs for EL and bug fixes --- src/simulation/m_bubbles.fpp | 5 +- src/simulation/m_bubbles_EL.fpp | 118 +++++++++++++++++------- src/simulation/m_bubbles_EL_kernels.fpp | 14 +-- src/simulation/m_global_parameters.fpp | 12 ++- src/simulation/m_start_up.fpp | 4 + 5 files changed, 107 insertions(+), 46 deletions(-) diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index c93c333e41..6aa38ed24b 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -47,7 +47,10 @@ contains real(wp) :: fCpbw, fCpinf, fCpinf_dot, fH, fHdot, c_gas, c_liquid real(wp) :: f_rddot - if (bubble_model == 1) then + if (bubble_model == 0) then + ! Particle + f_rddot = 0._wp + else if (bubble_model == 1) then ! Gilmore bubbles fCpinf = fP - pref fCpbw = f_cpbw(fR0, fR, fV, fpb) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 7d5a442e69..5b0ece63f2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -320,17 +320,17 @@ contains call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) ! 
Check if the bubble is located in the ghost cell of a symmetric boundary - if ((bc_x%beg == BC_REFLECTIVE .and. cell(1) < 0) .or. & - (bc_x%end == BC_REFLECTIVE .and. cell(1) > m) .or. & - (bc_y%beg == BC_REFLECTIVE .and. cell(2) < 0) .or. & - (bc_y%end == BC_REFLECTIVE .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if if (p > 0) then - if ((bc_z%beg == BC_REFLECTIVE .and. cell(3) < 0) .or. & - (bc_z%end == BC_REFLECTIVE .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if end if @@ -545,13 +545,11 @@ contains end do end if - -! Radial motion model + ! Radial motion model adap_dt_stop_max = 0 !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) do k = 1, nBubs - ! Keller-Miksis model ! 
Current bubble state myPb = gas_p(k, 2) @@ -610,20 +608,22 @@ contains end if - if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & - cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& - intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k), gas_mv(k), & - cell, l, q_prim_vf) - else - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end if + do l = 1, 3 + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1), & + intfc_rad(k,1), mtn_vel(k,l,1), & + gas_mg(k), gas_mv(k, 1), & + Re(1), cell, l, q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if + end do adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) @@ -1044,6 +1044,8 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() @@ -1066,6 +1068,8 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) do k = 1, nBubs @@ -1078,10 +1082,11 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) call s_write_lag_particles(mytime) @@ -1102,6 +1107,8 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) do k = 1, nBubs @@ -1113,6 +1120,9 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + elseif (stage == 3) then !$acc parallel loop gang vector default(present) private(k) do k = 1, nBubs @@ -1125,6 +1135,8 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() @@ -1135,11 +1147,49 @@ contains end if end if - end if end subroutine s_update_lagrange_tdv_rk + !> This subroutine enforces reflective and wall boundary conditions for EL bubbles + subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + + integer :: dest + integer :: k + + !$acc parallel loop gang vector default(present) + do k = 1, nBubs + + if 
(any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) + else if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) + else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) + else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + end if + end if + + end do + + end subroutine s_enforce_EL_bubbles_boundary_conditions + !> This subroutine returns the computational coordinate of the cell for the given position. !! @param pos Input coordinates !! @param cell Computational coordinate of the cell @@ -1242,25 +1292,25 @@ contains (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) end if - ! For symmetric boundary condition - if (bc_x%beg == BC_REFLECTIVE) then + ! For symmetric and wall boundary condition + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) >= x_cb(-1))) end if - if (bc_x%end == BC_REFLECTIVE) then + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) end if - if (bc_y%beg == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) end if - if (bc_y%end == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) end if if (p > 0) then - if (bc_z%beg == BC_REFLECTIVE) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) end if - if (bc_z%end == BC_REFLECTIVE) then + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. 
(pos_part(3) < z_cb(p))) end if end if diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 43a81c0ec3..f4944850a9 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -473,13 +473,13 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,Re,cell,i,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: a, area, vol, mass, force, drag_force, vel - real(wp) :: pos, rad, dp, v_rel, mg, mv + real(wp) :: a, vol, mass, force, vel + real(wp) :: pos, rad, dp, v_rel, mg, mv, Re if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -488,7 +488,7 @@ contains elseif (i == 2) then dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) -y_cc(cell(2) - 1)) + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) elseif (i == 3 .and. p > 0) then dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & @@ -497,12 +497,12 @@ contains dp = 0._wp end if - area = pi * rad**2._wp - force = -1._wp * area * dp + vol = (4._wp/3._wp) * pi * rad**3._wp + force = -1._wp * vol * dp if (lag_params%drag_model == 1) then ! Stokes drag v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) - force = force - (6._wp * pi * rad * v_rel) / fluid_pp(1)%Re(1) + force = force - (6._wp * pi * rad * v_rel) / Re end if a = force / (mg + mv) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 6082804d4c..e82405d85c 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -328,6 +328,10 @@ module m_global_parameters !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + !$acc declare create(bc_x%beg, bc_x%end) + !$acc declare create(bc_y%beg, bc_y%end) + !$acc declare create(bc_z%beg, bc_z%end) + integer :: fd_order !< !! The order of the finite-difference (fd) approximations of the first-order !! derivatives that need to be evaluated when the CoM or flow probe data @@ -1250,12 +1254,12 @@ contains @:ALLOCATE(x_cc(-buff_size:m + buff_size)) @:ALLOCATE(dx(-buff_size:m + buff_size)) - if (n == 0) return; + if (n == 0) return; @:ALLOCATE(y_cb(-1 - buff_size:n + buff_size)) @:ALLOCATE(y_cc(-buff_size:n + buff_size)) @:ALLOCATE(dy(-buff_size:n + buff_size)) - if (p == 0) return; + if (p == 0) return; @:ALLOCATE(z_cb(-1 - buff_size:p + buff_size)) @:ALLOCATE(z_cc(-buff_size:p + buff_size)) @:ALLOCATE(dz(-buff_size:p + buff_size)) @@ -1334,10 +1338,10 @@ contains ! 
Deallocating grid variables for the x-, y- and z-directions @:DEALLOCATE(x_cb, x_cc, dx) - if (n == 0) return; + if (n == 0) return; @:DEALLOCATE(y_cb, y_cc, dy) - if (p == 0) return; + if (p == 0) return; @:DEALLOCATE(z_cb, z_cc, dz) end subroutine s_finalize_global_parameters_module diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 5959c00ed0..49500840a2 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1656,6 +1656,10 @@ contains !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + !$acc update device(bc_x%beg, bc_x%end) + !$acc update device(bc_y%beg, bc_y%end) + !$acc update device(bc_z%beg, bc_z%end) + !$acc update device(bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out) !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out) !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out) From d58d5a83d7ac49b346a9c7b259e433ef1b6fed84 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Sun, 1 Jun 2025 18:09:23 -0400 Subject: [PATCH 25/62] bug fix for moving bubbles --- src/simulation/m_bubbles_EL.fpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5b0ece63f2..72155bcb13 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -608,17 +608,17 @@ contains end if - do l = 1, 3 + do l = 1, num_dims if (lag_params%vel_model == 1) then mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1), & - intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k), gas_mv(k, 1), & - Re(1), cell, l, q_prim_vf) + mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & + intfc_rad(k,1), mtn_vel(k,l,2), & + gas_mg(k), gas_mv(k, 2), & + Re(1), myRho, cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -834,11 +834,17 @@ contains integer :: smearGrid, smearGridz logical :: celloutside - scoord = mtn_s(bub_id, 1:3, 2) f_pinfl = 0._wp - !< Find current bubble cell - cell(:) = int(scoord(:)) + if (lag_params%vel_model > 0) then + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + scoord = mtn_s(bub_id, 1:3, 2) + else + scoord = mtn_s(bub_id, 1:3, 2) + cell(:) = int(scoord(:)) + end if + !$acc loop seq do i = 1, num_dims if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 From 22ba445eacc1674163e2ba30c09df5b447511181 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Sun, 1 Jun 2025 18:11:35 -0400 Subject: [PATCH 26/62] drag models --- src/common/m_constants.fpp | 1 + src/common/m_derived_types.fpp | 1 + src/simulation/m_bubbles_EL_kernels.fpp | 39 +++++++++++++++++-------- src/simulation/m_global_parameters.fpp | 1 + toolchain/mfc/run/case_dicts.py | 2 +- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 02e2eb9bd4..0c17e9789f 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -13,6 +13,7 @@ module m_constants real(wp), parameter :: small_alf = 1e-11_wp !< Small alf tolerance real(wp), parameter :: pi = 3.141592653589793_wp !< Pi real(wp), parameter :: verysmall = 
1.e-12_wp !< Very small number + real(wp), parameter :: Re_b_min = 1.e-6_wp !< minimum bubble reynolds number for drag coeff calc. integer, parameter :: num_stcls_min = 5 !< Minimum # of stencils integer, parameter :: path_len = 400 !< Maximum path length diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index fd174840ac..d5978bb91b 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -434,6 +434,7 @@ module m_derived_types integer :: nBubs_glb !< Global number of bubbles integer :: vel_model !< Particle velocity model integer :: drag_model !< Particle drag model + real(wp) :: c_d !< Drag coefficient real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) real(wp) :: valmaxvoid !< Maximum void fraction permitted diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index f4944850a9..ea0281ce14 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -453,16 +453,13 @@ contains y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) x3 = y_cc(cell(2) + 1) y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - elseif (i == 3 .and. p > 0) then + elseif (i == 3) then x1 = z_cc(cell(3) - 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) x2 = z_cc(cell(3)) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) x3 = z_cc(cell(3) + 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - else - x1 = 0._wp; x2 = 0._wp; x3 = 0._wp - y1 = 0._wp; y2 = 0._wp; y3 = 0._wp endif L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) @@ -473,13 +470,13 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,Re,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: a, vol, mass, force, vel - real(wp) :: pos, rad, dp, v_rel, mg, mv, Re + real(wp) :: a, vol, mass, force, vel, area + real(wp) :: pos, rad, dp, v_rel, mg, mv, Re, rho, c_d if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -489,24 +486,42 @@ contains dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) - elseif (i == 3 .and. p > 0) then + elseif (i == 3) then dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) - else - dp = 0._wp end if vol = (4._wp/3._wp) * pi * rad**3._wp force = -1._wp * vol * dp - if (lag_params%drag_model == 1) then ! Stokes drag - v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + + if (lag_params%drag_model == 1) then ! Free slip Stokes drag + force = force - (4._wp * pi * rad * v_rel) / Re + else if (lag_params%drag_model == 2) then ! No slip Stokes drag force = force - (6._wp * pi * rad * v_rel) / Re + elseif (lag_params%drag_model == 3) then ! 
Clift and Gauvin (1971) Cd fit + area = pi * rad**2._wp + c_d = f_get_clift_gauvin_drag_coeff(v_rel, Re, rho, rad) + force = force - 0.5_wp * sign(1._wp, v_rel) * rho * (v_rel ** 2._wp) * c_d * area end if a = force / (mg + mv) end function f_get_acceleration + function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) +!$acc routine seq + real(wp) :: v_rel, mu_inv, rho, rad + real(wp) :: c_d + real(wp) :: Re_b ! Bubble Reynolds number + + Re_b = max(2._wp * rho * abs(v_rel) * rad * mu_inv, Re_b_min) + + C_d = 24._wp * (1 + 0.15_wp * Re_b ** (0.687_wp)) / Re_b + !C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) + + end function f_get_clift_gauvin_drag_coeff + end module m_bubbles_EL_kernels diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 3f2a295bf9..6b3ef62bd1 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -751,6 +751,7 @@ contains lag_params%nBubs_glb = dflt_int lag_params%vel_model = dflt_int lag_params%drag_model = dflt_int + lag_params%c_d = dflt_real lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real lag_params%valmaxvoid = dflt_real diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index cc05bcddf2..ad35b62ae1 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -307,7 +307,7 @@ def analytic(self): SIMULATION[f'lag_params%{var}'] = ParamType.INT for var in [ 'epsilonb', 'valmaxvoid', 'charwidth', 'diffcoefvap', - 'c0', 'rho0', 'T0', 'x0', 'Thost' ]: + 'c0', 'rho0', 'T0', 'x0', 'Thost', 'c_d' ]: SIMULATION[f'lag_params%{var}'] = ParamType.REAL for var in [ 'diffusion', 'reactions' ]: From 854484a397d1aaaf4ab1ceb12c95303b51400b2e Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:49:52 -0400 Subject: [PATCH 27/62] QBMM BC Bug Fix --- src/common/m_boundary_common.fpp | 10 +++++----- src/simulation/m_bubbles_EL.fpp | 13 +++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 97134421fa..29ce08c70e 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -171,8 +171,8 @@ contains end select if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, -1)%sf(0, k, l) /= BC_AXIS)) then + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then call s_qbmm_extrapolation(2, -1, k, l, pb, mv) end if end do @@ -201,7 +201,7 @@ contains end select if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then call s_qbmm_extrapolation(2, 1, k, l, pb, mv) end if end do @@ -234,7 +234,7 @@ contains end select if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then call s_qbmm_extrapolation(3, -1, k, l, pb, mv) end if end do @@ -263,7 +263,7 @@ contains end select if (qbmm .and. (.not. polytropic) .and. 
& - (bc_type(3, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then call s_qbmm_extrapolation(3, 1, k, l, pb, mv) end if end do diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 72155bcb13..c377b0bfc2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -25,6 +25,8 @@ module m_bubbles_EL use m_helper + use m_ibm + implicit none !(nBub) @@ -1163,6 +1165,8 @@ contains integer :: dest integer :: k + integer, dimension(3) :: cell + !$acc parallel loop gang vector default(present) do k = 1, nBubs @@ -1192,6 +1196,15 @@ contains end if end if + if (ib) then + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) + + if (ib_markers%sf(cell(1), cell(2), cell(3)) == 1) then + print*, "In IB" + end if + end if + end do end subroutine s_enforce_EL_bubbles_boundary_conditions From 35ac23566437718b67882593a00c9e023b337979 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 2 Jun 2025 14:35:27 -0400 Subject: [PATCH 28/62] IBM support for EL Bubbles --- src/simulation/m_bubbles_EL.fpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index c377b0bfc2..8f00cf8788 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1162,8 +1162,8 @@ contains !> This subroutine enforces reflective and wall boundary conditions for EL bubbles subroutine s_enforce_EL_bubbles_boundary_conditions(dest) - integer :: dest - integer :: k + integer, intent(in) :: dest + integer :: k, i, patch_id integer, dimension(3) :: cell @@ -1200,8 +1200,15 @@ contains cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) - if (ib_markers%sf(cell(1), cell(2), cell(3)) == 1) then - print*, "In IB" + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then + patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + + !$acc routine seq + do i = 1, num_dims + mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & + levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & + * levelset%sf(cell(1), cell(2), cell(3), 1) + end do end if end if From a02fe6df22e3cf5b469462f3ae8ab672fa71a5d3 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 2 Jun 2025 18:01:43 -0400 Subject: [PATCH 29/62] stuff --- src/simulation/m_bubbles_EL.fpp | 24 ++++++++++++++++-------- src/simulation/m_time_steppers.fpp | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 8f00cf8788..beda4de145 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -571,6 +571,8 @@ contains ! Obtaining driving pressure call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) + !if (k == 14) print*, k, cell(1), cell(2), cell(3) + ! 
Obtain liquid density and computing speed of sound from pinf !$acc loop seq do i = 1, num_fluids @@ -840,8 +842,13 @@ contains if (lag_params%vel_model > 0) then cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) + !print*, mtn_pos(bub_id, 1, 2), mtn_pos(bub_id, 2, 2) scoord = mtn_s(bub_id, 1:3, 2) + + !if (k == 14) print*, "pinf cell", cell + !if (k == 14) print*, "pinf scoord", scoord + !if (k == 14) print*, "pinf pos", mtn_pos(bub_id,:,2) else scoord = mtn_s(bub_id, 1:3, 2) cell(:) = int(scoord(:)) @@ -1164,10 +1171,9 @@ contains integer, intent(in) :: dest integer :: k, i, patch_id - integer, dimension(3) :: cell - !$acc parallel loop gang vector default(present) + !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1198,17 +1204,19 @@ contains if (ib) then cell = -buff_size - call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) - + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + !if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) - - !$acc routine seq + if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), 1) + * levelset%sf(cell(1), cell(2), cell(3), patch_id) end do + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell end if end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 5af09f5ed6..de07dfd631 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1020,7 +1020,7 @@ contains integer :: i, j, k, l call nvtxStartRange("RHS-BODYFORCES") - call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) + call s_compute_body_forces_rhs(q_cons_vf, q_prim_vf, rhs_vf) !$acc parallel loop collapse(4) gang vector default(present) do i = momxb, E_idx From 27d8a4a430a94215af97f69f469bbd3e8156f044 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:58:32 -0400 Subject: [PATCH 30/62] MPI EL on CPUs --- MPIRefactor.txt | 1614 ------------------- src/common/m_mpi_common.fpp | 57 +- src/post_process/m_global_parameters.fpp | 5 + src/pre_process/m_global_parameters.fpp | 5 + "src/simulation/\\" | 1837 ++++++++++++++++++++++ src/simulation/m_bubbles_EL.fpp | 80 +- src/simulation/m_bubbles_EL_kernels.fpp | 20 +- src/simulation/m_global_parameters.fpp | 5 + src/simulation/m_mpi_proxy.fpp | 224 ++- src/simulation/m_rhs.fpp | 15 +- src/simulation/m_time_steppers.fpp | 2 +- 11 files changed, 2210 insertions(+), 1654 deletions(-) delete mode 100644 MPIRefactor.txt create mode 100644 "src/simulation/\\" diff --git a/MPIRefactor.txt b/MPIRefactor.txt deleted file mode 100644 index fe7a7b4511..0000000000 --- a/MPIRefactor.txt +++ /dev/null @@ -1,1614 +0,0 @@ -mfc: Found CMake: /fastscratch/bwilfong3/software/MFC-Wilfong/build/cmake/bin/cmake. -mfc: OK > (venv) Entered the Python 3.10.16 virtual environment (>= 3.9). - - .=++*: -+*+=. | bwilfong3@wingtip-gpu3 [Linux] - :+ -*- == =* . 
| ------------------------------ - :*+ == ++ .+- | - :*##-.....:*+ .#%+++=--+=:::. | --jobs 32 - -=-++-======#=--**+++==+*++=::-:. | --mpi --gpu --no-debug --no-gcov --no-unified --no-single - .:++=----------====+*= ==..:%..... | --targets pre_process and simulation - .:-=++++===--==+=-+= +. := | - +#=::::::::=%=. -+: =+ *: | ---------------------------------------------------------- - .*=-=*=.. :=+*+: -...-- | $ ./mfc.sh (build, run, test, clean, count, packer) --help - - Acquiring /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py... - Acquiring /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py... - Build | syscheck, syscheck, pre_process, and simulation | Case Optimized: /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py - - - Run - Using queue system Interactive. - Using baked-in template for default. - Generating input files for syscheck... - - Generating syscheck.inp: - INFO: Forwarded 0/90 parameters. - - Generating input files for pre_process... - - Generating pre_process.inp: - INFO: Forwarded 72/90 parameters. - - Generating input files for simulation... - - Generating simulation.inp: - INFO: Forwarded 41/90 parameters. - - $ /bin/bash /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/MFC.sh - -+-----------------------------------------------------------------------------------------------------------+ -| MFC case # MFC @ /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py: | -+-----------------------------------------------------------------------------------------------------------+ -| * Start-time 14:00:47 * Start-date 14:00:47 | -| * Partition N/A * Walltime 01:00:00 | -| * Account N/A * Nodes 1 | -| * Job Name MFC * Engine interactive | -| * QoS N/A * Binary N/A | -| * Queue System Interactive * Email N/A | -+-----------------------------------------------------------------------------------------------------------+ - -mfc: WARNING > This is the default template. -mfc: WARNING > It is not intended to support all systems and execution engines. -mfc: WARNING > Consider using a different template via the --computer option if you encounter problems. -mfc: OK > :) Selected MPI launcher mpirun. Use --binary to override. -mfc: OK > :) Running syscheck: - - [TEST] MPI: call mpi_init(ierr) - [TEST] MPI: call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) - [TEST] MPI: call mpi_barrier(MPI_COMM_WORLD, ierr) - [TEST] MPI: call assert(rank >= 0) - [TEST] MPI: call mpi_comm_size(MPI_COMM_WORLD, nRanks, ierr) - [TEST] MPI: call assert(nRanks > 0 .and. rank < nRanks) - [TEST] ACC: devtype = acc_get_device_type() - [TEST] ACC: num_devices = acc_get_num_devices(devtype) - [TEST] ACC: call assert(num_devices > 0) - [TEST] ACC: call acc_set_device_num(mod(rank, nRanks), devtype) - [TEST] ACC: allocate(arr(1:N)) - [TEST] ACC: !$acc enter data create(arr(1:N)) - [TEST] ACC: !$acc parallel loop - [TEST] ACC: !$acc update host(arr(1:N)) - [TEST] ACC: !$acc exit data delete(arr) - [TEST] MPI: call mpi_barrier(MPI_COMM_WORLD, ierr) - [TEST] MPI: call mpi_finalize(ierr) - - Syscheck: PASSED. 
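The [TEST] lines above list the MPI and OpenACC calls that the syscheck target exercises before a GPU run. For reference, a minimal, self-contained sketch of the same kind of check is given here; it is not MFC source, and the program name, the array name arr, the size N, and the device-selection policy (mod by the device count) are illustrative assumptions. The barrier and assert steps from the log are omitted for brevity.

program device_check
    ! Minimal MPI + OpenACC sanity check: each rank picks an accelerator,
    ! runs a trivial kernel on it, and copies the result back to the host.
    use mpi
    use openacc
    implicit none
    integer, parameter :: N = 1024
    integer :: ierr, rank, nRanks, num_devices, i
    integer(acc_device_kind) :: devtype
    real, allocatable :: arr(:)

    call mpi_init(ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
    call mpi_comm_size(MPI_COMM_WORLD, nRanks, ierr)

    ! Bind this rank to one of the visible accelerators
    devtype = acc_get_device_type()
    num_devices = acc_get_num_devices(devtype)
    if (num_devices > 0) call acc_set_device_num(mod(rank, num_devices), devtype)

    ! Create device storage, fill it in a device kernel, bring it back
    allocate (arr(1:N))
    !$acc enter data create(arr(1:N))
    !$acc parallel loop
    do i = 1, N
        arr(i) = real(i)
    end do
    !$acc update host(arr(1:N))
    !$acc exit data delete(arr)
    deallocate (arr)

    call mpi_finalize(ierr)
end program device_check

Built with an OpenACC-capable compiler (e.g. nvfortran -acc) and launched under mpirun, a clean exit of a program like this corresponds to the "Syscheck: PASSED." verdict above.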
-Generating '/nethome/bwilfong3/tmp/nsys-report-3830.qdstrm' - [1/8] [0% ] report1.nsys-rep [1/8] [0% ] report1.nsys-rep [1/8] [==========49% ] report1.nsys-rep [1/8] [=======================95% ] report1.nsys-rep [1/8] [=======================96% ] report1.nsys-rep [1/8] [========================100%] report1.nsys-rep [1/8] [========================100%] report1.nsys-rep - [2/8] [0% ] report1.sqlite [2/8] [1% ] report1.sqlite [2/8] [2% ] report1.sqlite [2/8] [3% ] report1.sqlite [2/8] [4% ] report1.sqlite [2/8] [5% ] report1.sqlite [2/8] [6% ] report1.sqlite [2/8] [7% ] report1.sqlite [2/8] [8% ] report1.sqlite [2/8] [9% ] report1.sqlite [2/8] [10% ] report1.sqlite [2/8] [11% ] report1.sqlite [2/8] [12% ] report1.sqlite [2/8] [13% ] report1.sqlite [2/8] [14% ] report1.sqlite [2/8] [=15% ] report1.sqlite [2/8] [=16% ] report1.sqlite [2/8] [=17% ] report1.sqlite [2/8] [==18% ] report1.sqlite [2/8] [==19% ] report1.sqlite [2/8] [==20% ] report1.sqlite [2/8] [==21% ] report1.sqlite [2/8] [===22% ] report1.sqlite [2/8] [===23% ] report1.sqlite [2/8] [===24% ] report1.sqlite [2/8] [====25% ] report1.sqlite [2/8] [====26% ] report1.sqlite [2/8] [====27% ] report1.sqlite [2/8] [====28% ] report1.sqlite [2/8] [=====29% ] report1.sqlite [2/8] [=====30% ] report1.sqlite [2/8] [=====31% ] report1.sqlite [2/8] [=====32% ] report1.sqlite [2/8] [======33% ] report1.sqlite [2/8] [======34% ] report1.sqlite [2/8] [======35% ] report1.sqlite [2/8] [=======36% ] report1.sqlite [2/8] [=======37% ] report1.sqlite [2/8] [=======38% ] report1.sqlite [2/8] [=======39% ] report1.sqlite [2/8] [========40% ] report1.sqlite [2/8] [========41% ] report1.sqlite [2/8] [========42% ] report1.sqlite [2/8] [=========43% ] report1.sqlite [2/8] [=========44% ] report1.sqlite [2/8] [=========45% ] report1.sqlite [2/8] [=========46% ] report1.sqlite [2/8] [==========47% ] report1.sqlite [2/8] [==========48% ] report1.sqlite [2/8] [==========49% ] report1.sqlite [2/8] [===========50% ] report1.sqlite [2/8] [===========51% ] report1.sqlite [2/8] [===========52% ] report1.sqlite [2/8] [===========53% ] report1.sqlite [2/8] [============54% ] report1.sqlite [2/8] [============55% ] report1.sqlite [2/8] [============56% ] report1.sqlite [2/8] [============57% ] report1.sqlite [2/8] [=============58% ] report1.sqlite [2/8] [=============59% ] report1.sqlite [2/8] [=============60% ] report1.sqlite [2/8] [==============61% ] report1.sqlite [2/8] [==============62% ] report1.sqlite [2/8] [==============63% ] report1.sqlite [2/8] [==============64% ] report1.sqlite [2/8] [===============65% ] report1.sqlite [2/8] [===============66% ] report1.sqlite [2/8] [===============67% ] report1.sqlite [2/8] [================68% ] report1.sqlite [2/8] [================69% ] report1.sqlite [2/8] [================70% ] report1.sqlite [2/8] [================71% ] report1.sqlite [2/8] [=================72% ] report1.sqlite [2/8] [=================73% ] report1.sqlite [2/8] [=================74% ] report1.sqlite [2/8] [==================75% ] report1.sqlite [2/8] [==================76% ] report1.sqlite [2/8] [==================77% ] report1.sqlite [2/8] [==================78% ] report1.sqlite [2/8] [===================79% ] report1.sqlite [2/8] [===================80% ] report1.sqlite [2/8] [===================81% ] report1.sqlite [2/8] [===================82% ] report1.sqlite [2/8] [====================83% ] report1.sqlite [2/8] [====================84% ] report1.sqlite [2/8] [====================85% ] report1.sqlite [2/8] [=====================86% 
] report1.sqlite [2/8] [=====================87% ] report1.sqlite [2/8] [=====================88% ] report1.sqlite [2/8] [=====================89% ] report1.sqlite [2/8] [======================90% ] report1.sqlite [2/8] [======================91% ] report1.sqlite [2/8] [======================92% ] report1.sqlite [2/8] [=======================93% ] report1.sqlite [2/8] [=======================94% ] report1.sqlite [2/8] [=======================95% ] report1.sqlite [2/8] [=======================96% ] report1.sqlite [2/8] [========================97% ] report1.sqlite [2/8] [========================98% ] report1.sqlite [2/8] [========================99% ] report1.sqlite [2/8] [========================100%] report1.sqlite [2/8] [========================100%] report1.sqlite -[3/8] Executing 'nvtx_sum' stats report -[4/8] Executing 'cuda_api_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- --------- --------- -------- -------- ----------- ---------------------- - 48.0 849,042 1 849,042.0 849,042.0 849,042 849,042 0.0 cuMemAllocHost_v2 - 41.1 727,231 1 727,231.0 727,231.0 727,231 727,231 0.0 cuModuleLoadDataEx - 6.8 121,094 2 60,547.0 60,547.0 19,619 101,475 57,880.9 cuMemAlloc_v2 - 1.5 25,990 1 25,990.0 25,990.0 25,990 25,990 0.0 cuLaunchKernel - 1.0 18,462 1 18,462.0 18,462.0 18,462 18,462 0.0 cuMemcpyDtoHAsync_v2 - 0.6 11,129 1 11,129.0 11,129.0 11,129 11,129 0.0 cuMemcpyHtoDAsync_v2 - 0.5 8,387 4 2,096.8 2,002.0 707 3,676 1,222.8 cuStreamSynchronize - 0.2 2,716 1 2,716.0 2,716.0 2,716 2,716 0.0 cuInit - 0.1 2,532 1 2,532.0 2,532.0 2,532 2,532 0.0 cuModuleGetLoadingMode - 0.1 2,101 3 700.3 448.0 367 1,286 508.8 cuCtxSetCurrent - -[5/8] Executing 'cuda_gpu_kern_sum' stats report - - Time (%) Total Time (ns) Instances Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- -------- -------- -------- -------- ----------- --------------- - 100.0 1,920 1 1,920.0 1,920.0 1,920 1,920 0.0 syscheck_72_gpu - -[6/8] Executing 'cuda_gpu_mem_time_sum' stats report - - Time (%) Total Time (ns) Count Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Operation - -------- --------------- ----- -------- -------- -------- -------- ----------- ---------------------------- - 64.0 2,272 1 2,272.0 2,272.0 2,272 2,272 0.0 [CUDA memcpy Device-to-Host] - 36.0 1,280 1 1,280.0 1,280.0 1,280 1,280 0.0 [CUDA memcpy Host-to-Device] - -[7/8] Executing 'cuda_gpu_mem_size_sum' stats report - - Total (MB) Count Avg (MB) Med (MB) Min (MB) Max (MB) StdDev (MB) Operation - ---------- ----- -------- -------- -------- -------- ----------- ---------------------------- - 0.001 1 0.001 0.001 0.001 0.001 0.000 [CUDA memcpy Device-to-Host] - 0.000 1 0.000 0.000 0.000 0.000 0.000 [CUDA memcpy Host-to-Device] - -[8/8] Executing 'openacc_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------- ------------- ----------- ----------- ----------- --------------------------------- - 99.8 107,957,962 1 107,957,962.0 107,957,962.0 107,957,962 107,957,962 0.0 Device Init - 0.1 72,467 1 72,467.0 72,467.0 72,467 72,467 0.0 Enter Data@syscheck.fpp:71 - 0.0 40,021 1 40,021.0 40,021.0 40,021 40,021 0.0 Compute Construct@syscheck.fpp:72 - 0.0 30,068 1 30,068.0 30,068.0 30,068 30,068 0.0 Update@syscheck.fpp:76 - 0.0 28,352 1 28,352.0 28,352.0 28,352 28,352 0.0 Enqueue Upload - 0.0 27,962 1 27,962.0 27,962.0 27,962 27,962 0.0 
Enqueue Launch@syscheck.fpp:72 - 0.0 20,103 1 20,103.0 20,103.0 20,103 20,103 0.0 Enqueue Download@syscheck.fpp:76 - 0.0 13,219 1 13,219.0 13,219.0 13,219 13,219 0.0 Exit Data@syscheck.fpp:77 - 0.0 5,457 1 5,457.0 5,457.0 5,457 5,457 0.0 Wait@syscheck.fpp:71 - 0.0 4,477 2 2,238.5 2,238.5 1,494 2,983 1,052.9 Wait@syscheck.fpp:72 - 0.0 4,124 1 4,124.0 4,124.0 4,124 4,124 0.0 Exit Data@syscheck.fpp:72 - 0.0 3,276 1 3,276.0 3,276.0 3,276 3,276 0.0 Enter Data@syscheck.fpp:72 - 0.0 2,644 1 2,644.0 2,644.0 2,644 2,644 0.0 Wait@syscheck.fpp:76 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@syscheck.fpp:71 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@syscheck.fpp:71 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@syscheck.fpp:77 - -Generated: - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report1.nsys-rep - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report1.sqlite - -mfc: OK > :) Running pre_process: - - Pre-processing a 200x200x200 case on 1 rank(s) - Processing patch 1 - Processing patch 2 - Elapsed Time 6.484550952911377 -Generating '/nethome/bwilfong3/tmp/nsys-report-f00c.qdstrm' - [1/8] [0% ] report2.nsys-rep [1/8] [0% ] report2.nsys-rep [1/8] [=17% ] report2.nsys-rep [1/8] [===24% ] report2.nsys-rep [1/8] [============55% ] report2.nsys-rep [1/8] [====================85% ] report2.nsys-rep [1/8] [=====================86% ] report2.nsys-rep [1/8] [=====================87% ] report2.nsys-rep [1/8] [=====================88% ] report2.nsys-rep [1/8] [=====================89% ] report2.nsys-rep [1/8] [======================90% ] report2.nsys-rep [1/8] [========================100%] report2.nsys-rep [1/8] [========================100%] report2.nsys-rep - [2/8] [0% ] report2.sqlite [2/8] [1% ] report2.sqlite [2/8] [2% ] report2.sqlite [2/8] [3% ] report2.sqlite [2/8] [4% ] report2.sqlite [2/8] [5% ] report2.sqlite [2/8] [6% ] report2.sqlite [2/8] [7% ] report2.sqlite [2/8] [8% ] report2.sqlite [2/8] [9% ] report2.sqlite [2/8] [10% ] report2.sqlite [2/8] [11% ] report2.sqlite [2/8] [12% ] report2.sqlite [2/8] [13% ] report2.sqlite [2/8] [14% ] report2.sqlite [2/8] [=15% ] report2.sqlite [2/8] [=16% ] report2.sqlite [2/8] [=17% ] report2.sqlite [2/8] [==18% ] report2.sqlite [2/8] [==19% ] report2.sqlite [2/8] [==20% ] report2.sqlite [2/8] [==21% ] report2.sqlite [2/8] [===22% ] report2.sqlite [2/8] [===23% ] report2.sqlite [2/8] [===24% ] report2.sqlite [2/8] [====25% ] report2.sqlite [2/8] [====26% ] report2.sqlite [2/8] [====27% ] report2.sqlite [2/8] [====28% ] report2.sqlite [2/8] [=====29% ] report2.sqlite [2/8] [=====30% ] report2.sqlite [2/8] [=====31% ] report2.sqlite [2/8] [=====32% ] report2.sqlite [2/8] [======33% ] report2.sqlite [2/8] [======34% ] report2.sqlite [2/8] [======35% ] report2.sqlite [2/8] [=======36% ] report2.sqlite [2/8] [=======37% ] report2.sqlite [2/8] [=======38% ] report2.sqlite [2/8] [=======39% ] report2.sqlite [2/8] [========40% ] report2.sqlite [2/8] [========41% ] report2.sqlite [2/8] [========42% ] report2.sqlite [2/8] [=========43% ] report2.sqlite [2/8] [=========44% ] report2.sqlite [2/8] [=========45% ] report2.sqlite [2/8] [=========46% ] report2.sqlite [2/8] [==========47% ] report2.sqlite [2/8] [==========48% ] report2.sqlite [2/8] [==========49% ] report2.sqlite [2/8] [===========50% ] report2.sqlite [2/8] [===========51% ] report2.sqlite [2/8] [===========52% ] report2.sqlite [2/8] [===========53% ] report2.sqlite [2/8] [============54% ] report2.sqlite [2/8] [============55% ] report2.sqlite [2/8] [============56% ] report2.sqlite [2/8] 
[============57% ] report2.sqlite [2/8] [=============58% ] report2.sqlite [2/8] [=============59% ] report2.sqlite [2/8] [=============60% ] report2.sqlite [2/8] [==============61% ] report2.sqlite [2/8] [==============62% ] report2.sqlite [2/8] [==============63% ] report2.sqlite [2/8] [==============64% ] report2.sqlite [2/8] [===============65% ] report2.sqlite [2/8] [===============66% ] report2.sqlite [2/8] [===============67% ] report2.sqlite [2/8] [================68% ] report2.sqlite [2/8] [================69% ] report2.sqlite [2/8] [================70% ] report2.sqlite [2/8] [================71% ] report2.sqlite [2/8] [=================72% ] report2.sqlite [2/8] [=================73% ] report2.sqlite [2/8] [=================74% ] report2.sqlite [2/8] [==================75% ] report2.sqlite [2/8] [==================76% ] report2.sqlite [2/8] [==================77% ] report2.sqlite [2/8] [==================78% ] report2.sqlite [2/8] [===================79% ] report2.sqlite [2/8] [===================80% ] report2.sqlite [2/8] [===================81% ] report2.sqlite [2/8] [===================82% ] report2.sqlite [2/8] [====================83% ] report2.sqlite [2/8] [====================84% ] report2.sqlite [2/8] [====================85% ] report2.sqlite [2/8] [=====================86% ] report2.sqlite [2/8] [=====================87% ] report2.sqlite [2/8] [=====================88% ] report2.sqlite [2/8] [=====================89% ] report2.sqlite [2/8] [======================90% ] report2.sqlite [2/8] [======================91% ] report2.sqlite [2/8] [======================92% ] report2.sqlite [2/8] [=======================93% ] report2.sqlite [2/8] [=======================94% ] report2.sqlite [2/8] [=======================95% ] report2.sqlite [2/8] [=======================96% ] report2.sqlite [2/8] [========================97% ] report2.sqlite [2/8] [========================98% ] report2.sqlite [2/8] [========================99% ] report2.sqlite [2/8] [========================100%] report2.sqlite [2/8] [========================100%] report2.sqlite -[3/8] Executing 'nvtx_sum' stats report -[4/8] Executing 'cuda_api_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- -------- -------- -------- -------- ----------- ---------------------- - 100.0 2,331 1 2,331.0 2,331.0 2,331 2,331 0.0 cuModuleGetLoadingMode - -[5/8] Executing 'cuda_gpu_kern_sum' stats report -[6/8] Executing 'cuda_gpu_mem_time_sum' stats report -[7/8] Executing 'cuda_gpu_mem_size_sum' stats report -[8/8] Executing 'openacc_sum' stats report -Generated: - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report2.nsys-rep - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report2.sqlite - -mfc: OK > :) Running simulation: - - Simulating a case-optimized 200x200x200 case on 1 rank(s) with OpenACC offloading. 
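The Performance figure printed after the last time step below is a grind time: wall-clock nanoseconds normalized per grid point (gp), transported equation (eq), and right-hand-side evaluation (rhs). As a reading of the log output rather than of the MFC source, the quantity is roughly

    ns/gp/eq/rhs ~= t_elapsed [ns] / (N_steps * N_gp * N_eq * N_rhs)

where for this case N_gp = 200^3 = 8.0e6 grid points and N_rhs = 3, consistent with the 90 COMPUTE-RHS ranges recorded over 30 TIMESTEP ranges in the nvtx summary further below for the third-order TVD RK stepper; N_eq is the number of transported equations for the case.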
- [ 0%] Time step 1 of 31 @ t_step = 0 - [ 4%] Time step 2 of 31 @ t_step = 1 - [ 7%] Time step 3 of 31 @ t_step = 2 - [ 10%] Time step 4 of 31 @ t_step = 3 - [ 13%] Time step 5 of 31 @ t_step = 4 - [ 17%] Time step 6 of 31 @ t_step = 5 - [ 20%] Time step 7 of 31 @ t_step = 6 - [ 23%] Time step 8 of 31 @ t_step = 7 - [ 26%] Time step 9 of 31 @ t_step = 8 - [ 30%] Time step 10 of 31 @ t_step = 9 - [ 33%] Time step 11 of 31 @ t_step = 10 - [ 36%] Time step 12 of 31 @ t_step = 11 - [ 39%] Time step 13 of 31 @ t_step = 12 - [ 42%] Time step 14 of 31 @ t_step = 13 - [ 46%] Time step 15 of 31 @ t_step = 14 - [ 49%] Time step 16 of 31 @ t_step = 15 - [ 52%] Time step 17 of 31 @ t_step = 16 - [ 55%] Time step 18 of 31 @ t_step = 17 - [ 59%] Time step 19 of 31 @ t_step = 18 - [ 62%] Time step 20 of 31 @ t_step = 19 - [ 65%] Time step 21 of 31 @ t_step = 20 - [ 68%] Time step 22 of 31 @ t_step = 21 - [ 71%] Time step 23 of 31 @ t_step = 22 - [ 75%] Time step 24 of 31 @ t_step = 23 - [ 78%] Time step 25 of 31 @ t_step = 24 - [ 81%] Time step 26 of 31 @ t_step = 25 - [ 84%] Time step 27 of 31 @ t_step = 26 - [ 88%] Time step 28 of 31 @ t_step = 27 - [ 91%] Time step 29 of 31 @ t_step = 28 - [ 94%] Time step 30 of 31 @ t_step = 29 - Performance: 0.7153143678123322 ns/gp/eq/rhs -Generating '/nethome/bwilfong3/tmp/nsys-report-e3c3.qdstrm' - [1/8] [0% ] report3.nsys-rep [1/8] [0% ] report3.nsys-rep [1/8] [6% ] report3.nsys-rep [1/8] [5% ] report3.nsys-rep [1/8] [11% ] report3.nsys-rep [1/8] [==21% ] report3.nsys-rep [1/8] [=====31% ] report3.nsys-rep [1/8] [========41% ] report3.nsys-rep [1/8] [===========51% ] report3.nsys-rep [1/8] [==============61% ] report3.nsys-rep [1/8] [================71% ] report3.nsys-rep [1/8] [==================77% ] report3.nsys-rep [1/8] [==================78% ] report3.nsys-rep [1/8] [========================100%] report3.nsys-rep [1/8] [========================100%] report3.nsys-rep - [2/8] [0% ] report3.sqlite [2/8] [1% ] report3.sqlite [2/8] [2% ] report3.sqlite [2/8] [3% ] report3.sqlite [2/8] [4% ] report3.sqlite [2/8] [5% ] report3.sqlite [2/8] [6% ] report3.sqlite [2/8] [7% ] report3.sqlite [2/8] [8% ] report3.sqlite [2/8] [9% ] report3.sqlite [2/8] [10% ] report3.sqlite [2/8] [11% ] report3.sqlite [2/8] [12% ] report3.sqlite [2/8] [13% ] report3.sqlite [2/8] [14% ] report3.sqlite [2/8] [=15% ] report3.sqlite [2/8] [=16% ] report3.sqlite [2/8] [=17% ] report3.sqlite [2/8] [==18% ] report3.sqlite [2/8] [==19% ] report3.sqlite [2/8] [==20% ] report3.sqlite [2/8] [==21% ] report3.sqlite [2/8] [===22% ] report3.sqlite [2/8] [===23% ] report3.sqlite [2/8] [===24% ] report3.sqlite [2/8] [====25% ] report3.sqlite [2/8] [====26% ] report3.sqlite [2/8] [====27% ] report3.sqlite [2/8] [====28% ] report3.sqlite [2/8] [=====29% ] report3.sqlite [2/8] [=====30% ] report3.sqlite [2/8] [=====31% ] report3.sqlite [2/8] [=====32% ] report3.sqlite [2/8] [======33% ] report3.sqlite [2/8] [======34% ] report3.sqlite [2/8] [======35% ] report3.sqlite [2/8] [=======36% ] report3.sqlite [2/8] [=======37% ] report3.sqlite [2/8] [=======38% ] report3.sqlite [2/8] [=======39% ] report3.sqlite [2/8] [========40% ] report3.sqlite [2/8] [========41% ] report3.sqlite [2/8] [========42% ] report3.sqlite [2/8] [=========43% ] report3.sqlite [2/8] [=========44% ] report3.sqlite [2/8] [=========45% ] report3.sqlite [2/8] [=========46% ] report3.sqlite [2/8] [==========47% ] report3.sqlite [2/8] [==========48% ] report3.sqlite [2/8] [==========49% ] report3.sqlite [2/8] [===========50% ] 
report3.sqlite [2/8] [===========51% ] report3.sqlite [2/8] [===========52% ] report3.sqlite [2/8] [===========53% ] report3.sqlite [2/8] [============54% ] report3.sqlite [2/8] [============55% ] report3.sqlite [2/8] [============56% ] report3.sqlite [2/8] [============57% ] report3.sqlite [2/8] [=============58% ] report3.sqlite [2/8] [=============59% ] report3.sqlite [2/8] [=============60% ] report3.sqlite [2/8] [==============61% ] report3.sqlite [2/8] [==============62% ] report3.sqlite [2/8] [==============63% ] report3.sqlite [2/8] [==============64% ] report3.sqlite [2/8] [===============65% ] report3.sqlite [2/8] [===============66% ] report3.sqlite [2/8] [===============67% ] report3.sqlite [2/8] [================68% ] report3.sqlite [2/8] [================69% ] report3.sqlite [2/8] [================70% ] report3.sqlite [2/8] [================71% ] report3.sqlite [2/8] [=================72% ] report3.sqlite [2/8] [=================73% ] report3.sqlite [2/8] [=================74% ] report3.sqlite [2/8] [==================75% ] report3.sqlite [2/8] [==================76% ] report3.sqlite [2/8] [==================77% ] report3.sqlite [2/8] [==================78% ] report3.sqlite [2/8] [===================79% ] report3.sqlite [2/8] [===================80% ] report3.sqlite [2/8] [===================81% ] report3.sqlite [2/8] [===================82% ] report3.sqlite [2/8] [====================83% ] report3.sqlite [2/8] [====================84% ] report3.sqlite [2/8] [====================85% ] report3.sqlite [2/8] [=====================86% ] report3.sqlite [2/8] [=====================87% ] report3.sqlite [2/8] [=====================88% ] report3.sqlite [2/8] [=====================89% ] report3.sqlite [2/8] [======================90% ] report3.sqlite [2/8] [======================91% ] report3.sqlite [2/8] [======================92% ] report3.sqlite [2/8] [=======================93% ] report3.sqlite [2/8] [=======================94% ] report3.sqlite [2/8] [=======================95% ] report3.sqlite [2/8] [=======================96% ] report3.sqlite [2/8] [========================97% ] report3.sqlite [2/8] [========================98% ] report3.sqlite [2/8] [========================99% ] report3.sqlite [2/8] [========================100%] report3.sqlite [2/8] [========================100%] report3.sqlite -[3/8] Executing 'nvtx_sum' stats report - - Time (%) Total Time (ns) Instances Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Style Range - -------- --------------- --------- --------------- --------------- ------------- ------------- ----------- ------- --------------------- - 25.1 5,556,131,040 1 5,556,131,040.0 5,556,131,040.0 5,556,131,040 5,556,131,040 0.0 PushPop SIMULATION-TIME-MARCH - 19.7 4,343,486,923 30 144,782,897.4 144,523,688.0 144,224,270 151,966,428 1,420,233.3 PushPop TIMESTEP - 18.7 4,132,222,633 90 45,913,584.8 45,808,604.0 45,724,623 52,483,659 729,504.9 PushPop COMPUTE-RHS - 7.5 1,649,133,287 270 6,107,901.1 5,774,906.5 5,708,817 6,886,989 513,412.2 PushPop RHS-WENO - 6.8 1,492,020,514 270 5,526,001.9 5,211,245.5 5,037,210 8,508,008 573,089.0 PushPop RHS-RIEMANN-SOLVER - 6.4 1,423,381,163 1 1,423,381,163.0 1,423,381,163.0 1,423,381,163 1,423,381,163 0.0 PushPop INIT - 5.5 1,211,622,280 1 1,211,622,280.0 1,211,622,280.0 1,211,622,280 1,211,622,280 0.0 PushPop SAVE-DATA - 3.4 740,475,264 1 740,475,264.0 740,475,264.0 740,475,264 740,475,264 0.0 PushPop INIT-MODULES - 2.9 630,345,732 270 2,334,613.8 2,374,501.0 2,188,147 3,693,835 122,116.5 PushPop RHS-ADVECTION-SRC 
- 2.8 620,236,026 1 620,236,026.0 620,236,026.0 620,236,026 620,236,026 0.0 PushPop INIT-MPI - 0.5 103,612,551 90 1,151,250.6 1,143,181.0 1,140,578 1,803,069 69,542.1 PushPop RHS-CONVERT - 0.4 80,835,591 1 80,835,591.0 80,835,591.0 80,835,591 80,835,591 0.0 PushPop FINALIZE-MODULES - 0.3 62,647,077 1 62,647,077.0 62,647,077.0 62,647,077 62,647,077 0.0 PushPop INIT-GPU-VARS - 0.2 46,654,954 90 518,388.4 508,176.0 503,473 1,304,662 84,017.3 PushPop RHS-COMMUNICATION - 0.0 106,687 270 395.1 275.5 162 2,831 419.7 PushPop RHS-HYPOELASTICITY - 0.0 83,918 270 310.8 230.5 104 2,128 329.9 PushPop RHS-MHD - 0.0 53,827 120 448.6 276.0 130 4,553 670.4 PushPop RHS-ELASTIC - -[4/8] Executing 'cuda_api_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------ ------------ ---------- ---------- ----------- ---------------------- - 91.8 4,239,454,329 14,595 290,473.1 6,639.0 442 12,077,131 875,932.1 cuStreamSynchronize - 4.2 193,440,619 38 5,090,542.6 239,459.5 233,982 28,768,773 9,655,708.6 cuMemcpyDtoHAsync_v2 - 1.7 77,550,358 5,596 13,858.2 2,461.0 2,024 7,834,190 292,223.2 cuMemcpyHtoDAsync_v2 - 0.9 41,644,729 1 41,644,729.0 41,644,729.0 41,644,729 41,644,729 0.0 cuModuleLoadDataEx - 0.5 23,663,193 1 23,663,193.0 23,663,193.0 23,663,193 23,663,193 0.0 cuMemHostAlloc - 0.5 21,146,694 6,121 3,454.8 3,199.0 2,713 35,564 869.4 cuLaunchKernel - 0.3 13,049,363 243 53,701.1 75,275.0 1,809 320,571 47,890.1 cuMemAlloc_v2 - 0.1 5,900,487 3,098 1,904.6 1,701.0 1,396 11,795 599.6 cuMemsetD32Async - 0.0 885,633 1 885,633.0 885,633.0 885,633 885,633 0.0 cuMemAllocHost_v2 - 0.0 2,758 3 919.3 594.0 273 1,891 856.7 cuCtxSetCurrent - 0.0 1,885 1 1,885.0 1,885.0 1,885 1,885 0.0 cuModuleGetLoadingMode - 0.0 1,561 1 1,561.0 1,561.0 1,561 1,561 0.0 cuInit - -[5/8] Executing 'cuda_gpu_kern_sum' stats report - - Time (%) Total Time (ns) Instances Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------ ------------ ---------- ---------- ----------- ---------------------------------------------------------------------------- - 9.9 416,089,697 90 4,623,218.9 4,621,818.5 4,616,714 4,661,451 6,274.4 m_weno_s_weno_797_gpu__2 - 9.9 416,057,697 90 4,622,863.3 4,619,467.5 4,617,291 4,629,931 4,723.0 m_weno_s_weno_797_gpu__1 - 9.9 415,779,420 90 4,619,771.3 4,615,755.0 4,613,963 4,627,307 4,933.4 m_weno_s_weno_797_gpu - 8.4 350,819,554 90 3,897,995.0 3,897,780.5 3,890,789 3,904,773 3,667.2 m_riemann_solvers_s_hllc_riemann_solver_2449_gpu_1 - 8.4 350,508,165 90 3,894,535.2 3,894,132.5 3,886,500 3,925,669 4,850.2 m_riemann_solvers_s_hllc_riemann_solver_2449_gpu_2 - 8.3 346,941,412 90 3,854,904.6 3,855,059.5 3,847,716 3,863,715 3,599.7 m_riemann_solvers_s_hllc_riemann_solver_2449_gpu - 4.6 192,740,190 90 2,141,557.7 2,141,284.0 2,138,036 2,161,044 3,262.1 m_weno_s_initialize_weno_1209_gpu - 4.3 180,014,829 90 2,000,164.8 1,999,906.5 1,995,731 2,017,203 3,654.5 m_riemann_solvers_s_finalize_riemann_solver_5038_gpu - 3.0 126,642,266 90 1,407,136.3 1,407,213.0 1,403,628 1,411,086 1,797.9 m_rhs_s_compute_advection_source_term_1117_gpu - 3.0 126,007,860 90 1,400,087.3 1,400,109.0 1,395,949 1,409,869 2,102.8 m_rhs_s_compute_advection_source_term_1322_gpu - 2.5 104,706,485 90 1,163,405.4 1,162,939.0 1,161,387 1,173,611 1,935.5 m_rhs_s_compute_rhs_938_gpu - 2.4 102,629,501 90 1,140,327.8 1,138,459.0 1,137,738 1,143,819 2,387.1 m_rhs_s_compute_advection_source_term_1008_gpu - 2.4 101,209,868 90 
1,124,554.1 1,124,603.0 1,122,315 1,126,731 970.6 m_variables_conversion_s_convert_conservative_to_primitive_variables_886_gpu - 2.4 100,723,842 90 1,119,153.8 1,119,658.0 1,117,066 1,121,547 1,835.3 m_rhs_s_compute_rhs_635_gpu - 2.3 97,362,414 90 1,081,804.6 1,080,346.0 1,078,859 1,085,130 2,189.4 m_weno_s_initialize_weno_1179_gpu - 2.3 96,209,503 90 1,068,994.5 1,067,834.0 1,066,346 1,071,818 1,961.4 m_riemann_solvers_s_finalize_riemann_solver_4986_gpu - 2.2 92,780,226 90 1,030,891.4 1,029,674.0 1,029,033 1,033,194 1,680.6 m_weno_s_initialize_weno_1141_gpu - 2.1 87,143,916 90 968,265.7 967,400.5 966,506 970,184 1,445.8 m_riemann_solvers_s_finalize_riemann_solver_5090_gpu - 1.8 74,516,980 30 2,483,899.3 2,483,991.0 2,479,544 2,487,767 2,157.5 m_data_output_s_write_run_time_information_280_gpu - 1.0 43,655,287 30 1,455,176.2 1,455,341.0 1,449,389 1,462,574 3,705.9 m_time_steppers_s_3rd_order_tvd_rk_843_gpu - 1.0 43,617,519 30 1,453,917.3 1,453,341.5 1,446,637 1,462,190 3,312.8 m_time_steppers_s_3rd_order_tvd_rk_770_gpu - 0.9 36,698,516 30 1,223,283.9 1,223,179.5 1,220,908 1,225,899 2,096.1 m_time_steppers_s_3rd_order_tvd_rk_698_gpu - 0.9 35,884,076 90 398,712.0 398,627.0 397,444 400,676 664.5 m_rhs_s_compute_advection_source_term_1259_gpu - 0.9 35,849,934 90 398,332.6 398,307.5 396,612 400,900 817.6 m_rhs_s_compute_advection_source_term_1496_gpu - 0.9 35,657,615 90 396,195.7 396,131.5 395,012 397,604 601.3 m_rhs_s_compute_advection_source_term_1086_gpu - 0.5 22,455,606 90 249,506.7 249,634.0 248,483 251,299 536.9 m_riemann_solvers_s_finalize_riemann_solver_5065_gpu - 0.5 20,922,471 90 232,471.9 232,418.0 230,402 234,946 977.2 m_boundary_common_s_populate_variables_buffers_135_gpu - 0.3 12,077,135 1 12,077,135.0 12,077,135.0 12,077,135 12,077,135 0.0 m_rhs_s_initialize_rhs_module_595_gpu - 0.3 11,555,275 90 128,391.9 128,369.0 127,873 128,961 313.6 m_riemann_solvers_s_finalize_riemann_solver_5012_gpu - 0.2 10,021,407 90 111,349.0 111,393.0 110,849 112,001 281.9 m_riemann_solvers_s_finalize_riemann_solver_5102_gpu - 0.2 8,476,463 270 31,394.3 31,424.5 30,112 32,609 462.1 m_variables_conversion_s_convert_primitive_to_flux_variables_1486_gpu - 0.2 8,055,083 90 89,500.9 89,456.5 87,425 92,193 870.2 m_cbc_s_finalize_cbc_1341_gpu - 0.2 6,798,914 90 75,543.5 75,409.0 73,153 79,456 1,190.5 m_boundary_common_s_populate_variables_buffers_164_gpu - 0.1 5,495,696 90 61,063.3 60,928.5 59,233 63,712 924.5 m_cbc_s_cbc_760_gpu_2 - 0.1 5,479,797 90 60,886.6 60,768.0 59,361 63,425 835.8 m_cbc_s_cbc_760_gpu_1 - 0.1 5,443,123 90 60,479.1 60,416.0 58,752 62,529 792.1 m_cbc_s_cbc_760_gpu - 0.1 4,847,150 90 53,857.2 53,840.5 53,409 54,368 215.8 m_cbc_s_finalize_cbc_1445_gpu - 0.1 4,171,398 30 139,046.6 139,089.0 138,594 139,585 293.4 m_data_output_s_write_run_time_information_318_gpu - 0.1 3,730,853 90 41,453.9 41,408.0 40,960 42,465 330.0 m_cbc_s_initialize_cbc_1085_gpu - 0.1 3,511,677 90 39,018.6 39,040.0 38,080 39,713 336.0 m_cbc_s_initialize_cbc_1108_gpu - 0.1 3,313,820 90 36,820.2 36,833.0 36,193 37,505 295.6 m_cbc_s_initialize_cbc_1260_gpu - 0.1 3,147,677 90 34,974.2 34,976.0 34,592 35,456 187.4 m_cbc_s_finalize_cbc_1392_gpu - 0.1 3,074,368 90 34,159.6 34,128.5 33,248 35,265 395.2 m_boundary_common_s_populate_variables_buffers_197_gpu - 0.1 3,051,648 90 33,907.2 33,888.0 33,568 34,337 180.0 m_cbc_s_initialize_cbc_1184_gpu - 0.1 2,848,443 90 31,649.4 31,632.5 31,137 32,352 291.2 m_boundary_common_s_populate_variables_buffers_262_gpu - 0.1 2,826,427 30 94,214.2 93,952.0 92,129 97,121 1,474.8 
m_data_output_s_write_run_time_information_318_gpu__red - 0.1 2,809,400 90 31,215.6 31,232.0 30,720 32,000 228.9 m_cbc_s_initialize_cbc_1237_gpu - 0.1 2,570,675 90 28,563.1 28,544.0 28,192 29,024 183.4 m_cbc_s_initialize_cbc_1161_gpu - 0.1 2,322,772 90 25,808.6 25,760.5 24,576 27,679 576.7 m_cbc_s_cbc_717_gpu_1 - 0.1 2,322,323 90 25,803.6 25,792.0 24,928 27,232 518.0 m_cbc_s_cbc_717_gpu - 0.1 2,313,912 90 25,710.1 25,616.5 24,800 27,841 535.0 m_cbc_s_cbc_717_gpu_2 - 0.0 1,529,426 90 16,993.6 16,992.0 16,255 18,048 317.3 m_boundary_common_s_populate_variables_buffers_229_gpu - 0.0 1,435,822 90 15,953.6 15,904.0 15,456 16,576 246.3 m_boundary_common_s_populate_variables_buffers_291_gpu - 0.0 1,315,084 90 14,612.0 14,529.0 13,760 16,096 469.6 m_cbc_s_finalize_cbc_1353_gpu - 0.0 1,304,080 90 14,489.8 14,496.0 13,793 15,648 364.9 m_cbc_s_finalize_cbc_1376_gpu - 0.0 942,406 90 10,471.2 10,495.5 9,729 11,200 294.9 m_cbc_s_cbc_737_gpu - 0.0 932,877 90 10,365.3 10,352.0 9,472 11,328 348.2 m_cbc_s_cbc_737_gpu_2 - 0.0 918,157 90 10,201.7 10,224.0 9,216 11,168 385.9 m_cbc_s_cbc_737_gpu_1 - 0.0 911,813 90 10,131.3 10,144.0 9,792 10,369 123.7 m_cbc_s_finalize_cbc_1481_gpu - 0.0 897,476 90 9,972.0 9,984.0 9,664 10,337 132.0 m_cbc_s_finalize_cbc_1458_gpu - 0.0 791,649 90 8,796.1 8,768.0 8,512 9,312 152.3 m_cbc_s_initialize_cbc_1121_gpu - 0.0 773,028 90 8,589.2 8,576.0 8,320 8,896 124.8 m_cbc_s_initialize_cbc_1144_gpu - 0.0 767,812 90 8,531.2 8,512.0 8,256 8,928 153.2 m_cbc_s_initialize_cbc_1097_gpu - 0.0 696,165 90 7,735.2 7,712.0 7,456 8,160 134.6 m_cbc_s_initialize_cbc_1296_gpu - 0.0 681,025 90 7,566.9 7,552.0 7,072 7,904 165.3 m_cbc_s_initialize_cbc_1220_gpu - 0.0 648,641 90 7,207.1 7,168.0 7,008 7,520 123.4 m_cbc_s_initialize_cbc_1273_gpu - 0.0 622,053 90 6,911.7 6,912.0 6,624 7,424 150.3 m_cbc_s_initialize_cbc_1197_gpu - 0.0 620,804 90 6,897.8 6,880.0 6,528 7,328 169.6 m_cbc_s_finalize_cbc_1428_gpu - 0.0 602,209 90 6,691.2 6,688.0 6,528 7,040 94.9 m_cbc_s_initialize_cbc_1249_gpu - 0.0 594,472 90 6,605.2 6,592.0 6,432 6,848 103.5 m_cbc_s_finalize_cbc_1405_gpu - 0.0 567,978 90 6,310.9 6,304.0 6,111 6,720 107.5 m_cbc_s_initialize_cbc_1173_gpu - -[6/8] Executing 'cuda_gpu_mem_time_sum' stats report - - Time (%) Total Time (ns) Count Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Operation - -------- --------------- ----- ----------- -------- -------- ---------- ----------- ---------------------------- - 71.6 178,474,331 38 4,696,692.9 1,664.0 1,664 27,670,726 9,349,057.8 [CUDA memcpy Device-to-Host] - 26.9 66,903,272 5,596 11,955.6 1,056.0 991 7,652,198 285,442.5 [CUDA memcpy Host-to-Device] - 1.5 3,756,091 3,098 1,212.4 1,056.0 992 1,984 256.0 [CUDA memset] - -[7/8] Executing 'cuda_gpu_mem_size_sum' stats report - - Total (MB) Count Avg (MB) Med (MB) Min (MB) Max (MB) StdDev (MB) Operation - ---------- ----- -------- -------- -------- -------- ----------- ---------------------------- - 586.516 5,596 0.105 0.000 0.000 73.035 2.760 [CUDA memcpy Host-to-Device] - 584.277 38 15.376 0.000 0.000 73.035 30.175 [CUDA memcpy Device-to-Host] - 0.013 3,098 0.000 0.000 0.000 0.000 0.000 [CUDA memset] - -[8/8] Executing 'openacc_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------- ------------- ----------- ----------- ----------- ------------------------------------------------- - 13.4 1,250,848,660 270 4,632,772.8 4,631,543.5 4,624,229 4,671,891 5,580.2 Compute Construct@m_weno.fpp:797 - 13.4 1,249,882,607 
540 2,314,597.4 2,311,349.5 2,056 4,665,923 2,314,285.0 Wait@m_weno.fpp:797 - 11.3 1,053,050,176 270 3,900,185.8 3,904,208.0 3,859,076 5,654,119 108,985.3 Compute Construct@m_riemann_solvers.fpp:2449 - 11.3 1,051,799,364 540 1,947,776.6 1,930,833.0 1,955 5,646,863 1,948,701.8 Wait@m_riemann_solvers.fpp:2449 - 2.1 193,652,964 90 2,151,699.6 2,152,170.0 2,147,468 2,171,156 3,397.6 Compute Construct@m_weno.fpp:1209 - 2.1 193,330,633 180 1,074,059.1 1,072,409.0 1,958 2,165,526 1,074,660.9 Wait@m_weno.fpp:1209 - 2.0 186,408,230 8 23,301,028.8 22,562,643.0 18,585,494 28,786,288 3,716,929.5 Update@m_start_up.fpp:1193 - 2.0 186,275,668 8 23,284,458.5 22,540,639.0 18,568,804 28,772,517 3,717,693.3 Enqueue Download@m_start_up.fpp:1193 - 1.9 181,024,641 90 2,011,384.9 2,012,962.5 2,007,029 2,029,224 3,792.6 Compute Construct@m_riemann_solvers.fpp:5038 - 1.9 180,637,889 180 1,003,543.8 1,001,911.5 2,094 2,022,050 1,003,767.5 Wait@m_riemann_solvers.fpp:5038 - 1.6 152,569,231 1 152,569,231.0 152,569,231.0 152,569,231 152,569,231 0.0 Device Init - 1.4 127,524,434 90 1,416,938.2 1,416,992.0 1,413,114 1,421,826 1,984.1 Compute Construct@m_rhs.fpp:1117 - 1.4 127,228,639 180 706,825.8 705,605.0 2,033 1,415,458 706,426.4 Wait@m_rhs.fpp:1117 - 1.4 126,914,882 90 1,410,165.4 1,409,912.0 1,405,358 1,433,807 3,399.0 Compute Construct@m_rhs.fpp:1322 - 1.4 126,623,638 180 703,464.7 701,715.5 1,996 1,428,242 703,088.2 Wait@m_rhs.fpp:1322 - 1.1 105,635,392 90 1,173,726.6 1,174,350.0 1,171,004 1,183,555 2,185.0 Compute Construct@m_rhs.fpp:938 - 1.1 105,337,066 180 585,205.9 584,688.0 2,083 1,178,377 584,333.3 Wait@m_rhs.fpp:938 - 1.1 103,499,428 90 1,149,993.6 1,150,466.0 1,146,841 1,156,325 2,597.5 Compute Construct@m_rhs.fpp:1008 - 1.1 103,217,825 180 573,432.4 572,278.5 2,185 1,148,282 572,639.5 Wait@m_rhs.fpp:1008 - 1.1 102,215,531 90 1,135,728.1 1,135,479.0 1,133,078 1,141,059 1,633.5 Compute Construct@m_variables_conversion.fpp:886 - 1.1 101,828,592 180 565,714.4 565,300.5 2,241 1,131,119 564,625.7 Wait@m_variables_conversion.fpp:886 - 1.1 101,768,752 90 1,130,763.9 1,131,035.5 1,127,083 1,167,795 4,435.9 Compute Construct@m_rhs.fpp:635 - 1.1 101,359,813 180 563,110.1 562,322.0 2,456 1,126,454 561,936.1 Wait@m_rhs.fpp:635 - 1.1 98,266,239 90 1,091,847.1 1,092,958.5 1,088,676 1,095,989 2,295.9 Compute Construct@m_weno.fpp:1179 - 1.1 97,938,114 180 544,100.6 543,073.5 2,183 1,089,076 543,261.1 Wait@m_weno.fpp:1179 - 1.0 97,256,086 90 1,080,623.2 1,080,523.5 1,077,445 1,085,473 2,280.9 Compute Construct@m_riemann_solvers.fpp:4986 - 1.0 96,851,542 180 538,064.1 537,177.0 2,183 1,076,471 536,981.5 Wait@m_riemann_solvers.fpp:4986 - 1.0 93,672,129 90 1,040,801.4 1,041,305.0 1,038,063 1,045,627 1,952.5 Compute Construct@m_weno.fpp:1141 - 1.0 93,347,022 180 518,594.6 517,941.0 1,890 1,037,406 517,730.9 Wait@m_weno.fpp:1141 - 0.9 88,158,070 90 979,534.1 979,809.5 977,095 984,751 1,780.3 Compute Construct@m_riemann_solvers.fpp:5090 - 0.9 87,757,597 180 487,542.2 486,596.5 2,205 974,844 486,291.9 Wait@m_riemann_solvers.fpp:5090 - 0.8 74,865,105 30 2,495,503.5 2,495,608.0 2,491,010 2,499,427 2,265.6 Compute Construct@m_data_output.fpp:280 - 0.8 74,726,676 60 1,245,444.6 1,243,514.0 2,347 2,492,248 1,253,194.6 Wait@m_data_output.fpp:280 - 0.7 62,634,956 20 3,131,747.8 82,004.5 14,260 7,871,967 3,888,670.1 Update@m_start_up.fpp.f90:54 - 0.7 62,182,948 85 731,564.1 2,628.0 2,222 7,835,806 2,271,843.5 Enqueue Upload@m_start_up.fpp.f90:54 - 0.5 44,024,848 30 1,467,494.9 1,467,029.5 1,461,329 1,474,592 3,752.0 Compute 
Construct@m_time_steppers.fpp:843

Time (%)  Total Time  Instances          Avg          Med        Min        Max     StdDev  Name
--------  ----------  ---------  -----------  -----------  ---------  ---------  ---------  ----
     0.5  43,985,551         30  1,466,185.0  1,465,305.5  1,458,442  1,474,513    3,505.6  Compute Construct@m_time_steppers.fpp:770
     0.5  43,780,776         30  1,459,359.2  1,459,511.0  1,453,410  1,466,562    3,720.3  Wait@m_time_steppers.fpp:843
     0.5  43,742,732         30  1,458,091.1  1,457,490.5  1,451,269  1,466,692    3,288.5  Wait@m_time_steppers.fpp:770
     0.4  37,043,224         30  1,234,774.1  1,236,126.0  1,231,696  1,238,694    2,262.6  Compute Construct@m_time_steppers.fpp:698
     0.4  36,847,491         90    409,416.6    409,381.5    407,723    411,800      940.4  Compute Construct@m_rhs.fpp:1259
     0.4  36,827,252         30  1,227,575.1  1,227,755.0  1,224,966  1,230,396    2,012.8  Wait@m_time_steppers.fpp:698
     0.4  36,820,995         90    409,122.2    408,941.0    406,632    413,061    1,263.5  Compute Construct@m_rhs.fpp:1496
     0.4  36,617,753         90    406,863.9    406,722.5    405,073    410,996    1,135.9  Compute Construct@m_rhs.fpp:1086
     0.4  36,497,064        180    202,761.5    202,439.0      2,170    404,866  200,781.9  Wait@m_rhs.fpp:1259
     0.4  36,470,319        180    202,612.9    202,304.0      2,423    405,212  200,595.9  Wait@m_rhs.fpp:1496
     0.4  36,270,659        180    201,503.7    201,139.0      2,085    401,974  199,526.6  Wait@m_rhs.fpp:1086
     0.3  23,431,978         90    260,355.3    260,194.5    258,688    266,998    1,122.2  Compute Construct@m_riemann_solvers.fpp:5065
     0.2  23,067,797        180    128,154.4    127,817.5      2,201    260,975  126,015.1  Wait@m_riemann_solvers.fpp:5065
     0.2  21,929,869         90    243,665.2    243,487.0    240,723    249,561    1,499.3  Compute Construct@m_boundary_common.fpp:135
     0.2  21,531,930        180    119,621.8    119,289.0      2,203    239,161  117,306.0  Wait@m_boundary_common.fpp:135
     0.2  19,911,481        270     73,746.2     72,651.5     70,711    305,908   14,239.4  Compute Construct@m_cbc.fpp:760
     0.2  16,902,775        270     62,602.9     62,528.0     49,975     69,770    1,341.3  Wait@m_cbc.fpp:1024

All remaining entries (Compute Construct, Wait, Update, Enter/Exit Data, Enqueue Upload/Download/Launch, and
Alloc events across m_rhs.fpp, m_riemann_solvers.fpp, m_cbc.fpp, m_weno.fpp, m_boundary_common.fpp,
m_variables_conversion.fpp, m_time_steppers.fpp, m_data_output.fpp, m_start_up.fpp, m_mpi_common.fpp, and
m_global_parameters.fpp) each account for 0.1 % of the total time or less.
0.0 0 4 0.0 0.0 0 0 0.0 Alloc@m_cbc.fpp:760 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_data_output.fpp:1758 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1238 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1241 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1242 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1243 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1246 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1247 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1248 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1251 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1252 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_mpi_common.fpp:79 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:184 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:185 - 0.0 0 8 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:188 - 0.0 0 4 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:192 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:217 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:273 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:277 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:278 - 0.0 0 9 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:280 - 0.0 0 10 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:281 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:291 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:293 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:298 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:300 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:310 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:312 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:367 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:368 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:369 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:378 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:387 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:388 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:389 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:390 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:391 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:392 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:483 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:484 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:487 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:488 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:493 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:494 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:495 - 0.0 0 8 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:499 - 0.0 0 8 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:503 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:518 - 0.0 0 16 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:543 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:595 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:653 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3341 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3346 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3349 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3352 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3355 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3373 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3376 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3379 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3382 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3401 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3404 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3407 - 0.0 0 17 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:103 - 0.0 0 9 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:135 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:257 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:298 - 0.0 0 8 0.0 0.0 0 0 0.0 
Alloc@m_time_steppers.fpp:305 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:319 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:321 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:322 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:327 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:328 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:98 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:623 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:626 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:627 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:629 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:631 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:632 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:815 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:134 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:136 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:139 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:140 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:149 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:166 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:168 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:171 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:172 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:179 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:189 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:191 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:194 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:195 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:202 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:632 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_boundary_common.fpp:1826 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp.f90:450 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:148 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:154 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:158 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:164 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:168 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:191 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:197 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:201 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:207 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:211 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:236 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:242 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:246 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:252 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:256 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:263 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:289 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:363 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:396 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:400 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:401 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:402 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:592 - 0.0 0 1,080 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:760 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_data_output.fpp:1758 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1238 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1241 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1242 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1243 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1246 - 0.0 0 
1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1247 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1248 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1251 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1252 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_mpi_common.fpp:79 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:184 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:185 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:188 - 0.0 0 4 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:192 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:217 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:273 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:277 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:278 - 0.0 0 9 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:280 - 0.0 0 10 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:281 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:291 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:293 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:298 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:300 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:310 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:312 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:367 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:368 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:369 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:378 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:387 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:388 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:389 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:390 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:391 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:392 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:483 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:484 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:487 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:488 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:493 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:494 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:495 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:499 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:503 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:518 - 0.0 0 16 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:543 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:595 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:653 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3341 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3346 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3349 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3352 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3355 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3373 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3376 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3379 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3382 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3401 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3404 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3407 - 0.0 0 17 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:103 - 0.0 0 9 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:135 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:257 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:298 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:305 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:319 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:321 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:322 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 
Create@m_time_steppers.fpp:327 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:328 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:98 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:623 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:626 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:627 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:629 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:631 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:632 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:815 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:134 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:136 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:139 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:140 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:149 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:166 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:168 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:171 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:172 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:179 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:189 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:191 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:194 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:195 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:202 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:632 - 0.0 0 1,080 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1024 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1523 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1525 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1527 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1530 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1532 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1534 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1537 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1539 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1541 - 0.0 0 8 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1545 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1548 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1552 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1554 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1561 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1563 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1569 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1571 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_global_parameters.fpp:1327 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_global_parameters.fpp:1330 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_global_parameters.fpp:1333 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2162 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2163 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2166 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2170 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2245 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2280 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2283 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5135 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5136 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5137 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5138 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5148 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5149 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5150 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5151 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5161 - 0.0 0 1 0.0 0.0 0 0 0.0 
diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 185a84892e..8ccab395be 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -399,6 +399,31 @@ contains end subroutine s_mpi_reduce_stability_criteria_extrema + !> The following subroutine takes the inputted variable and + !! determines its sum on the entire computational domain. + !! @param var_loc holds the local value to be reduced among + !! all the processors in communicator. On output, the variable holds + !! the sum, reduced amongst all of the local values. + subroutine s_mpi_reduce_int_sum(var_loc) + + integer, intent(inout) :: var_loc + +#ifdef MFC_MPI + + ! Temporary storage variable that holds the reduced sum value + integer :: var_glb + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, var_glb, 1, MPI_INTEGER, & + MPI_SUM, 0, MPI_COMM_WORLD, ierr) + + var_loc = var_glb + +#endif + + end subroutine s_mpi_reduce_int_sum + !> The following subroutine takes the input local variable !! from all processors and reduces to the sum of all !! values. The reduced variable is recorded back onto the @@ -1061,7 +1086,13 @@ contains !! Remaining number of cells, in a particular coordinate direction, !! after the majority is divided up among the available processors - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators + + integer, dimension(1:num_dims) :: neighbor_coords + + nidx(1)%beg = 0; nidx(1)%end = 0 + nidx(2)%beg = 0; nidx(2)%end = 0 + nidx(3)%beg = 0; nidx(3)%end = 0 if (num_procs == 1 .and. parallel_io) then do i = 1, num_dims @@ -1227,6 +1258,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%beg, ierr) proc_coords(3) = proc_coords(3) + 1 + nidx(3)%beg = -1 end if !
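A usage note for orientation, not part of the patch: MPI_REDUCE with root 0 leaves the reduced sum on rank 0 only, so any decision based on the result of s_mpi_reduce_int_sum should sit behind a proc_rank guard. A minimal sketch, using the bubble-count check that appears later in this series as the example:

    ! bub_id is the local bubble count on this rank; after the call, rank 0 holds the global sum
    call s_mpi_reduce_int_sum(bub_id)
    if (proc_rank == 0) then
        if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat')
    end if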
Boundary condition at the end @@ -1235,6 +1267,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%end, ierr) proc_coords(3) = proc_coords(3) - 1 + nidx(3)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1366,6 +1399,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%beg, ierr) proc_coords(2) = proc_coords(2) + 1 + nidx(2)%beg = -1 end if ! Boundary condition at the end @@ -1374,6 +1408,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%end, ierr) proc_coords(2) = proc_coords(2) - 1 + nidx(2)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1457,6 +1492,7 @@ contains proc_coords(1) = proc_coords(1) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) proc_coords(1) = proc_coords(1) + 1 + nidx(1)%beg = -1 end if ! Boundary condition at the end @@ -1464,6 +1500,7 @@ contains proc_coords(1) = proc_coords(1) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) proc_coords(1) = proc_coords(1) - 1 + nidx(1)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1509,6 +1546,24 @@ contains end if #endif end if + + @:ALLOCATE(neighbor_ranks(nidx(1)%beg:nidx(1)%end, & + nidx(2)%beg:nidx(2)%end, & + nidx(3)%beg:nidx(3)%end)) + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + neighbor_coords(1) = proc_coords(1) + i + if (num_dims > 1) neighbor_coords(2) = proc_coords(2) + j + if (num_dims > 2) neighbor_coords(3) = proc_coords(3) + k + call MPI_CART_RANK(MPI_COMM_CART, neighbor_coords, & + neighbor_ranks(i, j, k), ierr) + end if + end do + end do + end do #endif end subroutine s_mpi_decompose_computational_domain diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 48780f00d4..0f45f59cde 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -171,6 +171,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks for lagrangian particle communication + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index d73f3e8ce6..fecb7acc9c 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -163,6 +163,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks for lagrangian particle communication + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid diff --git "a/src/simulation/\\" "b/src/simulation/\\" new file mode 100644 index 0000000000..2559c0ffff --- /dev/null +++ "b/src/simulation/\\" @@ -0,0 +1,1837 @@ +!> +!! @file m_bubbles_EL.fpp +!! 
@brief Contains module m_bubbles_EL + +#:include 'macros.fpp' + +!> @brief This module is used to to compute the volume-averaged bubble model +module m_bubbles_EL + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + + use m_mpi_common + + use m_bubbles_EL_kernels !< Definitions of the kernel functions + + use m_bubbles !< General bubble dynamics procedures + + use m_variables_conversion !< State variables type conversion procedures + + use m_compile_specific + + use m_boundary_common + + use m_sim_helpers + + use m_helper + + use m_ibm + + implicit none + + !(nBub) + integer, allocatable, dimension(:, :) :: lag_id !< Global and local IDs + real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius + real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius + real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius + real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass + real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) + real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) + real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) + !(nBub, 1 -> actual val or 2 -> temp val) + real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble + real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble + real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius + real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface + !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) + real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position + real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position + real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity + real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format + !(nBub, 1-> x or 2->y or 3 ->z, time-stage) + real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius + real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity + real(wp), allocatable, dimension(:, :) :: gas_dpdt !< Time derivative of gas pressure + real(wp), allocatable, dimension(:, :) :: gas_dmvdt !< Time derivative of the vapor mass in the bubble + real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position + real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity + + !$acc declare create(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, & + !$acc gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + !$acc gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt) + + integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme + + !$acc declare create(lag_num_ts) + + integer :: nBubs !< Number of bubbles in the local domain + real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain + type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework + integer :: q_beta_idx !< Size of the q_beta vector field + + !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) + +contains + + !> Initializes the lagrangian 
subgrid bubble solver + !! @param q_cons_vf Initial conservative variables + subroutine s_initialize_bubbles_EL_module(q_cons_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + + integer :: nBubs_glb, i + + ! Setting number of time-stages for selected time-stepping scheme + lag_num_ts = time_stepper + + ! Allocate space for the Eulerian fields needed to map the effect of the bubbles + if (lag_params%solver_approach == 1) then + ! One-way coupling + q_beta_idx = 3 + elseif (lag_params%solver_approach == 2) then + ! Two-way coupling + q_beta_idx = 4 + if (p == 0) then + !Subgrid noise model for 2D approximation + q_beta_idx = 6 + end if + else + call s_mpi_abort('Please check the lag_params%solver_approach input') + end if + + !$acc update device(lag_num_ts, q_beta_idx) + + @:ALLOCATE(q_beta%vf(1:q_beta_idx)) + + do i = 1, q_beta_idx + @:ALLOCATE(q_beta%vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + + @:ACC_SETUP_VFs(q_beta) + + ! Allocating space for lagrangian variables + nBubs_glb = lag_params%nBubs_glb + + @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) + @:ALLOCATE(bub_R0(1:nBubs_glb)) + @:ALLOCATE(Rmax_stats(1:nBubs_glb)) + @:ALLOCATE(Rmin_stats(1:nBubs_glb)) + @:ALLOCATE(gas_mg(1:nBubs_glb)) + @:ALLOCATE(gas_betaT(1:nBubs_glb)) + @:ALLOCATE(gas_betaC(1:nBubs_glb)) + @:ALLOCATE(bub_dphidt(1:nBubs_glb)) + @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) + @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) + @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) + @:ALLOCATE(intfc_vel(1:nBubs_glb, 1:2)) + @:ALLOCATE(mtn_pos(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(mtn_posPrev(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(mtn_vel(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(mtn_s(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(intfc_draddt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(intfc_dveldt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(gas_dpdt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(gas_dmvdt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + + if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) + + ! Starting bubbles + call s_start_lagrange_inputs() + call s_read_input_bubbles(q_cons_vf) + + end subroutine s_initialize_bubbles_EL_module + + !> The purpose of this procedure is to start lagrange bubble parameters applying nondimensionalization if needed + subroutine s_start_lagrange_inputs() + + integer :: id_bubbles, id_host + real(wp) :: rho0, c0, T0, x0, p0 + + id_bubbles = num_fluids + id_host = num_fluids - 1 + + !Reference values + rho0 = lag_params%rho0 + c0 = lag_params%c0 + T0 = lag_params%T0 + x0 = lag_params%x0 + p0 = rho0*c0*c0 + + !Update inputs + Tw = lag_params%Thost/T0 + pv = fluid_pp(id_host)%pv/p0 + gamma_v = fluid_pp(id_bubbles)%gamma_v + gamma_n = fluid_pp(id_host)%gamma_v + k_vl = fluid_pp(id_bubbles)%k_v*(T0/(x0*rho0*c0*c0*c0)) + k_nl = fluid_pp(id_host)%k_v*(T0/(x0*rho0*c0*c0*c0)) + cp_v = fluid_pp(id_bubbles)%cp_v*(T0/(c0*c0)) + cp_n = fluid_pp(id_host)%cp_v*(T0/(c0*c0)) + R_v = (R_uni/fluid_pp(id_bubbles)%M_v)*(T0/(c0*c0)) + R_n = (R_uni/fluid_pp(id_host)%M_v)*(T0/(c0*c0)) + lag_params%diffcoefvap = lag_params%diffcoefvap/(x0*c0) + ss = fluid_pp(id_host)%ss/(rho0*x0*c0*c0) + mul0 = fluid_pp(id_host)%mul0/(rho0*x0*c0) + + ! Parameters used in bubble_model + Web = 1._wp/ss + Re_inv = mul0 + + ! 
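Reader's note, not part of the patch: the assignments above apply a single nondimensionalization built from the reference state (rho0, c0, T0, x0), for example

    ! p0   = rho0*c0**2                  reference pressure
    ! ss   = sigma_dim/(rho0*x0*c0**2)   nondimensional surface tension, so Web    = 1/ss
    ! mul0 = mu_dim   /(rho0*x0*c0)      nondimensional viscosity,       so Re_inv = mul0

so Web and Re_inv play the role of inverse Weber and Reynolds numbers of the reference state; sigma_dim and mu_dim here stand for the dimensional fluid_pp(id_host)%ss and fluid_pp(id_host)%mul0 inputs.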
Need improvements to accept polytropic gas compression, isothermal and adiabatic thermal models, and + ! the Gilmore and RP bubble models. + polytropic = .false. ! Forcing no polytropic model + thermal = 3 ! Forcing constant transfer coefficient model based on Preston et al., 2007 + ! If Keller-Miksis model is not selected, then no radial motion + + end subroutine s_start_lagrange_inputs + + !> The purpose of this procedure is to obtain the initial bubbles' information + !! @param q_cons_vf Conservative variables + subroutine s_read_input_bubbles(q_cons_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + + real(wp), dimension(8) :: inputBubble + real(wp) :: qtime + integer :: id, bub_id, save_count + integer :: i, ios + logical :: file_exist, indomain + + character(LEN=path_len + 2*name_len) :: path_D_dir !< + + ! Initialize number of particles + bub_id = 0 + id = 0 + + ! Read the input lag_bubble file or restart point + if (cfl_dt) then + save_count = n_start + qtime = n_start*t_save + else + save_count = t_step_start + qtime = t_step_start*dt + end if + + if (save_count == 0) then + if (proc_rank == 0) print *, 'Reading lagrange bubbles input file.' + inquire (file='input/lag_bubbles.dat', exist=file_exist) + if (file_exist) then + open (94, file='input/lag_bubbles.dat', form='formatted', iostat=ios) + do while (ios == 0) + read (94, *, iostat=ios) (inputBubble(i), i=1, 8) + if (ios /= 0) cycle + indomain = particle_in_domain(inputBubble(1:3)) + id = id + 1 + if (id > lag_params%nBubs_glb .and. proc_rank == 0) then + call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") + end if + if (indomain) then + bub_id = bub_id + 1 + call s_add_bubbles(inputBubble, q_cons_vf, bub_id) + lag_id(bub_id, 1) = id !global ID + lag_id(bub_id, 2) = bub_id !local ID + nBubs = bub_id ! local number of bubbles + end if + end do + close (94) + else + call s_mpi_abort("Initialize the lagrange bubbles in input/lag_bubbles.dat") + end if + else + if (proc_rank == 0) print *, 'Restarting lagrange bubbles at save_count: ', save_count + call s_restart_bubbles(bub_id, save_count) + end if + + print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id + + call s_mpi_reduce_int_sum(bub_id) + + if (proc_rank == 0) then + if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') + end if + + !$acc update device(bubbles_lagrange, lag_params) + + !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & + !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs) + + Rmax_glb = min(dflt_real, -dflt_real) + Rmin_glb = max(dflt_real, -dflt_real) + !$acc update device(Rmax_glb, Rmin_glb) + + !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) + + !Populate temporal variables + call s_transfer_data_to_tmp() + call s_smear_voidfraction() + + if (lag_params%write_bubbles) call s_write_lag_particles(qtime) + + if (save_count == 0) then + ! Create ./D directory + write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' + call my_inquire(path_D_dir, file_exist) + if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + call s_write_restart_lag_bubbles(save_count) ! 
Needed for post_processing + call s_write_void_evol(qtime) + end if + + end subroutine s_read_input_bubbles + + !> The purpose of this procedure is to obtain the information of the bubbles when starting fresh + !! @param inputBubble Bubble information + !! @param q_cons_vf Conservative variables + !! @param bub_id Local id of the bubble + subroutine s_add_bubbles(inputBubble, q_cons_vf, bub_id) + + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + real(wp), dimension(8), intent(in) :: inputBubble + integer, intent(in) :: bub_id + integer :: i + + real(wp) :: pliq, volparticle, concvap, totalmass, kparticle, cpparticle + real(wp) :: omegaN, PeG, PeT, rhol, pcrit, qv, gamma, pi_inf, dynP + integer, dimension(3) :: cell + real(wp), dimension(2) :: Re + real(wp) :: massflag, heatflag, Re_trans, Im_trans + + massflag = 0._wp + heatflag = 0._wp + if (lag_params%massTransfer_model) massflag = 1._wp + if (lag_params%heatTransfer_model) heatflag = 1._wp + + bub_R0(bub_id) = inputBubble(7) + Rmax_stats(bub_id) = min(dflt_real, -dflt_real) + Rmin_stats(bub_id) = max(dflt_real, -dflt_real) + bub_dphidt(bub_id) = 0._wp + intfc_rad(bub_id, 1) = inputBubble(7) + intfc_vel(bub_id, 1) = inputBubble(8) + mtn_pos(bub_id, 1:3, 1) = inputBubble(1:3) + mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) + mtn_vel(bub_id, 1:3, 1) = inputBubble(4:6) + + if (cyl_coord .and. p == 0) then + mtn_pos(bub_id, 2, 1) = sqrt(mtn_pos(bub_id, 2, 1)**2._wp + & + mtn_pos(bub_id, 3, 1)**2._wp) + !Storing azimuthal angle (-Pi to Pi)) into the third coordinate variable + mtn_pos(bub_id, 3, 1) = atan2(inputBubble(3), inputBubble(2)) + mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) + end if + + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + + ! Check if the bubble is located in the ghost cell of a symmetric boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + + if (p > 0) then + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + end if + + ! If particle is in the ghost cells, find the closest non-ghost cell + cell(1) = min(max(cell(1), 0), m) + cell(2) = min(max(cell(2), 0), n) + if (p > 0) cell(3) = min(max(cell(3), 0), p) + call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & + rhol, gamma, pi_inf, qv, Re) + dynP = 0._wp + do i = 1, num_dims + dynP = dynP + 0.5_wp*q_cons_vf(contxe + i)%sf(cell(1), cell(2), cell(3))**2/rhol + end do + pliq = (q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - dynP - pi_inf)/gamma + if (pliq < 0) print *, "Negative pressure", proc_rank, & + q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)), pi_inf, gamma, pliq, cell, dynP + + ! 
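For orientation (an illustration, not taken from the patch): each row of input/lag_bubbles.dat is read as eight reals interpreted as x, y, z, u, v, w, R0, Rdot0, so a file seeding a single bubble of radius 1.0e-3 at the origin, initially at rest, could consist of the single row

    0.0  0.0  0.0  0.0  0.0  0.0  1.0e-3  0.0

with one such row per bubble (the numerical values are placeholders): columns 1-3 feed mtn_pos, columns 4-6 feed mtn_vel, column 7 sets bub_R0 and intfc_rad, and column 8 sets intfc_vel.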
Initial particle pressure + gas_p(bub_id, 1) = pliq + 2._wp*(1._wp/Web)/bub_R0(bub_id) + if ((1._wp/Web) /= 0._wp) then + pcrit = pv - 4._wp*(1._wp/Web)/(3._wp*sqrt(3._wp*gas_p(bub_id, 1)*bub_R0(bub_id)**3._wp/(2._wp*(1._wp/Web)))) + pref = gas_p(bub_id, 1) + else + pcrit = 0._wp + end if + + ! Initial particle mass + volparticle = 4._wp/3._wp*pi*bub_R0(bub_id)**3._wp ! volume + gas_mv(bub_id, 1) = pv*volparticle*(1._wp/(R_v*Tw))*(massflag) ! vapermass + gas_mg(bub_id) = (gas_p(bub_id, 1) - pv*(massflag))*volparticle*(1._wp/(R_n*Tw)) ! gasmass + if (gas_mg(bub_id) <= 0._wp) then + call s_mpi_abort("The initial mass of gas inside the bubble is negative. Check the initial conditions.") + end if + totalmass = gas_mg(bub_id) + gas_mv(bub_id, 1) ! totalmass + + ! Bubble natural frequency + concvap = gas_mv(bub_id, 1)/(gas_mv(bub_id, 1) + gas_mg(bub_id)) + omegaN = (3._wp*(gas_p(bub_id, 1) - pv*(massflag)) + 4._wp*(1._wp/Web)/bub_R0(bub_id))/rhol + if (pv*(massflag) > gas_p(bub_id, 1)) then + call s_mpi_abort("Lagrange bubble initially located in a region with pressure below the vapor pressure.") + end if + omegaN = sqrt(omegaN/bub_R0(bub_id)**2._wp) + + cpparticle = concvap*cp_v + (1._wp - concvap)*cp_n + kparticle = concvap*k_vl + (1._wp - concvap)*k_nl + + ! Mass and heat transfer coefficients (based on Preston 2007) + PeT = totalmass/volparticle*cpparticle*bub_R0(bub_id)**2._wp*omegaN/kparticle + call s_transcoeff(1._wp, PeT, Re_trans, Im_trans) + gas_betaT(bub_id) = Re_trans*(heatflag)*kparticle + + PeG = bub_R0(bub_id)**2._wp*omegaN/lag_params%diffcoefvap + call s_transcoeff(1._wp, PeG, Re_trans, Im_trans) + gas_betaC(bub_id) = Re_trans*(massflag)*lag_params%diffcoefvap + + if (gas_mg(bub_id) <= 0._wp) then + call s_mpi_abort("Negative gas mass in the bubble, check if the bubble is in the domain.") + end if + + end subroutine s_add_bubbles + + !> The purpose of this procedure is to obtain the information of the bubbles from a restart point. + !! @param bub_id Local ID of the particle + !! @param save_count File identifier + subroutine s_restart_bubbles(bub_id, save_count) + + integer, intent(inout) :: bub_id, save_count + + character(LEN=path_len + 2*name_len) :: file_loc + +#ifdef MFC_MPI + real(wp), dimension(20) :: inputvals + integer, dimension(MPI_STATUS_SIZE) :: status + integer(kind=MPI_OFFSET_KIND) :: disp + integer :: view + + integer, dimension(3) :: cell + logical :: indomain, particle_file, file_exist + + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer :: ifile, ierr, tot_data, id + integer :: i + + write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (file=trim(file_loc), exist=file_exist) + + if (file_exist) then + if (proc_rank == 0) then + open (9, file=trim(file_loc), form='unformatted', status='unknown') + read (9) tot_data, mytime, dt + close (9) + print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt + end if + else + print '(a)', trim(file_loc)//' is missing. exiting.' 
+ call s_mpi_abort + end if + + call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + + gsizes(1) = tot_data + gsizes(2) = 21 + lsizes(1) = tot_data + lsizes(2) = 21 + start_idx_part(1) = 0 + start_idx_part(2) = 0 + + call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_type_COMMIT(view, ierr) + + ! Open the file to write all flow variables + write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (file=trim(file_loc), exist=particle_file) + + if (particle_file) then + call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + disp = 0._wp + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) + call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & + mpi_p, status, ierr) + do i = 1, tot_data + id = int(MPI_IO_DATA_lag_bubbles(i, 1)) + inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) + indomain = particle_in_domain(inputvals(1:3)) + if (indomain .and. (id > 0)) then + bub_id = bub_id + 1 + nBubs = bub_id ! local number of bubbles + lag_id(bub_id, 1) = id ! global ID + lag_id(bub_id, 2) = bub_id ! local ID + mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) + mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) + mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) + intfc_rad(bub_id, 1) = inputvals(10) + intfc_vel(bub_id, 1) = inputvals(11) + bub_R0(bub_id) = inputvals(12) + Rmax_stats(bub_id) = inputvals(13) + Rmin_stats(bub_id) = inputvals(14) + bub_dphidt(bub_id) = inputvals(15) + gas_p(bub_id, 1) = inputvals(16) + gas_mv(bub_id, 1) = inputvals(17) + gas_mg(bub_id) = inputvals(18) + gas_betaT(bub_id) = inputvals(19) + gas_betaC(bub_id) = inputvals(20) + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + end if + end do + deallocate (MPI_IO_DATA_lag_bubbles) + end if + call MPI_FILE_CLOSE(ifile, ierr) +#endif + + end subroutine s_restart_bubbles + + !> Contains the bubble dynamics subroutines. + !! @param q_cons_vf Conservative variables + !! @param q_prim_vf Primitive variables + !! @param rhs_vf Calculated change of conservative variables + !! @param t_step Current time step + !! 
@param stage Current stage in the time-stepper algorithm + subroutine s_compute_bubble_EL_dynamics(q_cons_vf, q_prim_vf, t_step, rhs_vf, stage) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf + integer, intent(in) :: t_step, stage + + real(wp) :: myVapFlux + real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac + real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v + real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot + real(wp) :: myPinf, aux1, aux2, myCson, myRho + real(wp) :: gamma, pi_inf, qv + real(wp), dimension(contxe) :: myalpha_rho, myalpha + real(wp), dimension(2) :: Re + integer, dimension(3) :: cell + + integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached + real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines + + integer :: i, k, l + + call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") + + ! Subgrid p_inf model based on Maeda and Colonius (2018). + if (lag_params%pressure_corrector) then + ! Calculate velocity potentials (valid for one bubble per cell) + !$acc parallel loop gang vector default(present) private(k, cell) + do k = 1, nBubs + call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) + myR0 = bub_R0(k) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myPb = gas_p(k, 2) + pint = f_cpbw_KM(myR0, myR, myV, myPb) + pint = pint + 0.5_wp*myV**2._wp + if (lag_params%cluster_type == 2) then + bub_dphidt(k) = (paux - pint) + term2 + ! Accounting for the potential induced by the bubble averaged over the control volume + ! Note that this is based on the incompressible flow assumption near the bubble. + term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) + bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) + end if + end do + end if + + ! Radial motion model + adap_dt_stop_max = 0 + !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & + !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) + do k = 1, nBubs + + ! Current bubble state + myPb = gas_p(k, 2) + myMass_n = gas_mg(k) + myMass_v = gas_mv(k, 2) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myBeta_c = gas_betaC(k) + myBeta_t = gas_betaT(k) + myR0 = bub_R0(k) + + ! Vapor and heat fluxes + myVapFlux = f_vflux(myR, myV, myPb, myMass_v, k, myMass_n, myBeta_c, myR_m, mygamma_m) + myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) + myMvdot = 4._wp*pi*myR**2._wp*myVapFlux + + ! Obtaining driving pressure + call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) + + ! Obtain liquid density and computing speed of sound from pinf + !$acc loop seq + do i = 1, num_fluids + myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) + myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) + end do + call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & + myalpha_rho, Re, cell(1), cell(2), cell(3)) + call s_compute_cson_from_pinf(k, q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) + + ! Adaptive time stepping + adap_dt_stop = 0 + + if (adap_dt) then + + call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & + dmntait, dmBtait, dm_bub_adv_src, dm_divu, & + k, myMass_v, myMass_n, myBeta_c, & + myBeta_t, myCson, adap_dt_stop) + + ! 
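            ! Reader's note, not in the original patch: with adap_dt the radial ODE for
            ! (R, V, p_b, m_v) is sub-stepped inside s_advance_step and the converged state is
            ! written straight into the stage-1 slots below, so no stage derivative is stored;
            ! without adap_dt, the f_rddot/f_vflux rates go into intfc_dveldt, intfc_draddt,
            ! gas_dpdt and gas_dmvdt for the explicit time stepper to integrate.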
Update bubble state + intfc_rad(k, 1) = myR + intfc_vel(k, 1) = myV + gas_p(k, 1) = myPb + gas_mv(k, 1) = myMass_v + + else + + ! Radial acceleration from bubble models + intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & + myPb, myPbdot, dmalf, dmntait, dmBtait, & + dm_bub_adv_src, dm_divu, & + myCson) + print*, proc_rank, intfc_dveldt(k, stage) + intfc_draddt(k, stage) = myV + gas_dmvdt(k, stage) = myMvdot + gas_dpdt(k, stage) = myPbdot + + end if + + do l = 1, num_dims + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & + intfc_rad(k,2), mtn_vel(k,l,2), & + gas_mg(k), gas_mv(k, 2), & + Re(1), myRho, cell, l, q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if + end do + + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + + end do + + if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") + + call nvtxEndRange + + end subroutine s_compute_bubble_EL_dynamics + + !> The purpose of this subroutine is to obtain the bubble source terms based on Maeda and Colonius (2018) + !! and add them to the RHS scalar field. + !! @param q_cons_vf Conservative variables + !! @param q_prim_vf Conservative variables + !! @param rhs_vf Time derivative of the conservative variables + subroutine s_compute_bubbles_EL_source(q_cons_vf, q_prim_vf, rhs_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf + + integer :: i, j, k, l + + if (.not. 
adap_dt) call s_smear_voidfraction() + + if (lag_params%solver_approach == 2) then + + if (p == 0) then + !$acc parallel loop collapse(4) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & + q_beta%vf(5)%sf(i, j, k)) + + end if + end do + end do + end do + end do + else + !$acc parallel loop collapse(4) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(2)%sf(i, j, k) + end if + end do + end do + end do + end do + end if + + do l = 1, num_dims + + call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) + + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(3)%sf(i, j, k) + end if + end do + end do + end do + + !source in energy + !$acc parallel loop collapse(3) gang vector default(present) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) + end do + end do + end do + + call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) + + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k) + end if + end do + end do + end do + end do + + end if + + end subroutine s_compute_bubbles_EL_source + + !> This procedure computes the speed of sound from a given driving pressure + !! @param bub_id Bubble id + !! @param q_prim_vf Primitive variables + !! @param pinf Driving pressure + !! @param cell Bubble cell + !! @param rhol Liquid density + !! @param gamma Liquid specific heat ratio + !! @param pi_inf Liquid stiffness + !! 
@param cson Calculated speed of sound + subroutine s_compute_cson_from_pinf(bub_id, q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) +#ifdef _CRAYFTN + !DIR$ INLINEALWAYS s_compute_cson_from_pinf +#else + !$acc routine seq +#endif + integer, intent(in) :: bub_id + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), intent(in) :: pinf, rhol, gamma, pi_inf + integer, dimension(3), intent(in) :: cell + real(wp), intent(out) :: cson + + real(wp) :: E, H + real(wp), dimension(num_dims) :: vel + integer :: i + + !$acc loop seq + do i = 1, num_dims + vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) + end do + E = gamma*pinf + pi_inf + 0.5_wp*rhol*dot_product(vel, vel) + H = (E + pinf)/rhol + cson = sqrt((H - 0.5_wp*dot_product(vel, vel))/gamma) + + end subroutine s_compute_cson_from_pinf + + !> The purpose of this subroutine is to smear the effect of the bubbles in the Eulerian framework + subroutine s_smear_voidfraction() + + integer :: i, j, k, l + + call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") + + !$acc parallel loop collapse(4) gang vector default(present) + do i = 1, q_beta_idx + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(i)%sf(j, k, l) = 0._wp + end do + end do + end do + end do + + call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & + mtn_s, mtn_pos, q_beta) + + !Store 1-beta + !$acc parallel loop collapse(3) gang vector default(present) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) + ! Limiting void fraction given max value + q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & + 1._wp - lag_params%valmaxvoid) + end do + end do + end do + + call nvtxEndRange + + end subroutine s_smear_voidfraction + + !> The purpose of this procedure is obtain the bubble driving pressure p_inf + !! @param bub_id Particle identifier + !! @param q_prim_vf Primitive variables + !! @param ptype 1: p at infinity, 2: averaged P at the bubble location + !! @param f_pinfl Driving pressure + !! @param cell Bubble cell + !! 
@param Romega Control volume radius + subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) +#ifdef _CRAYFTN + !DIR$ INLINEALWAYS s_get_pinf +#else + !$acc routine seq +#endif + integer, intent(in) :: bub_id, ptype + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), intent(out) :: f_pinfl + integer, dimension(3), intent(out) :: cell + real(wp), intent(out), optional :: preterm1, term2, Romega + + real(wp), dimension(3) :: scoord, psi + real(wp) :: dc, vol, aux + real(wp) :: volgas, term1, Rbeq, denom + real(wp) :: charvol, charpres, charvol2, charpres2 + integer, dimension(3) :: cellaux + integer :: i, j, k + integer :: smearGrid, smearGridz + logical :: celloutside + + f_pinfl = 0._wp + + if (lag_params%vel_model > 0) then + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) + scoord = mtn_s(bub_id, 1:3, 2) + else + scoord = mtn_s(bub_id, 1:3, 2) + cell(:) = int(scoord(:)) + end if + + !$acc loop seq + do i = 1, num_dims + if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 + end do + + if ((lag_params%cluster_type == 1)) then + !< Getting p_cell in terms of only the current cell by interpolation + + !< Getting the cell volulme as Omega + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + else + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if + end if + + !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. + psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + if (cell(1) == (m + buff_size)) then + cell(1) = cell(1) - 1 + psi(1) = 1._wp + else if (cell(1) == (-buff_size)) then + psi(1) = 0._wp + else + if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 + psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) + end if + + psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + if (cell(2) == (n + buff_size)) then + cell(2) = cell(2) - 1 + psi(2) = 1._wp + else if (cell(2) == (-buff_size)) then + psi(2) = 0._wp + else + if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 + psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) + end if + + if (p > 0) then + psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + if (cell(3) == (p + buff_size)) then + cell(3) = cell(3) - 1 + psi(3) = 1._wp + else if (cell(3) == (-buff_size)) then + psi(3) = 0._wp + else + if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 + psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) + end if + else + psi(3) = 0._wp + end if + + !< Perform bilinear interpolation + if (p == 0) then !2D + f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) + else !3D + f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - 
psi(1))*psi(2)*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) + end if + + !R_Omega + dc = (3._wp*vol/(4._wp*pi))**(1._wp/3._wp) + + else if (lag_params%cluster_type >= 2) then + ! Bubble dynamic closure from Maeda and Colonius (2018) + + ! Include the cell that contains the bubble (mapCells+1+mapCells) + smearGrid = mapCells - (-mapCells) + 1 + smearGridz = smearGrid + if (p == 0) smearGridz = 1 + + charvol = 0._wp + charpres = 0._wp + charvol2 = 0._wp + charpres2 = 0._wp + vol = 0._wp + + !$acc loop seq + do i = 1, smearGrid + !$acc loop seq + do j = 1, smearGrid + !$acc loop seq + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) + if (p == 0) cellaux(3) = 0 + + !< check if the current cell is outside the computational domain or not (including ghost cells) + celloutside = .false. + if (num_dims == 2) then + if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + celloutside = .true. + end if + if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then + celloutside = .true. + end if + if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + celloutside = .true. + end if + else + if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + celloutside = .true. + end if + + if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + celloutside = .true. + end if + end if + if (.not. celloutside) then + if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then + celloutside = .true. + end if + end if + + if (.not. celloutside) then + !< Obtaining the cell volulme + if (p > 0) then + vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) + else + if (cyl_coord) then + vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi + else + vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth + end if + end if + !< Update values + charvol = charvol + vol + charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol + charvol2 = charvol2 + vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) + charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3)) & + *vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) + end if + + end do + end do + end do + + f_pinfl = charpres2/charvol2 + vol = charvol + dc = (3._wp*abs(vol)/(4._wp*pi))**(1._wp/3._wp) + + end if + + if (lag_params%pressure_corrector) then + + !Valid if only one bubble exists per cell + volgas = intfc_rad(bub_id, 2)**3._wp + denom = intfc_rad(bub_id, 2)**2._wp + term1 = bub_dphidt(bub_id)*intfc_rad(bub_id, 2)**2._wp + term2 = intfc_vel(bub_id, 2)*intfc_rad(bub_id, 2)**2._wp + + Rbeq = volgas**(1._wp/3._wp) !surrogate bubble radius + aux = dc**3._wp - Rbeq**3._wp + term2 = term2/denom + term2 = 3._wp/2._wp*term2**2._wp*Rbeq**3._wp*(1._wp - Rbeq/dc)/aux + preterm1 = 3._wp/2._wp*Rbeq*(dc**2._wp - Rbeq**2._wp)/(aux*denom) + + !Control volume radius + if (ptype == 2) Romega = dc + + ! 
Getting p_inf + if (ptype == 1) then + f_pinfl = f_pinfl + preterm1*term1 + term2 + end if + + end if + + end subroutine s_get_pinf + + !> This subroutine updates the Lagrange variables using the tvd RK time steppers. + !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. + !! @param stage Current tvd RK stage + subroutine s_update_lagrange_tdv_rk(stage) + + integer, intent(in) :: stage + + integer :: k + + if (time_stepper == 1) then ! 1st order TVD RK + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + + call s_transfer_data_to_tmp() + call s_write_void_evol(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + + if (lag_params%write_bubbles) then + !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + call s_write_lag_particles(mytime) + end if + + elseif (time_stepper == 2) then ! 2nd order TVD RK + if (stage == 1) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + + elseif (stage == 2) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp + gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp + gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + + call s_transfer_data_to_tmp() + call s_write_void_evol(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + if (lag_params%write_bubbles) then + !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + call s_write_lag_particles(mytime) + end if + + end if + + elseif (time_stepper == 3) then ! 
3rd order TVD RK + if (stage == 1) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + + elseif (stage == 2) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp + gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp + gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + + elseif (stage == 3) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] + intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) + intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) + gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) + gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + + call s_transfer_data_to_tmp() + call s_write_void_evol(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + + if (lag_params%write_bubbles) then + !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + call s_write_lag_particles(mytime) + end if + + end if + end if + + end subroutine s_update_lagrange_tdv_rk + + !> This subroutine enforces reflective and wall boundary conditions for EL bubbles + subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + + integer, intent(in) :: dest + integer :: k, i, patch_id + integer, dimension(3) :: cell + + !$acc parallel loop gang vector default(present) private(cell) + do k = 1, nBubs + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. 
mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) + elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then + call s_remove_lag_bubble(k) + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) + else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) + else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) + end if + end if + + if (ib) then + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then + patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + + do i = 1, num_dims + mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & + levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & + * levelset%sf(cell(1), cell(2), cell(3), patch_id) + end do + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + end if + end if + + end do + + if (num_procs > 1) then + call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) + call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts) + call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts, nbubs) + end if + + end subroutine s_enforce_EL_bubbles_boundary_conditions + + !> This subroutine returns the computational coordinate of the cell for the given position. + !! @param pos Input coordinates + !! @param cell Computational coordinate of the cell + !! 
@param scoord Calculated particle coordinates +    subroutine s_locate_cell(pos, cell, scoord) + +        real(wp), dimension(3), intent(in) :: pos +        real(wp), dimension(3), intent(out) :: scoord +        integer, dimension(3), intent(inout) :: cell + +        integer :: i + +        do while (pos(1) < x_cb(cell(1) - 1)) +            cell(1) = cell(1) - 1 +        end do + +        do while (pos(1) > x_cb(cell(1))) +            cell(1) = cell(1) + 1 +        end do + +        do while (pos(2) < y_cb(cell(2) - 1)) +            cell(2) = cell(2) - 1 +        end do + +        do while (pos(2) > y_cb(cell(2))) +            cell(2) = cell(2) + 1 +        end do + +        if (p > 0) then +            do while (pos(3) < z_cb(cell(3) - 1)) +                cell(3) = cell(3) - 1 +            end do +            do while (pos(3) > z_cb(cell(3))) +                cell(3) = cell(3) + 1 +            end do +        end if + +        ! The cell whose left boundary coincides with the domain boundary is numbered 0. +        ! If the computational coordinate of the position is s, its physical coordinate is +        ! (the coordinate of the left boundary of the floor(s)-th cell) +        ! + (s - floor(s))*(cell width). +        ! In other words, the coordinate of the center of the cell is x_cc(cell). + +        ! Coordinates in computational space +        scoord(1) = cell(1) + (pos(1) - x_cb(cell(1) - 1))/dx(cell(1)) +        scoord(2) = cell(2) + (pos(2) - y_cb(cell(2) - 1))/dy(cell(2)) +        scoord(3) = 0._wp +        if (p > 0) scoord(3) = cell(3) + (pos(3) - z_cb(cell(3) - 1))/dz(cell(3)) +        cell(:) = int(scoord(:)) +        do i = 1, num_dims +            if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 +        end do + +    end subroutine s_locate_cell + +    !> This subroutine transfers data into the temporary variables. +    subroutine s_transfer_data_to_tmp() + +        integer :: k + +        !$acc parallel loop gang vector default(present) private(k) +        do k = 1, nBubs +            gas_p(k, 2) = gas_p(k, 1) +            gas_mv(k, 2) = gas_mv(k, 1) +            intfc_rad(k, 2) = intfc_rad(k, 1) +            intfc_vel(k, 2) = intfc_vel(k, 1) +            mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) +            mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) +            mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) +            mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) +        end do + +    end subroutine s_transfer_data_to_tmp + +    !> The purpose of this procedure is to determine if the global coordinates of the bubbles +        !! are present in the current MPI processor (including ghost cells). +        !! @param pos_part Spatial coordinates of the bubble +    function particle_in_domain(pos_part) + +        logical :: particle_in_domain +        real(wp), dimension(3), intent(in) :: pos_part + +        ! 2D +        if (p == 0 .and. cyl_coord .neqv. .true.) then +            ! Defining a virtual z-axis that has the same dimensions as the y-axis +            ! defined in the input file +            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & +                                  (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & +                                  (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) +        else +            ! cyl_coord +            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & +                                  (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) +        end if + +        ! 3D +        if (p > 0) then +            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & +                                  (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & +                                  (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) +        end if + +        ! For symmetric and wall boundary conditions +        if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then +            particle_in_domain = (particle_in_domain .and. 
(pos_part(1) >= x_cb(-1))) + end if + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) + end if + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then + particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) + end if + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then + particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) + end if + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p))) + end if + end if + + end function particle_in_domain + + !> The purpose of this procedure is to determine if the lagrangian bubble is located in the + !! physical domain. The ghost cells are not part of the physical domain. + !! @param pos_part Spatial coordinates of the bubble + function particle_in_domain_physical(pos_part) + + logical :: particle_in_domain_physical + real(wp), dimension(3), intent(in) :: pos_part + + particle_in_domain_physical = ((pos_part(1) < x_cb(m)) .and. (pos_part(1) >= x_cb(-1)) .and. & + (pos_part(2) < y_cb(n)) .and. (pos_part(2) >= y_cb(-1))) + + if (p > 0) then + particle_in_domain_physical = (particle_in_domain_physical .and. (pos_part(3) < z_cb(p)) .and. (pos_part(3) >= z_cb(-1))) + end if + + end function particle_in_domain_physical + + !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z directions + !! following a second-order central difference considering uneven widths + !! @param q Input scalar field + !! @param dq Output gradient of q + !! @param dir Gradient spatial direction + subroutine s_gradient_dir(q, dq, dir) + + type(scalar_field), intent(inout) :: q + type(scalar_field), intent(inout) :: dq + integer, intent(in) :: dir + + integer :: i, j, k + + if (dir == 1) then + ! Gradient in x dir. + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & + + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & + - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) + end do + end do + end do + else + if (dir == 2) then + ! Gradient in y dir. + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) + end do + end do + end do + else + ! Gradient in z dir. 
+ !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) + end do + end do + end do + end if + end if + + end subroutine s_gradient_dir + + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !! @param q_time Current time + subroutine s_write_lag_particles(qtime) + + real(wp), intent(in) :: qtime + integer :: k + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + + if (.not. file_exist) then + open (11, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (11, *) 'currentTime, particleID, x, y, z, ', & + 'coreVaporMass, coreVaporConcentration, radius, interfaceVelocity, ', & + 'corePressure' + else + open (11, FILE=trim(file_loc), FORM='formatted', position='append') + end if + + ! Cycle through list + do k = 1, nBubs + write (11, '(6X,f12.6,I24.8,8e24.8)') & + qtime, & + lag_id(k, 1), & + mtn_pos(k, 1, 1), & + mtn_pos(k, 2, 1), & + mtn_pos(k, 3, 1), & + gas_mv(k, 1), & + gas_mv(k, 1)/(gas_mv(k, 1) + gas_mg(k)), & + intfc_rad(k, 1), & + intfc_vel(k, 1), & + gas_p(k, 1) + end do + + close (11) + + end subroutine s_write_lag_particles + + !> Subroutine that writes some useful statistics related to the volume fraction + !! of the particles (void fraction) in the computatioational domain + !! on each time step. + !! @param q_time Current time + subroutine s_write_void_evol(qtime) + + real(wp), intent(in) :: qtime + real(wp) :: volcell, voltot + real(wp) :: lag_void_max, lag_void_avg, lag_vol + real(wp) :: void_max_glb, void_avg_glb, vol_glb + + integer :: i, j, k + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + if (proc_rank == 0) then + write (file_loc, '(A)') 'voidfraction.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + open (12, FILE=trim(file_loc), FORM='formatted', position='rewind') + !write (12, *) 'currentTime, averageVoidFraction, ', & + ! 'maximumVoidFraction, totalParticlesVolume' + !write (12, *) 'The averageVoidFraction value does ', & + ! 'not reflect the real void fraction in the cloud since the ', & + ! 
'cells which do not have bubbles are not accounted for' +            else +                open (12, FILE=trim(file_loc), FORM='formatted', position='append') +            end if +        end if + +        lag_void_max = 0._wp +        lag_void_avg = 0._wp +        lag_vol = 0._wp +        !$acc parallel loop collapse(3) gang vector default(present) reduction(+:lag_vol,lag_void_avg) & +        !$acc reduction(MAX:lag_void_max) copy(lag_vol, lag_void_avg, lag_void_max) +        do k = 0, p +            do j = 0, n +                do i = 0, m +                    lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) +                    call s_get_char_vol(i, j, k, volcell) +                    if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then +                        lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell +                        lag_vol = lag_vol + volcell +                    end if +                end do +            end do +        end do + +#ifdef MFC_MPI +        if (num_procs > 1) then +            call s_mpi_allreduce_max(lag_void_max, void_max_glb) +            lag_void_max = void_max_glb +            call s_mpi_allreduce_sum(lag_vol, vol_glb) +            lag_vol = vol_glb +            call s_mpi_allreduce_sum(lag_void_avg, void_avg_glb) +            lag_void_avg = void_avg_glb +        end if +#endif +        voltot = lag_void_avg +        ! This voidavg value does not reflect the real void fraction in the cloud, +        ! since cells which do not have bubbles are not accounted for +        if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol + +        if (proc_rank == 0) then +            write (12, '(6X,4e24.8)') & +                qtime, & +                lag_void_avg, & +                lag_void_max, & +                voltot +            close (12) +        end if + +    end subroutine s_write_void_evol + +    !> Subroutine that writes the restart files for the particles in the Lagrangian solver. +        !! @param t_step Current time step +    subroutine s_write_restart_lag_bubbles(t_step) + +        ! Generic string used to store the address of a particular file +        integer, intent(in) :: t_step + +        character(LEN=path_len + 2*name_len) :: file_loc +        logical :: file_exist +        integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn +        integer :: i, k + +#ifdef MFC_MPI +        ! For Parallel I/O +        integer :: ifile, ierr +        integer, dimension(MPI_STATUS_SIZE) :: status +        integer(KIND=MPI_OFFSET_KIND) :: disp +        integer :: view +        integer, dimension(2) :: gsizes, lsizes, start_idx_part +        integer, dimension(num_procs) :: part_order, part_ord_mpi + +        bub_id = 0 +        if (nBubs /= 0) then +            do k = 1, nBubs +                if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then +                    bub_id = bub_id + 1 +                end if +            end do +        end if + +        if (.not. parallel_io) return + +        ! Total number of particles +        call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & +                           MPI_SUM, MPI_COMM_WORLD, ierr) + +        ! Total number of particles written so far +        call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & +                           MPI_SUM, MPI_COMM_WORLD, ierr) + +        lsizes(1) = max(1, bub_id) +        lsizes(2) = 21 + +        ! If the particle number is zero, put 1 since MPI cannot deal with writing +        ! zero particles +        part_order(:) = 1 +        part_order(proc_rank + 1) = max(1, bub_id) + +        call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & +                           MPI_MAX, MPI_COMM_WORLD, ierr) + +        gsizes(1) = sum(part_ord_mpi(1:num_procs)) +        gsizes(2) = 21 + +        start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) +        start_idx_part(2) = 0 + +        write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' +        file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) +        inquire (FILE=trim(file_loc), EXIST=file_exist) +        if (file_exist .and. proc_rank == 0) then +            call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) +        end if + +        ! 
Writing down the total number of particles + if (proc_rank == 0) then + open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') + write (9) gsizes(1), mytime, dt + close (9) + end if + + call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_type_COMMIT(view, ierr) + + allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) + + ! Open the file to write all flow variables + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist .and. proc_rank == 0) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + disp = 0._wp + + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + + ! Cycle through list + i = 1 + + if (bub_id == 0) then + MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp + else + + do k = 1, nBubs + + if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then + + MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) + MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 8:10) = mtn_vel(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 11) = intfc_rad(k, 1) + MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) + MPI_IO_DATA_lag_bubbles(i, 13) = bub_R0(k) + MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats(k) + MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats(k) + MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) + MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) + MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) + MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) + MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) + MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) + + i = i + 1 + + end if + + end do + + end if + + call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & + mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (MPI_IO_DATA_lag_bubbles) + +#endif + + end subroutine s_write_restart_lag_bubbles + + !> This procedure calculates the maximum and minimum radius of each bubble. + subroutine s_calculate_lag_bubble_stats() + + integer :: k + + !$acc parallel loop gang vector default(present) reduction(MAX:Rmax_glb) & + !$acc reduction(MIN: Rmin_glb) copy(Rmax_glb, Rmin_glb) + do k = 1, nBubs + Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) + Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) + end do + + end subroutine s_calculate_lag_bubble_stats + + !> Subroutine that writes the maximum and minimum radius of each bubble. 
+ subroutine s_write_lag_bubble_stats() + + integer :: k + character(LEN=path_len + 2*name_len) :: file_loc + + write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + + !$acc update host(Rmax_glb, Rmin_glb) + + open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' + + do k = 1, nBubs + write (13, '(6X,2I24.8,5e24.8)') & + proc_rank, & + lag_id(k, 1), & + mtn_pos(k, 1, 1), & + mtn_pos(k, 2, 1), & + mtn_pos(k, 3, 1), & + Rmax_stats(k), & + Rmin_stats(k) + end do + + close (13) + + end subroutine s_write_lag_bubble_stats + + !> The purpose of this subroutine is to remove one specific particle if dt is too small. + !! @param bub_id Particle id + subroutine s_remove_lag_bubble(bub_id) + + integer, intent(in) :: bub_id + + integer :: i + + !$acc loop seq + do i = bub_id, nBubs - 1 + lag_id(i, 1) = lag_id(i + 1, 1) + bub_R0(i) = bub_R0(i + 1) + Rmax_stats(i) = Rmax_stats(i + 1) + Rmin_stats(i) = Rmin_stats(i + 1) + gas_mg(i) = gas_mg(i + 1) + gas_betaT(i) = gas_betaT(i + 1) + gas_betaC(i) = gas_betaC(i + 1) + bub_dphidt(i) = bub_dphidt(i + 1) + gas_p(i, 1:2) = gas_p(i + 1, 1:2) + gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) + intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) + intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) + mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) + mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) + mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) + mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) + intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) + intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) + gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) + gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) + end do + + nBubs = nBubs - 1 + !$acc update device(nBubs) + + end subroutine s_remove_lag_bubble + + !> The purpose of this subroutine is to deallocate variables + subroutine s_finalize_lagrangian_solver() + + integer :: i + + do i = 1, q_beta_idx + @:DEALLOCATE(q_beta%vf(i)%sf) + end do + @:DEALLOCATE(q_beta%vf) + + !Deallocating space + @:DEALLOCATE(lag_id) + @:DEALLOCATE(bub_R0) + @:DEALLOCATE(Rmax_stats) + @:DEALLOCATE(Rmin_stats) + @:DEALLOCATE(gas_mg) + @:DEALLOCATE(gas_betaT) + @:DEALLOCATE(gas_betaC) + @:DEALLOCATE(bub_dphidt) + @:DEALLOCATE(gas_p) + @:DEALLOCATE(gas_mv) + @:DEALLOCATE(intfc_rad) + @:DEALLOCATE(intfc_vel) + @:DEALLOCATE(mtn_pos) + @:DEALLOCATE(mtn_posPrev) + @:DEALLOCATE(mtn_vel) + @:DEALLOCATE(mtn_s) + @:DEALLOCATE(intfc_draddt) + @:DEALLOCATE(intfc_dveldt) + @:DEALLOCATE(gas_dpdt) + @:DEALLOCATE(gas_dmvdt) + @:DEALLOCATE(mtn_dposdt) + @:DEALLOCATE(mtn_dveldt) + + end subroutine s_finalize_lagrangian_solver + +end module m_bubbles_EL diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index beda4de145..674d07d049 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -11,6 +11,8 @@ module m_bubbles_EL use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common + use m_bubbles_EL_kernels !< Definitions of the kernel functions use m_bubbles !< General bubble dynamics procedures @@ -139,6 +141,8 @@ contains if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) + ! 
Starting bubbles call s_start_lagrange_inputs() call s_read_input_bubbles(q_cons_vf) @@ -245,10 +249,14 @@ contains call s_restart_bubbles(bub_id, save_count) end if - if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') - print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id + call s_mpi_reduce_int_sum(bub_id) + + if (proc_rank == 0) then + if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') + end if + !$acc update device(bubbles_lagrange, lag_params) !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & @@ -571,8 +579,6 @@ contains ! Obtaining driving pressure call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - !if (k == 14) print*, k, cell(1), cell(2), cell(3) - ! Obtain liquid density and computing speed of sound from pinf !$acc loop seq do i = 1, num_fluids @@ -600,7 +606,6 @@ contains gas_mv(k, 1) = myMass_v else - ! Radial acceleration from bubble models intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & myPb, myPbdot, dmalf, dmntait, dmBtait, & @@ -614,13 +619,13 @@ contains do l = 1, num_dims if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,1), mtn_vel(k,l,2), & + intfc_rad(k,2), mtn_vel(k,l,2), & gas_mg(k), gas_mv(k, 2), & Re(1), myRho, cell, l, q_prim_vf) else @@ -843,12 +848,7 @@ contains if (lag_params%vel_model > 0) then cell = -buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) - !print*, mtn_pos(bub_id, 1, 2), mtn_pos(bub_id, 2, 2) scoord = mtn_s(bub_id, 1:3, 2) - - !if (k == 14) print*, "pinf cell", cell - !if (k == 14) print*, "pinf scoord", scoord - !if (k == 14) print*, "pinf pos", mtn_pos(bub_id,:,2) else scoord = mtn_s(bub_id, 1:3, 2) cell(:) = int(scoord(:)) @@ -1053,6 +1053,7 @@ contains !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) @@ -1077,6 +1078,7 @@ contains !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) @@ -1091,6 +1093,7 @@ contains !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp @@ -1116,6 +1119,7 @@ contains !u{1} = u{n} 
+ dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) @@ -1130,6 +1134,7 @@ contains !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp @@ -1144,6 +1149,7 @@ contains !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) @@ -1175,13 +1181,16 @@ contains !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs - if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) - else if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then + call s_remove_lag_bubble(k) end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1190,6 +1199,10 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) end if if (p > 0) then @@ -1199,16 +1212,20 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. 
mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) end if end if if (ib) then cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - !if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) - if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell + do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & @@ -1216,12 +1233,23 @@ contains end do cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell end if end if end do + if (num_procs > 1) then + call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) + call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts) + call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + end if + end subroutine s_enforce_EL_bubbles_boundary_conditions !> This subroutine returns the computational coordinate of the cell for the given position. @@ -1310,20 +1338,26 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & + (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & + (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & + (pos_part(3) < z_cb(p + buff_size - mapCells)) .and. & + (pos_part(3) >= z_cb(-buff_size - 1 + mapCells))) end if ! 
For symmetric and wall boundary condition diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index ea0281ce14..b55b6c7e9c 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -128,7 +128,9 @@ contains s_coord(1:3) = lbk_s(l, 1:3, 2) center(1:2) = lbk_pos(l, 1:2, 2) if (p > 0) center(3) = lbk_pos(l, 3, 2) + cell = -buff_size call s_get_cell(s_coord, cell) + !print*, s_coord call s_compute_stddsv(cell, volpart, stddsv) strength_vol = volpart @@ -250,7 +252,7 @@ contains theta = 0._wp Nr = ceiling(lag_params%charwidth/(y_cb(cellaux(2)) - y_cb(cellaux(2) - 1))) Nr_count = 1._wp - mapCells*1._wp - dzp = y_cb(cellaux(2) + 1) - y_cb(cellaux(2)) + dzp = y_cb(cellaux(2)) - y_cb(cellaux(2) - 1) Lz2 = (center(3) - (dzp*(0.5_wp + Nr_count) - lag_params%charwidth/2._wp))**2._wp distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) func = dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp @@ -281,21 +283,27 @@ contains celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(1) < -buff_size + mapCells) .or. & + (cellaux(2) < -buff_size + mapCells)) then celloutside = .true. end if if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then celloutside = .true. end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(2) > n + buff_size - mapCells) .or. & + (cellaux(1) > m + buff_size - mapCells)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(3) < -buff_size + mapCells) .or. & + (cellaux(1) < -buff_size + mapCells) .or. & + (cellaux(2) < -buff_size + mapCells)) then celloutside = .true. end if - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(3) > p + buff_size - mapCells) .or. & + (cellaux(2) > n + buff_size - mapCells) .or. & + (cellaux(1) > m + buff_size - mapCells)) then celloutside = .true. end if end if @@ -412,7 +420,7 @@ contains !! real type into integer. !! @param s Computational coordinates of the bubble, real type !! @param get_cell Computational coordinates of the bubble, integer type - subroutine s_get_cell(s_cell, get_cell) + subroutine s_get_cell(s_cell, get_cell) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_cell #else diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 6b3ef62bd1..5960b3a84f 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -205,6 +205,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx !< Indicies for neighboring processors + + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks for lagrangian particle communication + integer, allocatable, dimension(:) :: start_idx !< !! 
Starting cell-center index of local processor in global grid diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 0a29555e0a..992e3470e1 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,12 +44,19 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: ierr + integer, private :: request, ierr !> @} integer :: i_halo_size !$acc declare create(i_halo_size) + integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts + integer, dimension(:,:,:,:), allocatable :: p_send_ids + character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff + type(bounds_info), dimension(3) :: comm_coords + integer :: p_buff_size + !$acc declare create(p_send_counts, comm_coords) + contains subroutine s_initialize_mpi_proxy_module() @@ -72,12 +79,40 @@ contains end if !$acc update device(i_halo_size) + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) end if #endif end subroutine s_initialize_mpi_proxy_module + subroutine s_initialize_particles_mpi(lag_num_ts) + + integer :: i, j, k + integer :: real_size, int_size, nVar, lag_num_ts + + call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) + call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) + + nVar = 28 + 10*lag_num_ts + int_size + p_buff_size = nVar*lag_params%nBubs_glb*real_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) + + comm_coords(1)%beg = x_cb(buff_size - mapCells - 1) + comm_coords(1)%end = x_cb(m - buff_size + mapCells) + if (n > 0) then + comm_coords(2)%beg = y_cb(buff_size - mapCells - 1) + comm_coords(2)%end = y_cb(m - buff_size + mapCells) + if (p > 0) then + comm_coords(3)%beg = z_cb(buff_size - mapCells - 1) + comm_coords(3)%end = z_cb(p - buff_size + mapCells) + end if + end if + + @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) + + end subroutine s_initialize_particles_mpi + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of @@ -135,7 +170,7 @@ contains if (bubbles_lagrange) then #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] + & 'write_bubbles', 'write_bubbles_stats', 'vel_model', 'drag_model'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -417,6 +452,191 @@ contains end subroutine s_mpi_sendrecv_ib_buffers + subroutine s_add_particles_to_transfer_list(pos, posPrev, nbub) + + real(wp), dimension(:,:) :: pos, posPrev + integer :: bubID, nbub + integer :: i, j, k + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + p_send_counts(i,j,k) = 0 + end do + end do + end do + + do k = 1, nbub + ! Left face + if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then + p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k + p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + + if (n > 0) then + ! Left bottom corner + if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. 
nidx(2)%beg /= 0) then + p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k + p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + + ! Left top corner + elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k + p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end if + + ! Right face + elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then + p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k + p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + + if (n > 0) then + ! Right bottom corner + if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k + p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + + ! Right top corner + elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k + p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end if + + ! Bottom face (corners already accounted for) + elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + + ! Top face (corners already accounted for) + elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end do + + end subroutine s_add_particles_to_transfer_list + + subroutine s_mpi_send_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & + vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts) + + real(wp), dimension(:) :: bub_R0, gas_mg + real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv + real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel + integer, dimension(:,:) :: lag_id + integer :: i, j, k, l, q + integer :: position, bub_id, lag_num_ts + +#ifdef MFC_MPI + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) + if (p_send_counts(i,j,k) > 0) then + print*, "SEND", proc_rank, p_send_counts(i,j,k) + end if + if (p_send_counts(i,j,k) > 0) then + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + do q = 1, 2 + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + + call MPI_Isend(p_send_buff, position, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, request, ierr) + end if + end if + end do + end do + end do +#endif + + end subroutine s_mpi_send_particles + + subroutine s_mpi_recv_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & + vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + + real(wp), dimension(:) :: bub_R0, gas_mg + real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv + real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel + integer, dimension(:, :) :: lag_id + integer :: i, j, k, l, q + integer :: position, bub_id, nbubs, lag_num_ts + +#ifdef MFC_MPI + do 
k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (p_recv_counts(i,j,k) > 0) then + print*, "RECV", proc_rank, p_recv_counts(i,j,k) + end if + if (p_recv_counts(i,j,k) > 0) then + position = 0 + call MPI_recv(p_recv_buff, p_buff_size, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_buff_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_buff_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + end do + call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + lag_id(bub_id, 2) = bub_id + end do + end if + end if + end do + end do + end do +#endif + + end subroutine s_mpi_recv_particles + subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 3fec8127a7..c8299635df 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -904,13 +904,6 @@ contains end if if (bubbles_lagrange) then - ! RHS additions for sub-grid bubbles_lagrange - call nvtxStartRange("RHS-EL-BUBBLES-SRC") - call s_compute_bubbles_EL_source( & - q_cons_qp%vf(1:sys_size), & - q_prim_qp%vf(1:sys_size), & - rhs_vf) - call nvtxEndRange ! Compute bubble dynamics if (.not. adap_dt) then call nvtxStartRange("RHS-EL-BUBBLES-DYN") @@ -922,6 +915,14 @@ contains stage) call nvtxEndRange end if + + ! RHS additions for sub-grid bubbles_lagrange + call nvtxStartRange("RHS-EL-BUBBLES-SRC") + call s_compute_bubbles_EL_source( & + q_cons_qp%vf(1:sys_size), & + q_prim_qp%vf(1:sys_size), & + rhs_vf) + call nvtxEndRange end if if (chemistry .and. 
chem_params%reactions) then diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index de07dfd631..5af09f5ed6 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1020,7 +1020,7 @@ contains integer :: i, j, k, l call nvtxStartRange("RHS-BODYFORCES") - call s_compute_body_forces_rhs(q_cons_vf, q_prim_vf, rhs_vf) + call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) !$acc parallel loop collapse(4) gang vector default(present) do i = momxb, E_idx From 08227856d6e0f49d5dc4eb1a99f562aa8f65cece Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:58:52 -0400 Subject: [PATCH 31/62] remove file --- "src/simulation/\\" | 1837 ------------------------------------------- 1 file changed, 1837 deletions(-) delete mode 100644 "src/simulation/\\" diff --git "a/src/simulation/\\" "b/src/simulation/\\" deleted file mode 100644 index 2559c0ffff..0000000000 --- "a/src/simulation/\\" +++ /dev/null @@ -1,1837 +0,0 @@ -!> -!! @file m_bubbles_EL.fpp -!! @brief Contains module m_bubbles_EL - -#:include 'macros.fpp' - -!> @brief This module is used to to compute the volume-averaged bubble model -module m_bubbles_EL - - use m_global_parameters !< Definitions of the global parameters - - use m_mpi_proxy !< Message passing interface (MPI) module proxy - - use m_mpi_common - - use m_bubbles_EL_kernels !< Definitions of the kernel functions - - use m_bubbles !< General bubble dynamics procedures - - use m_variables_conversion !< State variables type conversion procedures - - use m_compile_specific - - use m_boundary_common - - use m_sim_helpers - - use m_helper - - use m_ibm - - implicit none - - !(nBub) - integer, allocatable, dimension(:, :) :: lag_id !< Global and local IDs - real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius - real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius - real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius - real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass - real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) - real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) - real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) - !(nBub, 1 -> actual val or 2 -> temp val) - real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble - real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble - real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius - real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface - !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) - real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position - real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position - real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity - real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format - !(nBub, 1-> x or 2->y or 3 ->z, time-stage) - real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius - real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity - real(wp), allocatable, dimension(:, :) :: gas_dpdt !< Time derivative of gas pressure - real(wp), allocatable, dimension(:, :) :: 
gas_dmvdt !< Time derivative of the vapor mass in the bubble - real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position - real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity - - !$acc declare create(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, & - !$acc gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - !$acc gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt) - - integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme - - !$acc declare create(lag_num_ts) - - integer :: nBubs !< Number of bubbles in the local domain - real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain - type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework - integer :: q_beta_idx !< Size of the q_beta vector field - - !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) - -contains - - !> Initializes the lagrangian subgrid bubble solver - !! @param q_cons_vf Initial conservative variables - subroutine s_initialize_bubbles_EL_module(q_cons_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - - integer :: nBubs_glb, i - - ! Setting number of time-stages for selected time-stepping scheme - lag_num_ts = time_stepper - - ! Allocate space for the Eulerian fields needed to map the effect of the bubbles - if (lag_params%solver_approach == 1) then - ! One-way coupling - q_beta_idx = 3 - elseif (lag_params%solver_approach == 2) then - ! Two-way coupling - q_beta_idx = 4 - if (p == 0) then - !Subgrid noise model for 2D approximation - q_beta_idx = 6 - end if - else - call s_mpi_abort('Please check the lag_params%solver_approach input') - end if - - !$acc update device(lag_num_ts, q_beta_idx) - - @:ALLOCATE(q_beta%vf(1:q_beta_idx)) - - do i = 1, q_beta_idx - @:ALLOCATE(q_beta%vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - end do - - @:ACC_SETUP_VFs(q_beta) - - ! Allocating space for lagrangian variables - nBubs_glb = lag_params%nBubs_glb - - @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) - @:ALLOCATE(bub_R0(1:nBubs_glb)) - @:ALLOCATE(Rmax_stats(1:nBubs_glb)) - @:ALLOCATE(Rmin_stats(1:nBubs_glb)) - @:ALLOCATE(gas_mg(1:nBubs_glb)) - @:ALLOCATE(gas_betaT(1:nBubs_glb)) - @:ALLOCATE(gas_betaC(1:nBubs_glb)) - @:ALLOCATE(bub_dphidt(1:nBubs_glb)) - @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) - @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) - @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) - @:ALLOCATE(intfc_vel(1:nBubs_glb, 1:2)) - @:ALLOCATE(mtn_pos(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(mtn_posPrev(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(mtn_vel(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(mtn_s(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(intfc_draddt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(intfc_dveldt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(gas_dpdt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(gas_dmvdt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) - @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) - - if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol - - if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) - - ! 
Starting bubbles - call s_start_lagrange_inputs() - call s_read_input_bubbles(q_cons_vf) - - end subroutine s_initialize_bubbles_EL_module - - !> The purpose of this procedure is to start lagrange bubble parameters applying nondimensionalization if needed - subroutine s_start_lagrange_inputs() - - integer :: id_bubbles, id_host - real(wp) :: rho0, c0, T0, x0, p0 - - id_bubbles = num_fluids - id_host = num_fluids - 1 - - !Reference values - rho0 = lag_params%rho0 - c0 = lag_params%c0 - T0 = lag_params%T0 - x0 = lag_params%x0 - p0 = rho0*c0*c0 - - !Update inputs - Tw = lag_params%Thost/T0 - pv = fluid_pp(id_host)%pv/p0 - gamma_v = fluid_pp(id_bubbles)%gamma_v - gamma_n = fluid_pp(id_host)%gamma_v - k_vl = fluid_pp(id_bubbles)%k_v*(T0/(x0*rho0*c0*c0*c0)) - k_nl = fluid_pp(id_host)%k_v*(T0/(x0*rho0*c0*c0*c0)) - cp_v = fluid_pp(id_bubbles)%cp_v*(T0/(c0*c0)) - cp_n = fluid_pp(id_host)%cp_v*(T0/(c0*c0)) - R_v = (R_uni/fluid_pp(id_bubbles)%M_v)*(T0/(c0*c0)) - R_n = (R_uni/fluid_pp(id_host)%M_v)*(T0/(c0*c0)) - lag_params%diffcoefvap = lag_params%diffcoefvap/(x0*c0) - ss = fluid_pp(id_host)%ss/(rho0*x0*c0*c0) - mul0 = fluid_pp(id_host)%mul0/(rho0*x0*c0) - - ! Parameters used in bubble_model - Web = 1._wp/ss - Re_inv = mul0 - - ! Need improvements to accept polytropic gas compression, isothermal and adiabatic thermal models, and - ! the Gilmore and RP bubble models. - polytropic = .false. ! Forcing no polytropic model - thermal = 3 ! Forcing constant transfer coefficient model based on Preston et al., 2007 - ! If Keller-Miksis model is not selected, then no radial motion - - end subroutine s_start_lagrange_inputs - - !> The purpose of this procedure is to obtain the initial bubbles' information - !! @param q_cons_vf Conservative variables - subroutine s_read_input_bubbles(q_cons_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - - real(wp), dimension(8) :: inputBubble - real(wp) :: qtime - integer :: id, bub_id, save_count - integer :: i, ios - logical :: file_exist, indomain - - character(LEN=path_len + 2*name_len) :: path_D_dir !< - - ! Initialize number of particles - bub_id = 0 - id = 0 - - ! Read the input lag_bubble file or restart point - if (cfl_dt) then - save_count = n_start - qtime = n_start*t_save - else - save_count = t_step_start - qtime = t_step_start*dt - end if - - if (save_count == 0) then - if (proc_rank == 0) print *, 'Reading lagrange bubbles input file.' - inquire (file='input/lag_bubbles.dat', exist=file_exist) - if (file_exist) then - open (94, file='input/lag_bubbles.dat', form='formatted', iostat=ios) - do while (ios == 0) - read (94, *, iostat=ios) (inputBubble(i), i=1, 8) - if (ios /= 0) cycle - indomain = particle_in_domain(inputBubble(1:3)) - id = id + 1 - if (id > lag_params%nBubs_glb .and. proc_rank == 0) then - call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") - end if - if (indomain) then - bub_id = bub_id + 1 - call s_add_bubbles(inputBubble, q_cons_vf, bub_id) - lag_id(bub_id, 1) = id !global ID - lag_id(bub_id, 2) = bub_id !local ID - nBubs = bub_id ! 
local number of bubbles - end if - end do - close (94) - else - call s_mpi_abort("Initialize the lagrange bubbles in input/lag_bubbles.dat") - end if - else - if (proc_rank == 0) print *, 'Restarting lagrange bubbles at save_count: ', save_count - call s_restart_bubbles(bub_id, save_count) - end if - - print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - - call s_mpi_reduce_int_sum(bub_id) - - if (proc_rank == 0) then - if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') - end if - - !$acc update device(bubbles_lagrange, lag_params) - - !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & - !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs) - - Rmax_glb = min(dflt_real, -dflt_real) - Rmin_glb = max(dflt_real, -dflt_real) - !$acc update device(Rmax_glb, Rmin_glb) - - !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - - !Populate temporal variables - call s_transfer_data_to_tmp() - call s_smear_voidfraction() - - if (lag_params%write_bubbles) call s_write_lag_particles(qtime) - - if (save_count == 0) then - ! Create ./D directory - write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' - call my_inquire(path_D_dir, file_exist) - if (.not. file_exist) call s_create_directory(trim(path_D_dir)) - call s_write_restart_lag_bubbles(save_count) ! Needed for post_processing - call s_write_void_evol(qtime) - end if - - end subroutine s_read_input_bubbles - - !> The purpose of this procedure is to obtain the information of the bubbles when starting fresh - !! @param inputBubble Bubble information - !! @param q_cons_vf Conservative variables - !! @param bub_id Local id of the bubble - subroutine s_add_bubbles(inputBubble, q_cons_vf, bub_id) - - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - real(wp), dimension(8), intent(in) :: inputBubble - integer, intent(in) :: bub_id - integer :: i - - real(wp) :: pliq, volparticle, concvap, totalmass, kparticle, cpparticle - real(wp) :: omegaN, PeG, PeT, rhol, pcrit, qv, gamma, pi_inf, dynP - integer, dimension(3) :: cell - real(wp), dimension(2) :: Re - real(wp) :: massflag, heatflag, Re_trans, Im_trans - - massflag = 0._wp - heatflag = 0._wp - if (lag_params%massTransfer_model) massflag = 1._wp - if (lag_params%heatTransfer_model) heatflag = 1._wp - - bub_R0(bub_id) = inputBubble(7) - Rmax_stats(bub_id) = min(dflt_real, -dflt_real) - Rmin_stats(bub_id) = max(dflt_real, -dflt_real) - bub_dphidt(bub_id) = 0._wp - intfc_rad(bub_id, 1) = inputBubble(7) - intfc_vel(bub_id, 1) = inputBubble(8) - mtn_pos(bub_id, 1:3, 1) = inputBubble(1:3) - mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) - mtn_vel(bub_id, 1:3, 1) = inputBubble(4:6) - - if (cyl_coord .and. p == 0) then - mtn_pos(bub_id, 2, 1) = sqrt(mtn_pos(bub_id, 2, 1)**2._wp + & - mtn_pos(bub_id, 3, 1)**2._wp) - !Storing azimuthal angle (-Pi to Pi)) into the third coordinate variable - mtn_pos(bub_id, 3, 1) = atan2(inputBubble(3), inputBubble(2)) - mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) - end if - - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - - ! Check if the bubble is located in the ghost cell of a symmetric boundary - if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. 
& - (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & - (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & - (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - - if (p > 0) then - if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & - (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - end if - - ! If particle is in the ghost cells, find the closest non-ghost cell - cell(1) = min(max(cell(1), 0), m) - cell(2) = min(max(cell(2), 0), n) - if (p > 0) cell(3) = min(max(cell(3), 0), p) - call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & - rhol, gamma, pi_inf, qv, Re) - dynP = 0._wp - do i = 1, num_dims - dynP = dynP + 0.5_wp*q_cons_vf(contxe + i)%sf(cell(1), cell(2), cell(3))**2/rhol - end do - pliq = (q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - dynP - pi_inf)/gamma - if (pliq < 0) print *, "Negative pressure", proc_rank, & - q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)), pi_inf, gamma, pliq, cell, dynP - - ! Initial particle pressure - gas_p(bub_id, 1) = pliq + 2._wp*(1._wp/Web)/bub_R0(bub_id) - if ((1._wp/Web) /= 0._wp) then - pcrit = pv - 4._wp*(1._wp/Web)/(3._wp*sqrt(3._wp*gas_p(bub_id, 1)*bub_R0(bub_id)**3._wp/(2._wp*(1._wp/Web)))) - pref = gas_p(bub_id, 1) - else - pcrit = 0._wp - end if - - ! Initial particle mass - volparticle = 4._wp/3._wp*pi*bub_R0(bub_id)**3._wp ! volume - gas_mv(bub_id, 1) = pv*volparticle*(1._wp/(R_v*Tw))*(massflag) ! vapermass - gas_mg(bub_id) = (gas_p(bub_id, 1) - pv*(massflag))*volparticle*(1._wp/(R_n*Tw)) ! gasmass - if (gas_mg(bub_id) <= 0._wp) then - call s_mpi_abort("The initial mass of gas inside the bubble is negative. Check the initial conditions.") - end if - totalmass = gas_mg(bub_id) + gas_mv(bub_id, 1) ! totalmass - - ! Bubble natural frequency - concvap = gas_mv(bub_id, 1)/(gas_mv(bub_id, 1) + gas_mg(bub_id)) - omegaN = (3._wp*(gas_p(bub_id, 1) - pv*(massflag)) + 4._wp*(1._wp/Web)/bub_R0(bub_id))/rhol - if (pv*(massflag) > gas_p(bub_id, 1)) then - call s_mpi_abort("Lagrange bubble initially located in a region with pressure below the vapor pressure.") - end if - omegaN = sqrt(omegaN/bub_R0(bub_id)**2._wp) - - cpparticle = concvap*cp_v + (1._wp - concvap)*cp_n - kparticle = concvap*k_vl + (1._wp - concvap)*k_nl - - ! Mass and heat transfer coefficients (based on Preston 2007) - PeT = totalmass/volparticle*cpparticle*bub_R0(bub_id)**2._wp*omegaN/kparticle - call s_transcoeff(1._wp, PeT, Re_trans, Im_trans) - gas_betaT(bub_id) = Re_trans*(heatflag)*kparticle - - PeG = bub_R0(bub_id)**2._wp*omegaN/lag_params%diffcoefvap - call s_transcoeff(1._wp, PeG, Re_trans, Im_trans) - gas_betaC(bub_id) = Re_trans*(massflag)*lag_params%diffcoefvap - - if (gas_mg(bub_id) <= 0._wp) then - call s_mpi_abort("Negative gas mass in the bubble, check if the bubble is in the domain.") - end if - - end subroutine s_add_bubbles - - !> The purpose of this procedure is to obtain the information of the bubbles from a restart point. - !! @param bub_id Local ID of the particle - !! 
@param save_count File identifier - subroutine s_restart_bubbles(bub_id, save_count) - - integer, intent(inout) :: bub_id, save_count - - character(LEN=path_len + 2*name_len) :: file_loc - -#ifdef MFC_MPI - real(wp), dimension(20) :: inputvals - integer, dimension(MPI_STATUS_SIZE) :: status - integer(kind=MPI_OFFSET_KIND) :: disp - integer :: view - - integer, dimension(3) :: cell - logical :: indomain, particle_file, file_exist - - integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer :: ifile, ierr, tot_data, id - integer :: i - - write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=file_exist) - - if (file_exist) then - if (proc_rank == 0) then - open (9, file=trim(file_loc), form='unformatted', status='unknown') - read (9) tot_data, mytime, dt - close (9) - print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt - end if - else - print '(a)', trim(file_loc)//' is missing. exiting.' - call s_mpi_abort - end if - - call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 - start_idx_part(1) = 0 - start_idx_part(2) = 0 - - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) - - ! Open the file to write all flow variables - write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=particle_file) - - if (particle_file) then - call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) - call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & - mpi_p, status, ierr) - do i = 1, tot_data - id = int(MPI_IO_DATA_lag_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain(inputvals(1:3)) - if (indomain .and. (id > 0)) then - bub_id = bub_id + 1 - nBubs = bub_id ! local number of bubbles - lag_id(bub_id, 1) = id ! global ID - lag_id(bub_id, 2) = bub_id ! local ID - mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) - mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) - mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) - intfc_rad(bub_id, 1) = inputvals(10) - intfc_vel(bub_id, 1) = inputvals(11) - bub_R0(bub_id) = inputvals(12) - Rmax_stats(bub_id) = inputvals(13) - Rmin_stats(bub_id) = inputvals(14) - bub_dphidt(bub_id) = inputvals(15) - gas_p(bub_id, 1) = inputvals(16) - gas_mv(bub_id, 1) = inputvals(17) - gas_mg(bub_id) = inputvals(18) - gas_betaT(bub_id) = inputvals(19) - gas_betaC(bub_id) = inputvals(20) - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - end if - end do - deallocate (MPI_IO_DATA_lag_bubbles) - end if - call MPI_FILE_CLOSE(ifile, ierr) -#endif - - end subroutine s_restart_bubbles - - !> Contains the bubble dynamics subroutines. - !! @param q_cons_vf Conservative variables - !! @param q_prim_vf Primitive variables - !! @param rhs_vf Calculated change of conservative variables - !! @param t_step Current time step - !! 
@param stage Current stage in the time-stepper algorithm - subroutine s_compute_bubble_EL_dynamics(q_cons_vf, q_prim_vf, t_step, rhs_vf, stage) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf - integer, intent(in) :: t_step, stage - - real(wp) :: myVapFlux - real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac - real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v - real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot - real(wp) :: myPinf, aux1, aux2, myCson, myRho - real(wp) :: gamma, pi_inf, qv - real(wp), dimension(contxe) :: myalpha_rho, myalpha - real(wp), dimension(2) :: Re - integer, dimension(3) :: cell - - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached - real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines - - integer :: i, k, l - - call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") - - ! Subgrid p_inf model based on Maeda and Colonius (2018). - if (lag_params%pressure_corrector) then - ! Calculate velocity potentials (valid for one bubble per cell) - !$acc parallel loop gang vector default(present) private(k, cell) - do k = 1, nBubs - call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) - myR0 = bub_R0(k) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myPb = gas_p(k, 2) - pint = f_cpbw_KM(myR0, myR, myV, myPb) - pint = pint + 0.5_wp*myV**2._wp - if (lag_params%cluster_type == 2) then - bub_dphidt(k) = (paux - pint) + term2 - ! Accounting for the potential induced by the bubble averaged over the control volume - ! Note that this is based on the incompressible flow assumption near the bubble. - term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) - bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) - end if - end do - end if - - ! Radial motion model - adap_dt_stop_max = 0 - !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & - !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) - do k = 1, nBubs - - ! Current bubble state - myPb = gas_p(k, 2) - myMass_n = gas_mg(k) - myMass_v = gas_mv(k, 2) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myBeta_c = gas_betaC(k) - myBeta_t = gas_betaT(k) - myR0 = bub_R0(k) - - ! Vapor and heat fluxes - myVapFlux = f_vflux(myR, myV, myPb, myMass_v, k, myMass_n, myBeta_c, myR_m, mygamma_m) - myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) - myMvdot = 4._wp*pi*myR**2._wp*myVapFlux - - ! Obtaining driving pressure - call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - - ! Obtain liquid density and computing speed of sound from pinf - !$acc loop seq - do i = 1, num_fluids - myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) - myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) - end do - call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & - myalpha_rho, Re, cell(1), cell(2), cell(3)) - call s_compute_cson_from_pinf(k, q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) - - ! Adaptive time stepping - adap_dt_stop = 0 - - if (adap_dt) then - - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & - dmntait, dmBtait, dm_bub_adv_src, dm_divu, & - k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) - - ! 
Update bubble state - intfc_rad(k, 1) = myR - intfc_vel(k, 1) = myV - gas_p(k, 1) = myPb - gas_mv(k, 1) = myMass_v - - else - - ! Radial acceleration from bubble models - intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & - myPb, myPbdot, dmalf, dmntait, dmBtait, & - dm_bub_adv_src, dm_divu, & - myCson) - print*, proc_rank, intfc_dveldt(k, stage) - intfc_draddt(k, stage) = myV - gas_dmvdt(k, stage) = myMvdot - gas_dpdt(k, stage) = myPbdot - - end if - - do l = 1, num_dims - if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & - cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,2), mtn_vel(k,l,2), & - gas_mg(k), gas_mv(k, 2), & - Re(1), myRho, cell, l, q_prim_vf) - else - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end if - end do - - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) - - end do - - if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") - - call nvtxEndRange - - end subroutine s_compute_bubble_EL_dynamics - - !> The purpose of this subroutine is to obtain the bubble source terms based on Maeda and Colonius (2018) - !! and add them to the RHS scalar field. - !! @param q_cons_vf Conservative variables - !! @param q_prim_vf Conservative variables - !! @param rhs_vf Time derivative of the conservative variables - subroutine s_compute_bubbles_EL_source(q_cons_vf, q_prim_vf, rhs_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf - - integer :: i, j, k, l - - if (.not. 
adap_dt) call s_smear_voidfraction() - - if (lag_params%solver_approach == 2) then - - if (p == 0) then - !$acc parallel loop collapse(4) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & - q_beta%vf(5)%sf(i, j, k)) - - end if - end do - end do - end do - end do - else - !$acc parallel loop collapse(4) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(2)%sf(i, j, k) - end if - end do - end do - end do - end do - end if - - do l = 1, num_dims - - call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(3)%sf(i, j, k) - end if - end do - end do - end do - - !source in energy - !$acc parallel loop collapse(3) gang vector default(present) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) - end do - end do - end do - - call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k) - end if - end do - end do - end do - end do - - end if - - end subroutine s_compute_bubbles_EL_source - - !> This procedure computes the speed of sound from a given driving pressure - !! @param bub_id Bubble id - !! @param q_prim_vf Primitive variables - !! @param pinf Driving pressure - !! @param cell Bubble cell - !! @param rhol Liquid density - !! @param gamma Liquid specific heat ratio - !! @param pi_inf Liquid stiffness - !! 
@param cson Calculated speed of sound - subroutine s_compute_cson_from_pinf(bub_id, q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_cson_from_pinf -#else - !$acc routine seq -#endif - integer, intent(in) :: bub_id - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - real(wp), intent(in) :: pinf, rhol, gamma, pi_inf - integer, dimension(3), intent(in) :: cell - real(wp), intent(out) :: cson - - real(wp) :: E, H - real(wp), dimension(num_dims) :: vel - integer :: i - - !$acc loop seq - do i = 1, num_dims - vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) - end do - E = gamma*pinf + pi_inf + 0.5_wp*rhol*dot_product(vel, vel) - H = (E + pinf)/rhol - cson = sqrt((H - 0.5_wp*dot_product(vel, vel))/gamma) - - end subroutine s_compute_cson_from_pinf - - !> The purpose of this subroutine is to smear the effect of the bubbles in the Eulerian framework - subroutine s_smear_voidfraction() - - integer :: i, j, k - - call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, q_beta_idx - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(i)%sf(j, k, l) = 0._wp - end do - end do - end do - end do - - call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & - mtn_s, mtn_pos, q_beta) - - !Store 1-beta - !$acc parallel loop collapse(3) gang vector default(present) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) - ! Limiting void fraction given max value - q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & - 1._wp - lag_params%valmaxvoid) - end do - end do - end do - - call nvtxEndRange - - end subroutine s_smear_voidfraction - - !> The purpose of this procedure is to obtain the bubble driving pressure p_inf - !! @param bub_id Particle identifier - !! @param q_prim_vf Primitive variables - !! @param ptype 1: p at infinity, 2: averaged P at the bubble location - !! @param f_pinfl Driving pressure - !! @param cell Bubble cell - !!
@param Romega Control volume radius - subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_get_pinf -#else - !$acc routine seq -#endif - integer, intent(in) :: bub_id, ptype - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - real(wp), intent(out) :: f_pinfl - integer, dimension(3), intent(out) :: cell - real(wp), intent(out), optional :: preterm1, term2, Romega - - real(wp), dimension(3) :: scoord, psi - real(wp) :: dc, vol, aux - real(wp) :: volgas, term1, Rbeq, denom - real(wp) :: charvol, charpres, charvol2, charpres2 - integer, dimension(3) :: cellaux - integer :: i, j, k - integer :: smearGrid, smearGridz - logical :: celloutside - - f_pinfl = 0._wp - - if (lag_params%vel_model > 0) then - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) - scoord = mtn_s(bub_id, 1:3, 2) - else - scoord = mtn_s(bub_id, 1:3, 2) - cell(:) = int(scoord(:)) - end if - - !$acc loop seq - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do - - if ((lag_params%cluster_type == 1)) then - !< Getting p_cell in terms of only the current cell by interpolation - - !< Getting the cell volulme as Omega - if (p > 0) then - vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - else - if (cyl_coord) then - vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - end if - end if - - !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. - psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - if (cell(1) == (m + buff_size)) then - cell(1) = cell(1) - 1 - psi(1) = 1._wp - else if (cell(1) == (-buff_size)) then - psi(1) = 0._wp - else - if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - end if - - psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - if (cell(2) == (n + buff_size)) then - cell(2) = cell(2) - 1 - psi(2) = 1._wp - else if (cell(2) == (-buff_size)) then - psi(2) = 0._wp - else - if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - end if - - if (p > 0) then - psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - if (cell(3) == (p + buff_size)) then - cell(3) = cell(3) - 1 - psi(3) = 1._wp - else if (cell(3) == (-buff_size)) then - psi(3) = 0._wp - else - if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - end if - else - psi(3) = 0._wp - end if - - !< Perform bilinear interpolation - if (p == 0) then !2D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) - else !3D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - 
psi(1))*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) - end if - - !R_Omega - dc = (3._wp*vol/(4._wp*pi))**(1._wp/3._wp) - - else if (lag_params%cluster_type >= 2) then - ! Bubble dynamic closure from Maeda and Colonius (2018) - - ! Include the cell that contains the bubble (mapCells+1+mapCells) - smearGrid = mapCells - (-mapCells) + 1 - smearGridz = smearGrid - if (p == 0) smearGridz = 1 - - charvol = 0._wp - charpres = 0._wp - charvol2 = 0._wp - charpres2 = 0._wp - vol = 0._wp - - !$acc loop seq - do i = 1, smearGrid - !$acc loop seq - do j = 1, smearGrid - !$acc loop seq - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !< check if the current cell is outside the computational domain or not (including ghost cells) - celloutside = .false. - if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then - celloutside = .true. - end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if - else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if - end if - if (.not. celloutside) then - if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then - celloutside = .true. - end if - end if - - if (.not. celloutside) then - !< Obtaining the cell volulme - if (p > 0) then - vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) - else - if (cyl_coord) then - vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi - else - vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth - end if - end if - !< Update values - charvol = charvol + vol - charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol - charvol2 = charvol2 + vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) - charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3)) & - *vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) - end if - - end do - end do - end do - - f_pinfl = charpres2/charvol2 - vol = charvol - dc = (3._wp*abs(vol)/(4._wp*pi))**(1._wp/3._wp) - - end if - - if (lag_params%pressure_corrector) then - - !Valid if only one bubble exists per cell - volgas = intfc_rad(bub_id, 2)**3._wp - denom = intfc_rad(bub_id, 2)**2._wp - term1 = bub_dphidt(bub_id)*intfc_rad(bub_id, 2)**2._wp - term2 = intfc_vel(bub_id, 2)*intfc_rad(bub_id, 2)**2._wp - - Rbeq = volgas**(1._wp/3._wp) !surrogate bubble radius - aux = dc**3._wp - Rbeq**3._wp - term2 = term2/denom - term2 = 3._wp/2._wp*term2**2._wp*Rbeq**3._wp*(1._wp - Rbeq/dc)/aux - preterm1 = 3._wp/2._wp*Rbeq*(dc**2._wp - Rbeq**2._wp)/(aux*denom) - - !Control volume radius - if (ptype == 2) Romega = dc - - ! 
Getting p_inf - if (ptype == 1) then - f_pinfl = f_pinfl + preterm1*term1 + term2 - end if - - end if - - end subroutine s_get_pinf - - !> This subroutine updates the Lagrange variables using the tvd RK time steppers. - !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. - !! @param stage Current tvd RK stage - subroutine s_update_lagrange_tdv_rk(stage) - - integer, intent(in) :: stage - - integer :: k - - if (time_stepper == 1) then ! 1st order TVD RK - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) - - call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - - if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) - call s_write_lag_particles(mytime) - end if - - elseif (time_stepper == 2) then ! 2nd order TVD RK - if (stage == 1) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) - - elseif (stage == 2) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp - gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp - gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) - - call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) - call s_write_lag_particles(mytime) - end if - - end if - - elseif (time_stepper == 3) then ! 
3rd order TVD RK - if (stage == 1) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) - - elseif (stage == 2) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp - gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp - gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) - - elseif (stage == 3) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] - intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) - intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) - mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) - gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) - gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) - - call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - - if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) - call s_write_lag_particles(mytime) - end if - - end if - end if - - end subroutine s_update_lagrange_tdv_rk - - !> This subroutine enforces reflective and wall boundary conditions for EL bubbles - subroutine s_enforce_EL_bubbles_boundary_conditions(dest) - - integer, intent(in) :: dest - integer :: k, i, patch_id - integer, dimension(3) :: cell - - !$acc parallel loop gang vector default(present) private(cell) - do k = 1, nBubs - if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. 
mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) - elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) - elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then - call s_remove_lag_bubble(k) - end if - - if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) - else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) - elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then - call s_remove_lag_bubble(k) - end if - - if (p > 0) then - if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) - else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) - elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then - call s_remove_lag_bubble(k) - end if - end if - - if (ib) then - cell = -buff_size - call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - - if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then - patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) - - do i = 1, num_dims - mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & - levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), patch_id) - end do - cell = -buff_size - call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - end if - end if - - end do - - if (num_procs > 1) then - call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) - call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts) - call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts, nbubs) - end if - - end subroutine s_enforce_EL_bubbles_boundary_conditions - - !> This subroutine returns the computational coordinate of the cell for the given position. - !! @param pos Input coordinates - !! @param cell Computational coordinate of the cell - !! 
@param scoord Calculated particle coordinates - subroutine s_locate_cell(pos, cell, scoord) - - real(wp), dimension(3), intent(in) :: pos - real(wp), dimension(3), intent(out) :: scoord - integer, dimension(3), intent(inout) :: cell - - integer :: i - - do while (pos(1) < x_cb(cell(1) - 1)) - cell(1) = cell(1) - 1 - end do - - do while (pos(1) > x_cb(cell(1))) - cell(1) = cell(1) + 1 - end do - - do while (pos(2) < y_cb(cell(2) - 1)) - cell(2) = cell(2) - 1 - end do - - do while (pos(2) > y_cb(cell(2))) - cell(2) = cell(2) + 1 - end do - - if (p > 0) then - do while (pos(3) < z_cb(cell(3) - 1)) - cell(3) = cell(3) - 1 - end do - do while (pos(3) > z_cb(cell(3))) - cell(3) = cell(3) + 1 - end do - end if - - ! The numbering of the cell of which left boundary is the domain boundary is 0. - ! if comp.coord of the pos is s, the real coordinate of s is - ! (the coordinate of the left boundary of the Floor(s)-th cell) - ! + (s-(int(s))*(cell-width). - ! In other words, the coordinate of the center of the cell is x_cc(cell). - - !coordinates in computational space - scoord(1) = cell(1) + (pos(1) - x_cb(cell(1) - 1))/dx(cell(1)) - scoord(2) = cell(2) + (pos(2) - y_cb(cell(2) - 1))/dy(cell(2)) - scoord(3) = 0._wp - if (p > 0) scoord(3) = cell(3) + (pos(3) - z_cb(cell(3) - 1))/dz(cell(3)) - cell(:) = int(scoord(:)) - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do - - end subroutine s_locate_cell - - !> This subroutine transfer data into the temporal variables. - subroutine s_transfer_data_to_tmp() - - integer :: k - - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - gas_p(k, 2) = gas_p(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) - intfc_rad(k, 2) = intfc_rad(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) - mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) - end do - - end subroutine s_transfer_data_to_tmp - - !> The purpose of this procedure is to determine if the global coordinates of the bubbles - !! are present in the current MPI processor (including ghost cells). - !! @param pos_part Spatial coordinates of the bubble - function particle_in_domain(pos_part) - - logical :: particle_in_domain - real(wp), dimension(3), intent(in) :: pos_part - - ! 2D - if (p == 0 .and. cyl_coord .neqv. .true.) then - ! Defining a virtual z-axis that has the same dimensions as y-axis - ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) - else - ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) - end if - - ! 3D - if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) - end if - - ! For symmetric and wall boundary condition - if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. 
(pos_part(1) >= x_cb(-1))) - end if - if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) - end if - if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then - particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) - end if - if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then - particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) - end if - - if (p > 0) then - if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) - end if - if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p))) - end if - end if - - end function particle_in_domain - - !> The purpose of this procedure is to determine if the lagrangian bubble is located in the - !! physical domain. The ghost cells are not part of the physical domain. - !! @param pos_part Spatial coordinates of the bubble - function particle_in_domain_physical(pos_part) - - logical :: particle_in_domain_physical - real(wp), dimension(3), intent(in) :: pos_part - - particle_in_domain_physical = ((pos_part(1) < x_cb(m)) .and. (pos_part(1) >= x_cb(-1)) .and. & - (pos_part(2) < y_cb(n)) .and. (pos_part(2) >= y_cb(-1))) - - if (p > 0) then - particle_in_domain_physical = (particle_in_domain_physical .and. (pos_part(3) < z_cb(p)) .and. (pos_part(3) >= z_cb(-1))) - end if - - end function particle_in_domain_physical - - !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z directions - !! following a second-order central difference considering uneven widths - !! @param q Input scalar field - !! @param dq Output gradient of q - !! @param dir Gradient spatial direction - subroutine s_gradient_dir(q, dq, dir) - - type(scalar_field), intent(inout) :: q - type(scalar_field), intent(inout) :: dq - integer, intent(in) :: dir - - integer :: i, j, k - - if (dir == 1) then - ! Gradient in x dir. - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & - + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & - - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) - end do - end do - end do - else - if (dir == 2) then - ! Gradient in y dir. - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do - end do - end do - else - ! Gradient in z dir. 
- !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do - end do - end do - end if - end if - - end subroutine s_gradient_dir - - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. - !! @param q_time Current time - subroutine s_write_lag_particles(qtime) - - real(wp), intent(in) :: qtime - integer :: k - - character(LEN=path_len + 2*name_len) :: file_loc - logical :: file_exist - - write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' - file_loc = trim(case_dir)//'/D/'//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - - if (.not. file_exist) then - open (11, FILE=trim(file_loc), FORM='formatted', position='rewind') - write (11, *) 'currentTime, particleID, x, y, z, ', & - 'coreVaporMass, coreVaporConcentration, radius, interfaceVelocity, ', & - 'corePressure' - else - open (11, FILE=trim(file_loc), FORM='formatted', position='append') - end if - - ! Cycle through list - do k = 1, nBubs - write (11, '(6X,f12.6,I24.8,8e24.8)') & - qtime, & - lag_id(k, 1), & - mtn_pos(k, 1, 1), & - mtn_pos(k, 2, 1), & - mtn_pos(k, 3, 1), & - gas_mv(k, 1), & - gas_mv(k, 1)/(gas_mv(k, 1) + gas_mg(k)), & - intfc_rad(k, 1), & - intfc_vel(k, 1), & - gas_p(k, 1) - end do - - close (11) - - end subroutine s_write_lag_particles - - !> Subroutine that writes some useful statistics related to the volume fraction - !! of the particles (void fraction) in the computatioational domain - !! on each time step. - !! @param q_time Current time - subroutine s_write_void_evol(qtime) - - real(wp), intent(in) :: qtime - real(wp) :: volcell, voltot - real(wp) :: lag_void_max, lag_void_avg, lag_vol - real(wp) :: void_max_glb, void_avg_glb, vol_glb - - integer :: i, j, k - - character(LEN=path_len + 2*name_len) :: file_loc - logical :: file_exist - - if (proc_rank == 0) then - write (file_loc, '(A)') 'voidfraction.dat' - file_loc = trim(case_dir)//'/D/'//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (.not. file_exist) then - open (12, FILE=trim(file_loc), FORM='formatted', position='rewind') - !write (12, *) 'currentTime, averageVoidFraction, ', & - ! 'maximumVoidFraction, totalParticlesVolume' - !write (12, *) 'The averageVoidFraction value does ', & - ! 'not reflect the real void fraction in the cloud since the ', & - ! 
'cells which do not have bubbles are not accounted' - else - open (12, FILE=trim(file_loc), FORM='formatted', position='append') - end if - end if - - lag_void_max = 0._wp - lag_void_avg = 0._wp - lag_vol = 0._wp - !$acc parallel loop collapse(3) gang vector default(present) reduction(+:lag_vol,lag_void_avg) & - !$acc reduction(MAX:lag_void_max) copy(lag_vol, lag_void_avg, lag_void_max) - do k = 0, p - do j = 0, n - do i = 0, m - lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) - call s_get_char_vol(i, j, k, volcell) - if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then - lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell - lag_vol = lag_vol + volcell - end if - end do - end do - end do - -#ifdef MFC_MPI - if (num_procs > 1) then - call s_mpi_allreduce_max(lag_void_max, void_max_glb) - lag_void_max = void_max_glb - call s_mpi_allreduce_sum(lag_vol, vol_glb) - lag_vol = vol_glb - call s_mpi_allreduce_sum(lag_void_avg, void_avg_glb) - lag_void_avg = void_avg_glb - end if -#endif - voltot = lag_void_avg - ! This voidavg value does not reflect the real void fraction in the cloud - ! since the cell which does not have bubbles are not accounted - if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol - - if (proc_rank == 0) then - write (12, '(6X,4e24.8)') & - qtime, & - lag_void_avg, & - lag_void_max, & - voltot - close (12) - end if - - end subroutine s_write_void_evol - - !> Subroutine that writes the restarting files for the particles in the lagrangian solver. - !! @param t_step Current time step - subroutine s_write_restart_lag_bubbles(t_step) - - ! Generic string used to store the address of a particular file - integer, intent(in) :: t_step - - character(LEN=path_len + 2*name_len) :: file_loc - logical :: file_exist - integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn - integer :: i, k - -#ifdef MFC_MPI - ! For Parallel I/O - integer :: ifile, ierr - integer, dimension(MPI_STATUS_SIZE) :: status - integer(KIND=MPI_OFFSET_KIND) :: disp - integer :: view - integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer, dimension(num_procs) :: part_order, part_ord_mpi - - bub_id = 0._wp - if (nBubs /= 0) then - do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - bub_id = bub_id + 1 - end if - end do - end if - - if (.not. parallel_io) return - - ! Total number of particles - call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - ! Total number of particles written so far - call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - lsizes(1) = max(1, bub_id) - lsizes(2) = 21 - - ! if the partcle number is zero, put 1 since MPI cannot deal with writing - ! zero particle - part_order(:) = 1 - part_order(proc_rank + 1) = max(1, bub_id) - - call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & - MPI_MAX, MPI_COMM_WORLD, ierr) - - gsizes(1) = sum(part_ord_mpi(1:num_procs)) - gsizes(2) = 21 - - start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) - start_idx_part(2) = 0 - - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - - ! 
Writing down the total number of particles - if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - write (9) gsizes(1), mytime, dt - close (9) - end if - - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) - - allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) - - ! Open the file to write all flow variables - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) - - disp = 0._wp - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - - ! Cycle through list - i = 1 - - if (bub_id == 0) then - MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp - else - - do k = 1, nBubs - - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) - MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 8:10) = mtn_vel(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 11) = intfc_rad(k, 1) - MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) - MPI_IO_DATA_lag_bubbles(i, 13) = bub_R0(k) - MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats(k) - MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats(k) - MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) - MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) - MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) - MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) - MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) - MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - - i = i + 1 - - end if - - end do - - end if - - call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & - mpi_p, status, ierr) - - call MPI_FILE_CLOSE(ifile, ierr) - - deallocate (MPI_IO_DATA_lag_bubbles) - -#endif - - end subroutine s_write_restart_lag_bubbles - - !> This procedure calculates the maximum and minimum radius of each bubble. - subroutine s_calculate_lag_bubble_stats() - - integer :: k - - !$acc parallel loop gang vector default(present) reduction(MAX:Rmax_glb) & - !$acc reduction(MIN: Rmin_glb) copy(Rmax_glb, Rmin_glb) - do k = 1, nBubs - Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) - Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) - end do - - end subroutine s_calculate_lag_bubble_stats - - !> Subroutine that writes the maximum and minimum radius of each bubble. 
- subroutine s_write_lag_bubble_stats() - - integer :: k - character(LEN=path_len + 2*name_len) :: file_loc - - write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' - file_loc = trim(case_dir)//'/D/'//trim(file_loc) - - !$acc update host(Rmax_glb, Rmin_glb) - - open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') - write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' - - do k = 1, nBubs - write (13, '(6X,2I24.8,5e24.8)') & - proc_rank, & - lag_id(k, 1), & - mtn_pos(k, 1, 1), & - mtn_pos(k, 2, 1), & - mtn_pos(k, 3, 1), & - Rmax_stats(k), & - Rmin_stats(k) - end do - - close (13) - - end subroutine s_write_lag_bubble_stats - - !> The purpose of this subroutine is to remove one specific particle if dt is too small. - !! @param bub_id Particle id - subroutine s_remove_lag_bubble(bub_id) - - integer, intent(in) :: bub_id - - integer :: i - - !$acc loop seq - do i = bub_id, nBubs - 1 - lag_id(i, 1) = lag_id(i + 1, 1) - bub_R0(i) = bub_R0(i + 1) - Rmax_stats(i) = Rmax_stats(i + 1) - Rmin_stats(i) = Rmin_stats(i + 1) - gas_mg(i) = gas_mg(i + 1) - gas_betaT(i) = gas_betaT(i + 1) - gas_betaC(i) = gas_betaC(i + 1) - bub_dphidt(i) = bub_dphidt(i + 1) - gas_p(i, 1:2) = gas_p(i + 1, 1:2) - gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) - intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) - intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) - mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) - mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) - mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) - mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) - intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) - intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) - gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) - gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - end do - - nBubs = nBubs - 1 - !$acc update device(nBubs) - - end subroutine s_remove_lag_bubble - - !> The purpose of this subroutine is to deallocate variables - subroutine s_finalize_lagrangian_solver() - - integer :: i - - do i = 1, q_beta_idx - @:DEALLOCATE(q_beta%vf(i)%sf) - end do - @:DEALLOCATE(q_beta%vf) - - !Deallocating space - @:DEALLOCATE(lag_id) - @:DEALLOCATE(bub_R0) - @:DEALLOCATE(Rmax_stats) - @:DEALLOCATE(Rmin_stats) - @:DEALLOCATE(gas_mg) - @:DEALLOCATE(gas_betaT) - @:DEALLOCATE(gas_betaC) - @:DEALLOCATE(bub_dphidt) - @:DEALLOCATE(gas_p) - @:DEALLOCATE(gas_mv) - @:DEALLOCATE(intfc_rad) - @:DEALLOCATE(intfc_vel) - @:DEALLOCATE(mtn_pos) - @:DEALLOCATE(mtn_posPrev) - @:DEALLOCATE(mtn_vel) - @:DEALLOCATE(mtn_s) - @:DEALLOCATE(intfc_draddt) - @:DEALLOCATE(intfc_dveldt) - @:DEALLOCATE(gas_dpdt) - @:DEALLOCATE(gas_dmvdt) - @:DEALLOCATE(mtn_dposdt) - @:DEALLOCATE(mtn_dveldt) - - end subroutine s_finalize_lagrangian_solver - -end module m_bubbles_EL From 9390c88bb8505e555f0bd1ee2480b3979cd19256 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 11 Jun 2025 14:55:40 -0400 Subject: [PATCH 32/62] 3D transfer list --- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 85 +++++++++++++++++++++++++++++---- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 674d07d049..f5691fdc3b 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1189,7 +1189,7 @@ contains mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then call s_remove_lag_bubble(k) - elseif 
(mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then + elseif (mtn_pos(k, 1, dest) < x_cb(mapCells - buff_size - 1)) then call s_remove_lag_bubble(k) end if diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 992e3470e1..ec92e7079d 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -471,7 +471,6 @@ contains if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 - if (n > 0) then ! Left bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then @@ -479,21 +478,40 @@ contains p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - + if (p > 0) then + ! Left bottom back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(-1, -1, -1, p_send_counts(-1, -1, -1)) = k + p_send_counts(-1, -1, -1) = p_send_counts(-1, -1, -1) + 1 + ! Left bottom front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(-1, -1, 1, p_send_counts(-1, -1, 1)) = k + p_send_counts(-1, -1, 1) = p_send_counts(-1, -1, 1) + 1 + end if + end if ! Left top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (p > 0) then + ! Left top back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(-1, 1, 1, p_send_counts(-1, 1, 1)) = k + p_send_counts(-1, 1, 1) = p_send_counts(-1, 1, 1) + 1 + ! Left top front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(-1, 1, -1, p_send_counts(-1, 1, -1)) = k + p_send_counts(-1, 1, -1) = p_send_counts(-1, 1, -1) + 1 + end if + end if end if end if - ! Right face elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 - if (n > 0) then ! Right bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then @@ -501,25 +519,74 @@ contains p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - + if (p > 0) then + ! Right bottom back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(1, -1, -1, p_send_counts(1, -1, -1)) = k + p_send_counts(1, -1, -1) = p_send_counts(1, -1, -1) + 1 + ! Right bottom front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(1, -1, 1, p_send_counts(1, -1, 1)) = k + p_send_counts(1, -1, 1) = p_send_counts(1, -1, 1) + 1 + end if + end if ! Right top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (p > 0) then + ! Right top back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(1, 1, -1, p_send_counts(1, 1, -1)) = k + p_send_counts(1, 1, -1) = p_send_counts(1, 1, -1) + 1 + ! Right top front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(1, 1, 1, p_send_counts(1, 1, 1)) = k + p_send_counts(1, 1, 1) = p_send_counts(1, 1, 1) + 1 + end if + end if end if end if - ! Bottom face (corners already accounted for) elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - + if (p > 0) then + ! Bottom face back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(0, -1, -1, p_send_counts(0, -1, -1)) = k + p_send_counts(0, -1, -1) = p_send_counts(0, -1, -1) + 1 + ! Bottom face front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(0, -1, 1, p_send_counts(0, -1, 1)) = k + p_send_counts(0, -1, 1) = p_send_counts(0, -1, 1) + 1 + end if + endif ! Top face (corners already accounted for) elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (p > 0) then + ! Top face back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(0, 1, -1, p_send_counts(0, 1, -1)) = k + p_send_counts(0, 1, -1) = p_send_counts(0, 1, -1) + 1 + ! Top face front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(0, 1, 1, p_send_counts(0, 1, 1)) = k + p_send_counts(0, 1, 1) = p_send_counts(0, 1, 1) + 1 + end if + endif + ! Back face (everything else accounted for) + elseif (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(0, 0, -1, p_send_counts(0, 0, -1)) = k + p_send_counts(0, 0, -1) = p_send_counts(0, 0, -1) + 1 + ! Front face (everything else accounted for) + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. 
nidx(3)%end /= 0) then + p_send_ids(0, 0, 1, p_send_counts(0, 0, 1)) = k + p_send_counts(0, 0, 1) = p_send_counts(0, 0, 1) + 1 end if end do @@ -542,7 +609,7 @@ contains if (abs(i) + abs(j) + abs(k) > 0) then call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) if (p_send_counts(i,j,k) > 0) then - print*, "SEND", proc_rank, p_send_counts(i,j,k) + print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) end if if (p_send_counts(i,j,k) > 0) then position = 0 @@ -597,7 +664,7 @@ contains if (abs(i) + abs(j) + abs(k) > 0) then call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) if (p_recv_counts(i,j,k) > 0) then - print*, "RECV", proc_rank, p_recv_counts(i,j,k) + print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) end if if (p_recv_counts(i,j,k) > 0) then position = 0 From aa259f127288a59ac015199e056bf3d0454b1633 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 11 Jun 2025 16:13:57 -0400 Subject: [PATCH 33/62] bug fixes --- src/simulation/m_bubbles_EL.fpp | 39 ++++++++++++------------- src/simulation/m_bubbles_EL_kernels.fpp | 22 +++++++------- src/simulation/m_mpi_proxy.fpp | 12 ++++---- 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index f5691fdc3b..35950f3d01 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1180,16 +1180,16 @@ contains integer, dimension(3) :: cell !$acc parallel loop gang vector default(present) private(cell) - do k = 1, nBubs + do k = nBubs, 1, -1 if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) - elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then + elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size)) then call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 1, dest) < x_cb(mapCells - buff_size - 1)) then + elseif (mtn_pos(k, 1, dest) < x_cb(buff_size - 1)) then call s_remove_lag_bubble(k) end if @@ -1199,9 +1199,9 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) - elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then + elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size)) then call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then + elseif (mtn_pos(k, 2, dest) < y_cb(buff_size - 1)) then call s_remove_lag_bubble(k) end if @@ -1212,9 +1212,9 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. 
mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) - elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then + elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size)) then call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then + elseif (mtn_pos(k, 3, dest) < z_cb(buff_size - 1)) then call s_remove_lag_bubble(k) end if end if @@ -1235,7 +1235,6 @@ contains call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) end if end if - end do if (num_procs > 1) then @@ -1338,26 +1337,26 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & - (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1)) .and. & (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1)) .and. & (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & - (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & - (pos_part(3) < z_cb(p + buff_size - mapCells)) .and. & - (pos_part(3) >= z_cb(-buff_size - 1 + mapCells))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1)) .and. & + (pos_part(3) < z_cb(p + buff_size)) .and. & + (pos_part(3) >= z_cb(-buff_size - 1))) end if ! For symmetric and wall boundary condition diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index b55b6c7e9c..1e15c772a4 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -283,27 +283,27 @@ contains celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size + mapCells) .or. & - (cellaux(2) < -buff_size + mapCells)) then + if ((cellaux(1) < -buff_size) .or. & + (cellaux(2) < -buff_size)) then celloutside = .true. end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then + if (cyl_coord .and. cellaux(2) < 0) then celloutside = .true. end if - if ((cellaux(2) > n + buff_size - mapCells) .or. & - (cellaux(1) > m + buff_size - mapCells)) then + if ((cellaux(2) > n + buff_size) .or. & + (cellaux(1) > m + buff_size)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size + mapCells) .or. & - (cellaux(1) < -buff_size + mapCells) .or. & - (cellaux(2) < -buff_size + mapCells)) then + if ((cellaux(3) < -buff_size) .or. & + (cellaux(1) < -buff_size) .or. & + (cellaux(2) < -buff_size)) then celloutside = .true. 
end if - if ((cellaux(3) > p + buff_size - mapCells) .or. & - (cellaux(2) > n + buff_size - mapCells) .or. & - (cellaux(1) > m + buff_size - mapCells)) then + if ((cellaux(3) > p + buff_size) .or. & + (cellaux(2) > n + buff_size) .or. & + (cellaux(1) > m + buff_size)) then celloutside = .true. end if end if diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index ec92e7079d..7f78ac2831 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -98,14 +98,14 @@ contains p_buff_size = nVar*lag_params%nBubs_glb*real_size @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) - comm_coords(1)%beg = x_cb(buff_size - mapCells - 1) - comm_coords(1)%end = x_cb(m - buff_size + mapCells) + comm_coords(1)%beg = x_cb(buff_size - 1) + comm_coords(1)%end = x_cb(m - buff_size) if (n > 0) then - comm_coords(2)%beg = y_cb(buff_size - mapCells - 1) - comm_coords(2)%end = y_cb(m - buff_size + mapCells) + comm_coords(2)%beg = y_cb(buff_size) + comm_coords(2)%end = y_cb(n - buff_size) if (p > 0) then - comm_coords(3)%beg = z_cb(buff_size - mapCells - 1) - comm_coords(3)%end = z_cb(p - buff_size + mapCells) + comm_coords(3)%beg = z_cb(buff_size - 1) + comm_coords(3)%end = z_cb(p - buff_size) end if end if From abf82613c1dcd9ca223e516ee3021d04fde8086e Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sat, 14 Jun 2025 12:29:49 -0400 Subject: [PATCH 34/62] Simple cases work --- src/common/m_helper_basic.f90 | 7 +- src/pre_process/m_global_parameters.fpp | 43 +++- src/pre_process/m_mpi_proxy.fpp | 2 +- src/pre_process/m_start_up.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 274 +++++++++++++++--------- src/simulation/m_bubbles_EL_kernels.fpp | 42 ++-- src/simulation/m_global_parameters.fpp | 6 +- src/simulation/m_mpi_proxy.fpp | 256 +++++++++++----------- src/simulation/m_start_up.fpp | 2 +- toolchain/mfc/run/case_dicts.py | 2 + 10 files changed, 388 insertions(+), 248 deletions(-) diff --git a/src/common/m_helper_basic.f90 b/src/common/m_helper_basic.f90 index ae3221a318..44ec039f9e 100644 --- a/src/common/m_helper_basic.f90 +++ b/src/common/m_helper_basic.f90 @@ -76,9 +76,10 @@ logical function f_is_integer(var) result(res) end function f_is_integer subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, & - viscous, bubbles_lagrange, m, n, p, num_dims) + viscous, bubbles_lagrange, m, n, p, & + num_dims, fd_number) - integer, intent(in) :: weno_polyn, m, n, p, num_dims + integer, intent(in) :: weno_polyn, m, n, p, num_dims, fd_number integer, intent(inout) :: buff_size type(int_bounds_info), dimension(3), intent(inout) :: idwint, idwbuff logical, intent(in) :: viscous, bubbles_lagrange @@ -95,7 +96,7 @@ subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, ! Correction for smearing function in the lagrangian subgrid bubble model if (bubbles_lagrange) then - buff_size = max(buff_size, 6) + buff_size = max(buff_size + fd_number, 6 + fd_number) end if ! Configuring Coordinate Direction Indexes diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index fecb7acc9c..40eada055a 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -123,6 +123,21 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: fd_order !< + !! The order of the finite-difference (fd) approximations of the first-order + !! 
derivatives that need to be evaluated when the CoM or flow probe data + !! files are to be written at each time step + + integer :: fd_number !< + !! The finite-difference number is given by MAX(1, fd_order/2). Essentially, + !! it is a measure of the half-size of the finite-difference stencil for the + !! selected order of accuracy. + + !> @name lagrangian subgrid bubble parameters + !> @{! + type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters + !> @} + type(int_bounds_info) :: bc_x, bc_y, bc_z !< !! Boundary conditions in the x-, y- and z-coordinate directions @@ -386,6 +401,28 @@ contains ! Initial condition parameters num_patches = dflt_int + fd_order = dflt_int + lag_params%cluster_type = dflt_int + lag_params%pressure_corrector = .false. + lag_params%smooth_type = dflt_int + lag_params%heatTransfer_model = .false. + lag_params%massTransfer_model = .false. + lag_params%write_bubbles = .false. + lag_params%write_bubbles_stats = .false. + lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int + lag_params%c_d = dflt_real + lag_params%epsilonb = 1._wp + lag_params%charwidth = dflt_real + lag_params%valmaxvoid = dflt_real + lag_params%c0 = dflt_real + lag_params%rho0 = dflt_real + lag_params%T0 = dflt_real + lag_params%Thost = dflt_real + lag_params%x0 = dflt_real + lag_params%diffcoefvap = dflt_real + do i = 1, num_patches_max patch_icpp(i)%geometry = dflt_int patch_icpp(i)%model_scale(:) = 1._wp @@ -858,10 +895,14 @@ contains chemxb = species_idx%beg chemxe = species_idx%end + if (lag_params%vel_model /= 0) then + fd_number = max(1, fd_order/2) + end if + call s_configure_coordinate_bounds(weno_polyn, buff_size, & idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & - num_dims) + num_dims, fd_number) #ifdef MFC_MPI diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index abc47cdb65..b6196618b4 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -47,7 +47,7 @@ contains & 'weno_order', 'precision', 'perturb_flow_fluid', & & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& & 'R0_type', 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & - & 'num_bc_patches' ] + & 'num_bc_patches', 'lag_params%vel_model' ] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 25b42eab52..5823a91451 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,7 +149,7 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & - patch_bc, Bx0, relativity, cont_damage + patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params ! Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 35950f3d01..59ba9377f3 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -116,7 +116,6 @@ contains ! 
Allocating space for lagrangian variables nBubs_glb = lag_params%nBubs_glb - @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(bub_R0(1:nBubs_glb)) @:ALLOCATE(Rmax_stats(1:nBubs_glb)) @:ALLOCATE(Rmin_stats(1:nBubs_glb)) @@ -124,6 +123,7 @@ contains @:ALLOCATE(gas_betaT(1:nBubs_glb)) @:ALLOCATE(gas_betaC(1:nBubs_glb)) @:ALLOCATE(bub_dphidt(1:nBubs_glb)) + @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) @@ -227,7 +227,7 @@ contains do while (ios == 0) read (94, *, iostat=ios) (inputBubble(i), i=1, 8) if (ios /= 0) cycle - indomain = particle_in_domain(inputBubble(1:3)) + indomain = particle_in_domain_physical(inputBubble(1:3)) id = id + 1 if (id > lag_params%nBubs_glb .and. proc_rank == 0) then call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") @@ -269,6 +269,28 @@ contains !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) + call sleep(proc_rank) + print*, "Processor", proc_rank + do i = 1, nBubs + print*, i, lag_id(i,1), mtn_pos(i, 1:3, 1) + end do + + if (num_procs > 1) then + call s_add_particles_to_transfer_list_IC(mtn_pos(:, :, 1), nbubs) + call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) + call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + !if (proc_rank == 0 .or. proc_rank == 2) print*, "b", proc_rank, mtn_pos + !call sleep(1) + end if + !Populate temporal variables call s_transfer_data_to_tmp() call s_smear_voidfraction() @@ -326,28 +348,29 @@ contains mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) end if - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - ! Check if the bubble is located in the ghost cell of a symmetric boundary - if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & - (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & - (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & - (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - - if (p > 0) then - if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & - (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - end if + !Check if the bubble is located in the ghost cell of a symmetric boundary + !if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + !(any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. 
& + !(any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + !(any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + !end if + + !if (p > 0) then + !if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + !(any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + !end if + !end if ! If particle is in the ghost cells, find the closest non-ghost cell - cell(1) = min(max(cell(1), 0), m) - cell(2) = min(max(cell(2), 0), n) - if (p > 0) cell(3) = min(max(cell(3), 0), p) + !cell(1) = min(max(cell(1), 0), m) + !cell(2) = min(max(cell(2), 0), n) + !if (p > 0) cell(3) = min(max(cell(3), 0), p) + call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) dynP = 0._wp @@ -472,7 +495,7 @@ contains do i = 1, tot_data id = int(MPI_IO_DATA_lag_bubbles(i, 1)) inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain(inputvals(1:3)) + indomain = particle_in_domain_physical(inputvals(1:3)) if (indomain .and. (id > 0)) then bub_id = bub_id + 1 nBubs = bub_id ! local number of bubbles @@ -492,7 +515,7 @@ contains gas_mg(bub_id) = inputvals(18) gas_betaT(bub_id) = inputvals(19) gas_betaC(bub_id) = inputvals(20) - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) end if end do @@ -555,6 +578,12 @@ contains end do end if + !call sleep(proc_rank) + !print*, proc_rank + !do k = 1, nBubs + !print*, k, lag_id(k, 1), mtn_pos(k,:,:) + !end do + ! Radial motion model adap_dt_stop_max = 0 !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & @@ -593,7 +622,6 @@ contains adap_dt_stop = 0 if (adap_dt) then - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & dmntait, dmBtait, dm_bub_adv_src, dm_divu, & k, myMass_v, myMass_n, myBeta_c, & @@ -627,13 +655,20 @@ contains mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & intfc_rad(k,2), mtn_vel(k,l,2), & gas_mg(k), gas_mv(k, 2), & - Re(1), myRho, cell, l, q_prim_vf) + Re(1), myRho, cell, l, lag_id(k,1), q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp end if end do + !if (lag_id(k,1) == 2) then + !print*, "as", proc_rank, stage, k, mtn_dposdt(k,:,stage), mtn_dveldt(k,:,stage) + !end if + + !if (proc_rank == 0 .or. 
proc_rank == 2) print*, "a", proc_rank, mtn_dposdt(:,:,stage), mtn_dveldt(:,:,stage) + !call sleep(1) + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do @@ -846,18 +881,19 @@ contains f_pinfl = 0._wp if (lag_params%vel_model > 0) then - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) scoord = mtn_s(bub_id, 1:3, 2) else scoord = mtn_s(bub_id, 1:3, 2) cell(:) = int(scoord(:)) + !$acc loop seq + do i = 1, num_dims + if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 + end do end if - - !$acc loop seq - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do + !print*, "h", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) + !call sleep(1) if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation @@ -875,42 +911,45 @@ contains !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - if (cell(1) == (m + buff_size)) then - cell(1) = cell(1) - 1 - psi(1) = 1._wp - else if (cell(1) == (-buff_size)) then - psi(1) = 0._wp - else - if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 + !if (cell(1) == (m + buff_size - fd_number)) then + !cell(1) = cell(1) - 1 + !psi(1) = 1._wp + !else if (cell(1) == (fd_number - buff_size)) then + !psi(1) = 0._wp + !else + !if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - end if + !end if psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - if (cell(2) == (n + buff_size)) then - cell(2) = cell(2) - 1 - psi(2) = 1._wp - else if (cell(2) == (-buff_size)) then - psi(2) = 0._wp - else - if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 + !if (cell(2) == (n + buff_size - fd_number)) then + !cell(2) = cell(2) - 1 + !psi(2) = 1._wp + !else if (cell(2) == (fd_number - buff_size)) then + !psi(2) = 0._wp + !else + !if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - end if + !end if if (p > 0) then psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - if (cell(3) == (p + buff_size)) then - cell(3) = cell(3) - 1 - psi(3) = 1._wp - else if (cell(3) == (-buff_size)) then - psi(3) = 0._wp - else - if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 + !if (cell(3) == (p + buff_size - fd_number)) then + !cell(3) = cell(3) - 1 + !psi(3) = 1._wp + !else if (cell(3) == (fd_number - buff_size)) then + !psi(3) = 0._wp + !else + !if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - end if + !end if else psi(3) = 0._wp end if + !print*, "i", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) + !call sleep(1) + !< Perform bilinear interpolation if (p == 0) then !2D f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) @@ -959,26 +998,32 @@ contains !< check if the current cell is outside the computational domain or not (including ghost cells) celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then + if (cyl_coord .and. cellaux(2) < 0) then celloutside = .true. 
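[Editorial aside, not part of the original patch] The window checks in this hunk shrink the admissible cell range by fd_number on each side. The field buffers appear to span cell indices -buff_size .. m + buff_size (and analogously in y and z), so a cell accepted by these tests can host a finite-difference stencil of half-width fd_number without reading outside allocated memory. A self-contained sketch of the equivalent one-direction test, with all names illustrative rather than taken from MFC:

    pure function f_cell_in_window(c, lo_cell, hi_cell, half_stencil) result(in_window)
       ! c is a cell index; lo_cell/hi_cell are the first/last allocated cells
       ! (-buff_size and m + buff_size here); half_stencil plays the role of fd_number
       integer, intent(in) :: c, lo_cell, hi_cell, half_stencil
       logical :: in_window
       in_window = (c >= lo_cell + half_stencil) .and. (c <= hi_cell - half_stencil)
    end function f_cell_in_window

A cell that fails this test in a given direction corresponds to the ones these checks flag as celloutside (the cylindrical-coordinate axis check is a separate condition).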
end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(3) < fd_number - buff_size) .or. & + (cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(3) > p + buff_size - fd_number) .or. & + (cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if end if if (.not. celloutside) then - if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then + if (cyl_coord .and. (p == 0) .and. (cellaux(2) < 0)) then celloutside = .true. end if end if @@ -1083,6 +1128,10 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + !if (lag_id(k,1) == 2) then + !print*, "g11", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) + !print*, "g12", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) + !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) @@ -1098,6 +1147,10 @@ contains mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + !if (lag_id(k,1) == 2) then + !print*, "g21", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) + !print*, "g22", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) + !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) @@ -1178,19 +1231,22 @@ contains integer, intent(in) :: dest integer :: k, i, patch_id integer, dimension(3) :: cell + logical, dimension(1:nBubs) :: remove_bubble !$acc parallel loop gang vector default(present) private(cell) - do k = nBubs, 1, -1 + do k = 1, nBubs + remove_bubble(k) = .false. + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) - elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 1, dest) < x_cb(buff_size - 1)) then - call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then + remove_bubble(k) = .true. + elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then + remove_bubble(k) = .true. end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1199,10 +1255,10 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. 
mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) - elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 2, dest) < y_cb(buff_size - 1)) then - call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then + remove_bubble(k) = .true. + elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then + remove_bubble(k) = .true. end if if (p > 0) then @@ -1212,15 +1268,15 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) - elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 3, dest) < z_cb(buff_size - 1)) then - call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then + remove_bubble(k) = .true. + elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then + remove_bubble(k) = .true. end if end if if (ib) then - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then @@ -1231,22 +1287,35 @@ contains levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & * levelset%sf(cell(1), cell(2), cell(3), patch_id) end do - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) end if end if end do + !$acc loop seq + do k = nBubs, 1, -1 + if (remove_bubble(k)) call s_remove_lag_bubble(k) + end do + if (num_procs > 1) then call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) - call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts) - call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) + call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + !do k = 1, nBubs + !if (lag_id(k,1) == 2) then + !print*, "c1", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,1), mtn_dposdt(k, :, 1) + !print*, "c2", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,2), mtn_dposdt(k, :, 2) + !endif + !end do end if end subroutine s_enforce_EL_bubbles_boundary_conditions @@ -1267,7 +1336,7 @@ contains cell(1) = cell(1) - 1 end do - do while (pos(1) > x_cb(cell(1))) + do while (pos(1) >= x_cb(cell(1))) cell(1) = cell(1) + 1 end do @@ -1275,7 +1344,7 @@ contains cell(2) = cell(2) - 1 end do - do while (pos(2) > y_cb(cell(2))) + do while (pos(2) >= y_cb(cell(2))) cell(2) = cell(2) + 1 end do @@ -1283,7 +1352,7 @@ contains do while (pos(3) < z_cb(cell(3) - 1)) cell(3) = cell(3) - 1 end 
do - do while (pos(3) > z_cb(cell(3))) + do while (pos(3) >= z_cb(cell(3))) cell(3) = cell(3) + 1 end do end if @@ -1337,26 +1406,27 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & + (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. & + (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) > -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (abs(pos_part(2)) < y_cb(n + buff_size - fd_number)) .and. & + (abs(pos_part(2)) >= max(y_cb(fd_number - buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. & - (pos_part(3) >= z_cb(-buff_size - 1))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & + (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. & + (pos_part(3) < z_cb(p + buff_size - fd_number)) .and. & + (pos_part(3) >= z_cb(fd_number - buff_size - 1))) end if ! 
For symmetric and wall boundary condition @@ -1774,7 +1844,6 @@ contains !$acc loop seq do i = bub_id, nBubs - 1 - lag_id(i, 1) = lag_id(i + 1, 1) bub_R0(i) = bub_R0(i + 1) Rmax_stats(i) = Rmax_stats(i + 1) Rmin_stats(i) = Rmin_stats(i + 1) @@ -1782,6 +1851,7 @@ contains gas_betaT(i) = gas_betaT(i + 1) gas_betaC(i) = gas_betaC(i + 1) bub_dphidt(i) = bub_dphidt(i + 1) + lag_id(i, 1) = lag_id(i + 1, 1) gas_p(i, 1:2) = gas_p(i + 1, 1:2) gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) @@ -1794,6 +1864,8 @@ contains intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) + mtn_dposdt(i,1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) + mtn_dveldt(i,1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) end do nBubs = nBubs - 1 diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 1e15c772a4..9fb8002f7d 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -128,7 +128,7 @@ contains s_coord(1:3) = lbk_s(l, 1:3, 2) center(1:2) = lbk_pos(l, 1:2, 2) if (p > 0) center(3) = lbk_pos(l, 3, 2) - cell = -buff_size + cell = fd_number - buff_size call s_get_cell(s_coord, cell) !print*, s_coord call s_compute_stddsv(cell, volpart, stddsv) @@ -283,27 +283,27 @@ contains celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. & - (cellaux(2) < -buff_size)) then + if ((cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if if (cyl_coord .and. cellaux(2) < 0) then celloutside = .true. end if - if ((cellaux(2) > n + buff_size) .or. & - (cellaux(1) > m + buff_size)) then + if ((cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size) .or. & - (cellaux(1) < -buff_size) .or. & - (cellaux(2) < -buff_size)) then + if ((cellaux(3) < fd_number - buff_size) .or. & + (cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if - if ((cellaux(3) > p + buff_size) .or. & - (cellaux(2) > n + buff_size) .or. & - (cellaux(1) > m + buff_size)) then + if ((cellaux(3) > p + buff_size - fd_number) .or. & + (cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if end if @@ -478,10 +478,10 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,id,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell - integer :: i + integer :: i, id type(scalar_field), dimension(sys_size) :: q_prim_vf real(wp) :: a, vol, mass, force, vel, area real(wp) :: pos, rad, dp, v_rel, mg, mv, Re, rho, c_d @@ -500,6 +500,15 @@ contains (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) end if + !if (i == 1 .and. id == 2) then + !print*, "c", proc_rank, dp, & + !q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)), & + !q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)), & + !q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)), & + !pos, x_cc(cell(1) + 1), x_cc(cell(1) - 1), cell + !call sleep(1) + !end if + vol = (4._wp/3._wp) * pi * rad**3._wp force = -1._wp * vol * dp @@ -517,6 +526,11 @@ contains a = force / (mg + mv) + !if (i == 1 .and. 
id == 2) then + !print*, "d", proc_rank, vol, force, v_rel, vel, v_rel - vel, a, mg, mv + !call sleep(1) + !end if + end function f_get_acceleration function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) @@ -528,7 +542,7 @@ contains Re_b = max(2._wp * rho * abs(v_rel) * rad * mu_inv, Re_b_min) C_d = 24._wp * (1 + 0.15_wp * Re_b ** (0.687_wp)) / Re_b - !C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) + C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) end function f_get_clift_gauvin_drag_coeff diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 5960b3a84f..7e2385608b 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -1185,10 +1185,14 @@ contains fd_number = max(1, fd_order/2) end if + if (bubbles_lagrange) then + fd_number = max(1, fd_order/2) + end if + call s_configure_coordinate_bounds(weno_polyn, buff_size, & idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & - num_dims) + num_dims, fd_number) !$acc update device(idwint, idwbuff) ! Configuring Coordinate Direction Indexes diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 7f78ac2831..a8215bc8c3 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -89,23 +89,23 @@ contains subroutine s_initialize_particles_mpi(lag_num_ts) integer :: i, j, k - integer :: real_size, int_size, nVar, lag_num_ts + integer :: real_size, int_size, nReal, lag_num_ts call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) - nVar = 28 + 10*lag_num_ts + int_size - p_buff_size = nVar*lag_params%nBubs_glb*real_size + nReal = 7 + 16*2 + 10*lag_num_ts + p_buff_size = lag_params%nBubs_glb * (nReal * real_size + int_size) @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) - comm_coords(1)%beg = x_cb(buff_size - 1) - comm_coords(1)%end = x_cb(m - buff_size) + comm_coords(1)%beg = x_cb(buff_size - fd_number - 1) + comm_coords(1)%end = x_cb(m - buff_size + fd_number) if (n > 0) then - comm_coords(2)%beg = y_cb(buff_size) - comm_coords(2)%end = y_cb(n - buff_size) + comm_coords(2)%beg = y_cb(buff_size - fd_number - 1) + comm_coords(2)%end = y_cb(n - buff_size + fd_number) if (p > 0) then - comm_coords(3)%beg = z_cb(buff_size - 1) - comm_coords(3)%end = z_cb(p - buff_size) + comm_coords(3)%beg = z_cb(buff_size - fd_number - 1) + comm_coords(3)%end = z_cb(p - buff_size + fd_number) end if end if @@ -138,7 +138,7 @@ contains & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches'] + & 'num_bc_patches', 'fd_order', 'lag_params%vel_model'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -469,8 +469,8 @@ contains do k = 1, nbub ! Left face if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then - p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k - p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k + !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 if (n > 0) then ! Left bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. 
nidx(2)%beg /= 0) then @@ -478,40 +478,18 @@ contains p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - if (p > 0) then - ! Left bottom back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(-1, -1, -1, p_send_counts(-1, -1, -1)) = k - p_send_counts(-1, -1, -1) = p_send_counts(-1, -1, -1) + 1 - ! Left bottom front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(-1, -1, 1, p_send_counts(-1, -1, 1)) = k - p_send_counts(-1, -1, 1) = p_send_counts(-1, -1, 1) + 1 - end if - end if ! Left top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 - if (p > 0) then - ! Left top back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(-1, 1, 1, p_send_counts(-1, 1, 1)) = k - p_send_counts(-1, 1, 1) = p_send_counts(-1, 1, 1) + 1 - ! Left top front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(-1, 1, -1, p_send_counts(-1, 1, -1)) = k - p_send_counts(-1, 1, -1) = p_send_counts(-1, 1, -1) + 1 - end if - end if end if end if ! Right face elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then - p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k - p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k + !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 if (n > 0) then ! Right bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then @@ -519,124 +497,144 @@ contains p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - if (p > 0) then - ! Right bottom back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(1, -1, -1, p_send_counts(1, -1, -1)) = k - p_send_counts(1, -1, -1) = p_send_counts(1, -1, -1) + 1 - ! Right bottom front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(1, -1, 1, p_send_counts(1, -1, 1)) = k - p_send_counts(1, -1, 1) = p_send_counts(1, -1, 1) + 1 - end if - end if ! Right top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 - if (p > 0) then - ! Right top back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(1, 1, -1, p_send_counts(1, 1, -1)) = k - p_send_counts(1, 1, -1) = p_send_counts(1, 1, -1) + 1 - ! Right top front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. 
nidx(3)%end /= 0) then - p_send_ids(1, 1, 1, p_send_counts(1, 1, 1)) = k - p_send_counts(1, 1, 1) = p_send_counts(1, 1, 1) + 1 - end if - end if end if end if ! Bottom face (corners already accounted for) elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - if (p > 0) then - ! Bottom face back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(0, -1, -1, p_send_counts(0, -1, -1)) = k - p_send_counts(0, -1, -1) = p_send_counts(0, -1, -1) + 1 - ! Bottom face front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(0, -1, 1, p_send_counts(0, -1, 1)) = k - p_send_counts(0, -1, 1) = p_send_counts(0, -1, 1) + 1 - end if - endif ! Top face (corners already accounted for) elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 - if (p > 0) then - ! Top face back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(0, 1, -1, p_send_counts(0, 1, -1)) = k - p_send_counts(0, 1, -1) = p_send_counts(0, 1, -1) + 1 - ! Top face front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(0, 1, 1, p_send_counts(0, 1, 1)) = k - p_send_counts(0, 1, 1) = p_send_counts(0, 1, 1) + 1 - end if - endif - ! Back face (everything else accounted for) - elseif (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(0, 0, -1, p_send_counts(0, 0, -1)) = k - p_send_counts(0, 0, -1) = p_send_counts(0, 0, -1) + 1 - ! Front face (everything else accounted for) - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(0, 0, 1, p_send_counts(0, 0, 1)) = k - p_send_counts(0, 0, 1) = p_send_counts(0, 0, 1) + 1 end if end do end subroutine s_add_particles_to_transfer_list - subroutine s_mpi_send_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & - vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts) + subroutine s_add_particles_to_transfer_list_IC(pos, nbub) + + real(wp), dimension(:,:) :: pos + integer :: bubID, nbub + integer :: i, j, k + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + p_send_counts(i,j,k) = 0 + end do + end do + end do + + do k = 1, nbub + ! Left face + if (pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then + !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k + !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + ! Left bottom corner + if (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k + p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + ! Left top corner + elseif (pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then + p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k + p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + ! Right face + elseif (pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then + !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k + !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + ! Right bottom corner + if (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k + p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + ! Right top corner + elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k + p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + ! Bottom face (corners already accounted for) + elseif (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + ! Top face (corners already accounted for) + elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end do + + end subroutine s_add_particles_to_transfer_list_IC + + + subroutine s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts) - real(wp), dimension(:) :: bub_R0, gas_mg - real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv - real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel + real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt integer, dimension(:,:) :: lag_id - integer :: i, j, k, l, q + real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv + real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel integer :: position, bub_id, lag_num_ts + integer :: i, j, k, l, q + + #ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) - if (p_send_counts(i,j,k) > 0) then - print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) - end if + !if (p_send_counts(i,j,k) > 0) then + !print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) + !end if if (p_send_counts(i,j,k) > 0) then position = 0 do l = 0, p_send_counts(i,j,k) - 1 bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, 
p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) end do do q = 1, lag_num_ts call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) end do - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) end do - call MPI_Isend(p_send_buff, position, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, request, ierr) end if end if @@ -647,15 +645,18 @@ contains end subroutine s_mpi_send_particles - subroutine s_mpi_recv_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & - vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + subroutine s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts, nbubs) + + real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt + integer, dimension(:,:) :: lag_id + real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv + real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel + integer :: position, bub_id, lag_num_ts, nbubs - real(wp), dimension(:) :: bub_R0, 
gas_mg - real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv - real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel - integer, dimension(:, :) :: lag_id integer :: i, j, k, l, q - integer :: position, bub_id, nbubs, lag_num_ts #ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end @@ -663,36 +664,41 @@ contains do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - if (p_recv_counts(i,j,k) > 0) then - print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) - end if + !if (p_recv_counts(i,j,k) > 0) then + !print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) + !end if if (p_recv_counts(i,j,k) > 0) then position = 0 call MPI_recv(p_recv_buff, p_buff_size, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do l = 0, p_recv_counts(i,j,k) - 1 nbubs = nbubs + 1 bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do do q = 1, lag_num_ts call MPI_Unpack(p_recv_buff, p_buff_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasp(bub_id, q), 1, mpi_p, 
MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do - call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) lag_id(bub_id, 2) = bub_id end do end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0668d7f9a3..3fbae2d4d7 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1338,8 +1338,8 @@ contains call s_initialize_cbc_module() call s_initialize_derived_variables() - if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf) + if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf) if (hypoelasticity) call s_initialize_hypoelastic_module() if (hyperelasticity) call s_initialize_hyperelastic_module() diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index ad35b62ae1..c28857c8d4 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -100,6 +100,8 @@ def analytic(self): 'elliptic_smoothing_iters': ParamType.INT, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, + 'lag_params%vel_model': ParamType.INT, + 'fd_order': ParamType.INT, }) for ib_id in range(1, 10+1): From c06231042691eb864ad536a7e90abf87147b4f24 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sat, 14 Jun 2025 12:58:06 -0400 Subject: [PATCH 35/62] fix build --- src/common/m_mpi_common.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 9e0280eb23..f513067d34 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1077,7 +1077,7 @@ contains !! Remaining number of cells, in a particular coordinate direction, !! after the majority is divided up among the available processors - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators integer, dimension(1:num_dims) :: neighbor_coords diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 9179863314..0c0752dd7a 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -854,7 +854,7 @@ contains !! @param f_pinfl Driving pressure !! @param cell Bubble cell !! @param Romega Control volume radius - pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) + impure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_pinf #else @@ -1321,7 +1321,7 @@ contains !! @param pos Input coordinates !! @param cell Computational coordinate of the cell !! 
@param scoord Calculated particle coordinates - pure subroutine s_locate_cell(pos, cell, scoord) + impure subroutine s_locate_cell(pos, cell, scoord) real(wp), dimension(3), intent(in) :: pos real(wp), dimension(3), intent(out) :: scoord From e1e01043198052ac4f7715b74f6dc96ed1b6231b Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 16 Jun 2025 18:39:26 -0400 Subject: [PATCH 36/62] remove comments, fix communication, and refactor transfer lists --- src/post_process/m_start_up.f90 | 2 +- src/simulation/m_bubbles_EL.fpp | 125 ++------- src/simulation/m_bubbles_EL_kernels.fpp | 14 - src/simulation/m_mpi_proxy.fpp | 358 ++++++++++-------------- 4 files changed, 181 insertions(+), 318 deletions(-) diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 20db2682e6..db1697ebd2 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -680,7 +680,7 @@ impure subroutine s_initialize_modules ! Computation of parameters, allocation procedures, and/or any other tasks ! needed to properly setup the modules call s_initialize_global_parameters_module() - if (bubbles_euler .and. nb > 1) call s_simpson + if (bubbles_euler .and. nb > 1) call s_simpson(weight, R0) if (bubbles_euler .and. .not. polytropic) then call s_initialize_nonpoly() end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 0c0752dd7a..5313c27575 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -269,26 +269,13 @@ contains !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - call sleep(proc_rank) - print*, "Processor", proc_rank - do i = 1, nBubs - print*, i, lag_id(i,1), mtn_pos(i, 1:3, 1) - end do - if (num_procs > 1) then - call s_add_particles_to_transfer_list_IC(mtn_pos(:, :, 1), nbubs) - call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) - call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) - !if (proc_rank == 0 .or. proc_rank == 2) print*, "b", proc_rank, mtn_pos - !call sleep(1) end if !Populate temporal variables @@ -351,25 +338,20 @@ contains cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - !Check if the bubble is located in the ghost cell of a symmetric boundary - !if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & - !(any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & - !(any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & - !(any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. 
cell(2) > n)) then - !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - !end if - - !if (p > 0) then - !if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & - !(any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then - !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - !end if - !end if - - ! If particle is in the ghost cells, find the closest non-ghost cell - !cell(1) = min(max(cell(1), 0), m) - !cell(2) = min(max(cell(2), 0), n) - !if (p > 0) cell(3) = min(max(cell(3), 0), p) + ! Check if the bubble is located in the ghost cell of a symmetric boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + + if (p > 0) then + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + end if call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) @@ -576,12 +558,6 @@ contains end do end if - !call sleep(proc_rank) - !print*, proc_rank - !do k = 1, nBubs - !print*, k, lag_id(k, 1), mtn_pos(k,:,:) - !end do - ! Radial motion model adap_dt_stop_max = 0 !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & @@ -660,13 +636,6 @@ contains end if end do - !if (lag_id(k,1) == 2) then - !print*, "as", proc_rank, stage, k, mtn_dposdt(k,:,stage), mtn_dveldt(k,:,stage) - !end if - - !if (proc_rank == 0 .or. proc_rank == 2) print*, "a", proc_rank, mtn_dposdt(:,:,stage), mtn_dveldt(:,:,stage) - !call sleep(1) - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do @@ -889,8 +858,6 @@ contains if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 end do end if - !print*, "h", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) - !call sleep(1) if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation @@ -908,45 +875,18 @@ contains !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. 
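            !< Sketch of the weighting used below (2D case, illustrative only): with psi(1:2)
            !! the normalized offsets from the owning cell centers, the interpolated driving
            !! pressure is
            !!   p_inf = p(i  ,j  )*(1-psi(1))*(1-psi(2)) + p(i+1,j  )*psi(1)*(1-psi(2))
            !!         + p(i  ,j+1)*(1-psi(1))*psi(2)     + p(i+1,j+1)*psi(1)*psi(2)
            !! The four (eight in 3D) coefficients sum to one, so the stencil recovers the
            !! cell-center value when the bubble sits exactly on a cell center.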
psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - !if (cell(1) == (m + buff_size - fd_number)) then - !cell(1) = cell(1) - 1 - !psi(1) = 1._wp - !else if (cell(1) == (fd_number - buff_size)) then - !psi(1) = 0._wp - !else - !if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - !end if + psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - !if (cell(2) == (n + buff_size - fd_number)) then - !cell(2) = cell(2) - 1 - !psi(2) = 1._wp - !else if (cell(2) == (fd_number - buff_size)) then - !psi(2) = 0._wp - !else - !if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - !end if + psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) if (p > 0) then psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - !if (cell(3) == (p + buff_size - fd_number)) then - !cell(3) = cell(3) - 1 - !psi(3) = 1._wp - !else if (cell(3) == (fd_number - buff_size)) then - !psi(3) = 0._wp - !else - !if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - !end if + psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) else psi(3) = 0._wp end if - !print*, "i", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) - !call sleep(1) - !< Perform bilinear interpolation if (p == 0) then !2D f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) @@ -1125,10 +1065,6 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - !if (lag_id(k,1) == 2) then - !print*, "g11", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) - !print*, "g12", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) - !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) @@ -1144,10 +1080,6 @@ contains mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - !if (lag_id(k,1) == 2) then - !print*, "g21", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) - !print*, "g22", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) - !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) @@ -1296,23 +1228,12 @@ contains end do if (num_procs > 1) then - call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) - call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) - call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, dest), mtn_posPrev(:, :, dest)) + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, 
intfc_dveldt, gas_dpdt, & gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) - !do k = 1, nBubs - !if (lag_id(k,1) == 2) then - !print*, "c1", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,1), mtn_dposdt(k, :, 1) - !print*, "c2", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,2), mtn_dposdt(k, :, 2) - !endif - !end do end if end subroutine s_enforce_EL_bubbles_boundary_conditions @@ -1330,18 +1251,22 @@ contains integer :: i do while (pos(1) < x_cb(cell(1) - 1)) + if (cell(1) == fd_number - buff_size) print*, "xb error", proc_rank, pos cell(1) = cell(1) - 1 end do do while (pos(1) >= x_cb(cell(1))) + if (cell(1) == m + buff_size - fd_number) print*, "xe error", proc_rank, pos cell(1) = cell(1) + 1 end do do while (pos(2) < y_cb(cell(2) - 1)) + if (cell(2) == fd_number - buff_size) print*, "yb error", proc_rank, pos cell(2) = cell(2) - 1 end do do while (pos(2) >= y_cb(cell(2))) + if (cell(2) == n + buff_size - fd_number) print*, "ye error", proc_rank, pos cell(2) = cell(2) + 1 end do diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index cb9279575d..98cbad4f66 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -500,15 +500,6 @@ contains (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) end if - !if (i == 1 .and. id == 2) then - !print*, "c", proc_rank, dp, & - !q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)), & - !q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)), & - !q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)), & - !pos, x_cc(cell(1) + 1), x_cc(cell(1) - 1), cell - !call sleep(1) - !end if - vol = (4._wp/3._wp) * pi * rad**3._wp force = -1._wp * vol * dp @@ -526,11 +517,6 @@ contains a = force / (mg + mv) - !if (i == 1 .and. id == 2) then - !print*, "d", proc_rank, vol, force, v_rel, vel, v_rel - vel, a, mg, mv - !call sleep(1) - !end if - end function f_get_acceleration function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index aa345d4ab7..f79eb26995 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -32,6 +32,8 @@ module m_mpi_proxy implicit none + integer, parameter :: n_neighbor = 26 + integer, private, allocatable, dimension(:) :: ib_buff_send !< !! This variable is utilized to pack and send the buffer of the immersed !! 
boundary markers, for a single computational domain boundary at the @@ -54,7 +56,7 @@ module m_mpi_proxy integer, dimension(:,:,:,:), allocatable :: p_send_ids character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff type(bounds_info), dimension(3) :: comm_coords - integer :: p_buff_size + integer :: p_buff_size, p_var_size !$acc declare create(p_send_counts, comm_coords) contains @@ -94,7 +96,9 @@ contains call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) nReal = 7 + 16*2 + 10*lag_num_ts - p_buff_size = lag_params%nBubs_glb * (nReal * real_size + int_size) + p_var_size = (nReal * real_size + int_size) + p_buff_size = lag_params%nBubs_glb * p_var_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) comm_coords(1)%beg = x_cb(buff_size - fd_number - 1) @@ -422,9 +426,10 @@ contains end subroutine s_mpi_sendrecv_ib_buffers - subroutine s_add_particles_to_transfer_list(pos, posPrev, nbub) + subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) - real(wp), dimension(:,:) :: pos, posPrev + real(wp), dimension(:,:) :: pos + real(wp), dimension(:,:), optional :: posPrev integer :: bubID, nbub integer :: i, j, k @@ -437,185 +442,82 @@ contains end do do k = 1, nbub - ! Left face - if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then - !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k - !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + if (f_crosses_boundary(k, 1, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 0, 0) if (n > 0) then - ! Left bottom corner - if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k - p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Left top corner - elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k - p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) end if end if - ! Right face - elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then - !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k - !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + elseif (f_crosses_boundary(k, 1, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 0, 0) if (n > 0) then - ! Right bottom corner - if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k - p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Right top corner - elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then - p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k - p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) end if end if - ! Bottom face (corners already accounted for) - elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Top face (corners already accounted for) - elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + elseif (f_crosses_boundary(k, 2, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, -1, 0) + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 1, 0) end if end do - end subroutine s_add_particles_to_transfer_list - - subroutine s_add_particles_to_transfer_list_IC(pos, nbub) + contains - real(wp), dimension(:,:) :: pos - integer :: bubID, nbub - integer :: i, j, k + logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev) + integer, intent(in) :: particle_id, dir, loc + real(wp), dimension(:,:), intent(in) :: pos + real(wp), dimension(:,:), optional, intent(in) :: posPrev - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - p_send_counts(i,j,k) = 0 - end do - end do - end do + if (loc == -1) then ! Beginning of the domain + if (nidx(dir)%beg == 0) then + f_crosses_boundary = .false. + return + end if - do k = 1, nbub - ! Left face - if (pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then - !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k - !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 - ! Left bottom corner - if (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k - p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Left top corner - elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k - p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (present(posPrev)) then + f_crosses_boundary = (posPrev(particle_id,dir) > comm_coords(dir)%beg .and. & + pos(particle_id,dir) < comm_coords(dir)%beg) + else + f_crosses_boundary = (pos(particle_id,dir) < comm_coords(dir)%beg) end if - ! Right face - elseif (pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then - !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k - !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 - ! Right bottom corner - if (pos(k,2) < comm_coords(2)%beg .and. 
nidx(2)%beg /= 0) then - p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k - p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Right top corner - elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k - p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + elseif (loc == 1) then ! End of the domain + if (nidx(dir)%end == 0) then + f_crosses_boundary = .false. + return + end if + + if (present(posPrev)) then + f_crosses_boundary = (posPrev(particle_id,dir) < comm_coords(dir)%end .and. & + pos(particle_id,dir) > comm_coords(dir)%end) + else + f_crosses_boundary = (pos(particle_id,dir) > comm_coords(dir)%end) end if - ! Bottom face (corners already accounted for) - elseif (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Top face (corners already accounted for) - elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 end if - end do - end subroutine s_add_particles_to_transfer_list_IC + end function f_crosses_boundary + subroutine s_add_particle_to_direction(particle_id, dir_x, dir_y, dir_z) - subroutine s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & - rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & - dgasmv, dpos, dvel, lag_num_ts) + integer, intent(in) :: particle_id, dir_x, dir_y, dir_z - real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt - integer, dimension(:,:) :: lag_id - real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv - real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel - integer :: position, bub_id, lag_num_ts + p_send_ids(dir_x, dir_y, dir_z, p_send_counts(dir_x, dir_y, dir_z)) = particle_id + p_send_counts(dir_x, dir_y, dir_z) = p_send_counts(dir_x, dir_y, dir_z) + 1 - integer :: i, j, k, l, q + end subroutine s_add_particle_to_direction + end subroutine s_add_particles_to_transfer_list -#ifdef MFC_MPI - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) - !if (p_send_counts(i,j,k) > 0) then - !print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) - !end if - if (p_send_counts(i,j,k) > 0) then - position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, 
MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do - call MPI_Isend(p_send_buff, position, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, request, ierr) - end if - end if - end do - end do - end do -#endif - - end subroutine s_mpi_send_particles - - subroutine s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & dgasmv, dpos, dvel, lag_num_ts, nbubs) @@ -624,61 +526,111 @@ contains integer, dimension(:,:) :: lag_id real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel - integer :: position, bub_id, lag_num_ts, nbubs + integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size integer :: i, j, k, l, q -#ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then - call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - !if (p_recv_counts(i,j,k) > 0) then - !print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) - !end if - if (p_recv_counts(i,j,k) > 0) then - position = 0 - call MPI_recv(p_recv_buff, p_buff_size, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do l = 0, p_recv_counts(i,j,k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, 
position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_buff_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id + + partner = neighbor_ranks(i,j,k) + + send_tag = neighbor_tag(i, j, k) + recv_tag = neighbor_tag(-i, -j, -k) + + call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + p_recv_size = p_recv_counts(i,j,k) * p_var_size + + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, 
mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + end do + + send_tag = send_tag + max(num_procs, n_neighbor) + recv_tag = recv_tag + max(num_procs, n_neighbor) + + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + position = 0 + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, 
ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do - end if + lag_id(bub_id, 2) = bub_id + + end do end if end do end do end do -#endif - end subroutine s_mpi_recv_particles + end subroutine s_mpi_sendrecv_particles + + integer function neighbor_tag(i, j, k) result(tag) + + integer, intent(in) :: i, j, k + + tag = (k + 1)*9 + (j + 1)*3 + (i + 1) + + end function neighbor_tag impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq From 7df40fd0d0ca1dcfde3a675e861f0079b4b5ab01 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sun, 22 Jun 2025 16:30:30 -0400 Subject: [PATCH 37/62] comments and GPU MPI --- src/simulation/m_bubbles_EL.fpp | 37 ++-- src/simulation/m_bubbles_EL_kernels.fpp | 63 +++--- src/simulation/m_mpi_proxy.fpp | 249 +++++++++++++++--------- 3 files changed, 213 insertions(+), 136 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5313c27575..31d83c12ea 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -617,24 +617,24 @@ contains gas_dmvdt(k, stage) = myMvdot gas_dpdt(k, stage) = myPbdot - end if + do l = 1, num_dims + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & + intfc_rad(k,2), mtn_vel(k,l,2), & + gas_mg(k), gas_mv(k, 2), & + Re(1), myRho, cell, l, q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if + end do - do l = 1, num_dims - if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & - cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,2), mtn_vel(k,l,2), & - gas_mg(k), gas_mv(k, 2), & - Re(1), myRho, cell, l, lag_id(k,1), q_prim_vf) - else - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end if - end do + end if adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) @@ -1155,7 +1155,8 @@ contains end subroutine s_update_lagrange_tdv_rk !> This subroutine enforces reflective and wall boundary conditions for EL bubbles - subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + !! 
@param dest Destination for the bubble position update + impure subroutine s_enforce_EL_bubbles_boundary_conditions(dest) integer, intent(in) :: dest integer :: k, i, patch_id diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 98cbad4f66..6f076f36d9 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -437,14 +437,23 @@ contains end subroutine s_get_cell - function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) + !! This function interpolates the velocity of Eulerian field at the position + !! of the bubble. + !! @param pos Position of the bubble in directiion i + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return v Interpolated velocity at the position of the bubble + pure function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) !$acc routine seq - integer, dimension(3) :: cell - integer :: i - type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp), intent(in) :: pos + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp) :: v real(wp) :: L1, L2, L3 - real(wp) :: pos, x1, x2, x3 + real(wp) :: x1, x2, x3 real(wp) :: y1, y2, y3 if (i == 1) then @@ -478,13 +487,28 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,id,q_prim_vf) result(a) + !! This function calculates the acceleration of the bubble + !! based on the pressure gradient, velocity, and drag model. + !! @param pos Position of the bubble in direction i + !! @param rad Radius of the bubble + !! @param vel Velocity of the bubble + !! @param mg Mass of the gas in the bubble + !! @param mv Mass of the liquid in the bubble + !! @param Re Reynolds number + !! @param rho Density of the fluid + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return a Acceleration of the bubble in direction i + pure function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) !$acc routine seq - integer, dimension(3) :: cell - integer :: i, id - type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: a, vol, mass, force, vel, area - real(wp) :: pos, rad, dp, v_rel, mg, mv, Re, rho, c_d + real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + + real(wp) :: a + real(wp) :: dp, vol, force, v_rel if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -509,27 +533,10 @@ contains force = force - (4._wp * pi * rad * v_rel) / Re else if (lag_params%drag_model == 2) then ! No slip Stokes drag force = force - (6._wp * pi * rad * v_rel) / Re - elseif (lag_params%drag_model == 3) then ! Clift and Gauvin (1971) Cd fit - area = pi * rad**2._wp - c_d = f_get_clift_gauvin_drag_coeff(v_rel, Re, rho, rad) - force = force - 0.5_wp * sign(1._wp, v_rel) * rho * (v_rel ** 2._wp) * c_d * area end if a = force / (mg + mv) end function f_get_acceleration - function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) -!$acc routine seq - real(wp) :: v_rel, mu_inv, rho, rad - real(wp) :: c_d - real(wp) :: Re_b ! 
Bubble Reynolds number - - Re_b = max(2._wp * rho * abs(v_rel) * rad * mu_inv, Re_b_min) - - C_d = 24._wp * (1 + 0.15_wp * Re_b ** (0.687_wp)) / Re_b - C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) - - end function f_get_clift_gauvin_drag_coeff - end module m_bubbles_EL_kernels diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f79eb26995..1cb8273c8a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -87,6 +87,9 @@ contains end subroutine s_initialize_mpi_proxy_module + !! This subroutine initializes the MPI buffers and variables + !! required for the particle communication. + !! @param lag_num_ts Number of stages in time-stepping scheme subroutine s_initialize_particles_mpi(lag_num_ts) integer :: i, j, k @@ -426,7 +429,13 @@ contains end subroutine s_mpi_sendrecv_ib_buffers - subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) + !> This subroutine adds particles to the transfer list for the MPI + !! communication. + !! @param nBub Current LOCAL number of bubbles + !! @param pos Current position of each bubble + !! @param posPrev Previous position of each bubble (optional, not used + !! for communication of initial condition) + impure subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) real(wp), dimension(:,:) :: pos real(wp), dimension(:,:), optional :: posPrev @@ -517,7 +526,33 @@ contains end subroutine s_add_particles_to_transfer_list - subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !> This subroutine performs the MPI communication for lagrangian particles/ + !! bubbles. + !! @param bub_R0 Initial radius of each bubble + !! @param Rmax_stats Maximum radius of each bubble + !! @param Rmin_stats Minimum radius of each bubble + !! @param gas_mg Mass of gas in each bubble + !! @param gas_betaT Heat flux model coefficient for each bubble + !! @param gas_betaC mass flux model coefficient for each bubble + !! @param bub_dphidt Subgrid velocity potential for each bubble + !! @param lag_id Global and local ID of each bubble + !! @param gas_p Pressure of the gas in each bubble + !! @param gas_mv Mass of vapor in each bubble + !! @param rad Radius of each bubble + !! @param rvel Radial velocity of each bubble + !! @param pos Position of each bubble + !! @param posPrev Previous position of each bubble + !! @param vel Velocity of each bubble + !! @param scoord Cell index in real format of each bubble + !! @param drad Radial velocity of each bubble + !! @param drvel Radial acceleration of each bubble + !! @param dgasp Time derivative of gas pressure in each bubble + !! @param dgasmv Time derivative of vapor mass in each bubble + !! @param dpos Time derivative of position of each bubble + !! @param dvel Time derivative of velocity of each bubble + !! @param lag_num_ts Number of stages in time-stepping scheme + !! 
@param nBubs Local number of bubbles + impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & dgasmv, dpos, dvel, lag_num_ts, nbubs) @@ -530,100 +565,134 @@ contains integer :: i, j, k, l, q - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - - partner = neighbor_ranks(i,j,k) - - send_tag = neighbor_tag(i, j, k) - recv_tag = neighbor_tag(-i, -j, -k) - - call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - p_recv_size = p_recv_counts(i,j,k) * p_var_size - - position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do - - send_tag = send_tag + max(num_procs, n_neighbor) - recv_tag = recv_tag + max(num_procs, n_neighbor) - - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & - p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - position = 0 - 
do l = 0, p_recv_counts(i,j,k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc host_data use_device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & + !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) + call nvtxStartRange("LAG-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("LAG-COMM-DEV2HOST") + !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & + !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) + call nvtxEndRange + call nvtxStartRange("LAG-COMM-SENDRECV-NO-RMDA") + #:endif + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + + partner = neighbor_ranks(i,j,k) + + send_tag = neighbor_tag(i, j, k) + recv_tag = neighbor_tag(-i, -j, -k) + + call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + p_recv_size = p_recv_counts(i,j,k) * p_var_size + + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, 
ierr) + end do + end do + + send_tag = send_tag + max(num_procs, n_neighbor) + recv_tag = recv_tag + max(num_procs, n_neighbor) + + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + position = 0 + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id + + end do + end if end do - end if + end do end do - end do - end do + + #:if rdma_mpi + !$acc end host_data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & + !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) + call nvtxEndRange + #:endif + end if + #:endfor end subroutine s_mpi_sendrecv_particles + + !! This function returns a unique tag for each neighbor based on its position + !! relative to the current process. + !! @param i, j, k Indices of the neighbor in the range [-1, 1] + !! 
@return tag Unique integer tag for the neighbor integer function neighbor_tag(i, j, k) result(tag) integer, intent(in) :: i, j, k From e0d73987306bc9e086c1494eac485e7a7036ef15 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:30:28 -0400 Subject: [PATCH 38/62] 3D Transfer list --- src/simulation/m_mpi_proxy.fpp | 96 ++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 1cb8273c8a..788f5297d9 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -457,9 +457,45 @@ contains if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then call s_add_particle_to_direction(k, -1, -1, 0) call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then call s_add_particle_to_direction(k, -1, 1, 0) call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if end if end if elseif (f_crosses_boundary(k, 1, 1, pos, posPrev)) then @@ -468,15 +504,75 @@ contains if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then call s_add_particle_to_direction(k, 1, -1, 0) call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then call s_add_particle_to_direction(k, 1, 1, 0) call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call 
s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if end if end if elseif (f_crosses_boundary(k, 2, -1, pos, posPrev)) then call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + elseif (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 0, 1) + end if end if end do From fc94cfaea6e7e55b5355f6e98f6061bc6b4d5247 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 24 Jun 2025 16:41:18 -0400 Subject: [PATCH 39/62] GPU particle MPI --- src/simulation/m_bubbles_EL.fpp | 108 +++++++++++++---- src/simulation/m_mpi_proxy.fpp | 206 ++++++++++++++------------------ 2 files changed, 172 insertions(+), 142 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 31d83c12ea..76e06266cc 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -73,6 +73,11 @@ module m_bubbles_EL !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) + + integer, allocatable, dimension(:) :: keep_bubble, prefix_sum + integer :: active_bubs + !$acc declare create(keep_bubble, prefix_sum, active_bubs) + contains !> Initializes the lagrangian subgrid bubble solver @@ -139,6 +144,8 @@ contains @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(keep_bubble(1:nBubs_glb), prefix_sum(1:nBubs_glb)) + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) @@ -257,6 +264,15 @@ contains if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. 
Check input/lag_bubbles.dat') end if + if (num_procs > 1) then + call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + end if + !$acc update device(bubbles_lagrange, lag_params) !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & @@ -269,15 +285,6 @@ contains !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - if (num_procs > 1) then - call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) - call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) - end if - !Populate temporal variables call s_transfer_data_to_tmp() call s_smear_voidfraction() @@ -1159,13 +1166,12 @@ contains impure subroutine s_enforce_EL_bubbles_boundary_conditions(dest) integer, intent(in) :: dest - integer :: k, i, patch_id + integer :: k, i, patch_id, offset integer, dimension(3) :: cell - logical, dimension(1:nBubs) :: remove_bubble !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs - remove_bubble(k) = .false. + keep_bubble(k) = 1 if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then @@ -1174,9 +1180,9 @@ contains .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1186,9 +1192,9 @@ contains .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 end if if (p > 0) then @@ -1199,9 +1205,9 @@ contains .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then - remove_bubble(k) = .true. 
+ keep_bubble(k) = 0 end if end if @@ -1223,20 +1229,76 @@ contains end if end do - !$acc loop seq - do k = nBubs, 1, -1 - if (remove_bubble(k)) call s_remove_lag_bubble(k) + call nvtxStartRange("LAG-BC-DEV2HOST") + !$acc update host(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, & + !$acc keep_bubble, nBubs) + call nvtxEndRange + + do k = 1, nBubs + if (k == 1) then + prefix_sum(k) = keep_bubble(k) + else + prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) + end if + if (k == nBubs) active_bubs = prefix_sum(k) + end do + + do k = 1, nBubs + if (keep_bubble(k) == 1) then + if (prefix_sum(k) /= k) then + bub_R0(prefix_sum(k)) = bub_R0(k) + Rmax_stats(prefix_sum(k)) = Rmax_stats(k) + Rmin_stats(prefix_sum(k)) = Rmin_stats(k) + gas_mg(prefix_sum(k)) = gas_mg(k) + gas_betaT(prefix_sum(k)) = gas_betaT(k) + gas_betaC(prefix_sum(k)) = gas_betaC(k) + bub_dphidt(prefix_sum(k)) = bub_dphidt(k) + lag_id(prefix_sum(k), 1) = lag_id(k, 1) + gas_p(prefix_sum(k), 1:2) = gas_p(k, 1:2) + gas_mv(prefix_sum(k), 1:2) = gas_mv(k, 1:2) + intfc_rad(prefix_sum(k), 1:2) = intfc_rad(k, 1:2) + intfc_vel(prefix_sum(k), 1:2) = intfc_vel(k, 1:2) + mtn_pos(prefix_sum(k), 1:3, 1:2) = mtn_pos(k, 1:3, 1:2) + mtn_posPrev(prefix_sum(k), 1:3, 1:2) = mtn_posPrev(k, 1:3, 1:2) + mtn_vel(prefix_sum(k), 1:3, 1:2) = mtn_vel(k, 1:3, 1:2) + mtn_s(prefix_sum(k), 1:3, 1:2) = mtn_s(k, 1:3, 1:2) + intfc_draddt(prefix_sum(k), 1:lag_num_ts) = intfc_draddt(k, 1:lag_num_ts) + intfc_dveldt(prefix_sum(k), 1:lag_num_ts) = intfc_dveldt(k, 1:lag_num_ts) + gas_dpdt(prefix_sum(k), 1:lag_num_ts) = gas_dpdt(k, 1:lag_num_ts) + gas_dmvdt(prefix_sum(k), 1:lag_num_ts) = gas_dmvdt(k, 1:lag_num_ts) + mtn_dposdt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dposdt(k, 1:3, 1:lag_num_ts) + mtn_dveldt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dveldt(k, 1:3, 1:lag_num_ts) + end if + end if + if (k == nBubs) nBubs = active_bubs end do if (num_procs > 1) then + call nvtxStartRange("LAG-BC-TRANSFER-LIST") call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, dest), mtn_posPrev(:, :, dest)) + call nvtxEndRange + + call nvtxStartRange("LAG-BC-SENDRECV") call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + call nvtxEndRange end if + call nvtxStartRange("LAG-BC-HOST2DEV") + !$acc update device(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + call nvtxEndRange + end subroutine s_enforce_EL_bubbles_boundary_conditions !> This subroutine returns the computational coordinate of the cell for the given position. 
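(The keep_bubble/prefix_sum logic in the hunk above is a serial stream compaction: each bubble that survives the boundary check gets a flag of 1, an inclusive prefix sum of the flags gives every survivor its new slot, and a single forward pass then copies survivors down in place. The pass is safe because prefix_sum(k) <= k for every k, so a surviving entry is never overwritten before it has been moved. A minimal standalone sketch of the same pattern follows; the program and its array names (radius, keep, new_idx) are illustrative placeholders, not MFC code.)

program compaction_sketch
    ! Illustrative only: mark / prefix-sum / compact pattern used when
    ! removing out-of-domain particles. Not part of the patch series.
    implicit none
    integer, parameter :: wp = selected_real_kind(15, 307)
    integer  :: n, k, n_active
    real(wp) :: radius(6)
    integer  :: keep(6), new_idx(6)

    n = 6
    radius = [1._wp, 2._wp, 3._wp, 4._wp, 5._wp, 6._wp]
    keep   = [1, 0, 1, 1, 0, 1]   ! 1 = particle stays, 0 = particle removed

    ! Inclusive prefix sum of the flags gives each survivor its new slot
    new_idx(1) = keep(1)
    do k = 2, n
        new_idx(k) = new_idx(k - 1) + keep(k)
    end do
    n_active = new_idx(n)

    ! Forward pass: new_idx(k) <= k, so copying in increasing k is safe
    do k = 1, n
        if (keep(k) == 1 .and. new_idx(k) /= k) radius(new_idx(k)) = radius(k)
    end do
    n = n_active

    print *, 'active particles:', n
    print *, 'compacted radii :', radius(1:n)
end program compaction_sketch

(With any Fortran compiler this prints 4 active particles and the radii 1, 3, 4, 6, mirroring how nBubs is shrunk to active_bubs after the in-place copies above.)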
@@ -1252,22 +1314,18 @@ contains integer :: i do while (pos(1) < x_cb(cell(1) - 1)) - if (cell(1) == fd_number - buff_size) print*, "xb error", proc_rank, pos cell(1) = cell(1) - 1 end do do while (pos(1) >= x_cb(cell(1))) - if (cell(1) == m + buff_size - fd_number) print*, "xe error", proc_rank, pos cell(1) = cell(1) + 1 end do do while (pos(2) < y_cb(cell(2) - 1)) - if (cell(2) == fd_number - buff_size) print*, "yb error", proc_rank, pos cell(2) = cell(2) - 1 end do do while (pos(2) >= y_cb(cell(2))) - if (cell(2) == n + buff_size - fd_number) print*, "ye error", proc_rank, pos cell(2) = cell(2) + 1 end do diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 788f5297d9..6428eab9f1 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -579,6 +579,7 @@ contains contains logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev) + integer, intent(in) :: particle_id, dir, loc real(wp), dimension(:,:), intent(in) :: pos real(wp), dimension(:,:), optional, intent(in) :: posPrev @@ -661,126 +662,97 @@ contains integer :: i, j, k, l, q - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc host_data use_device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & - !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) - call nvtxStartRange("LAG-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("LAG-COMM-DEV2HOST") - !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & - !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) - call nvtxEndRange - call nvtxStartRange("LAG-COMM-SENDRECV-NO-RMDA") - #:endif + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + + partner = neighbor_ranks(i,j,k) + + send_tag = neighbor_tag(i, j, k) + recv_tag = neighbor_tag(-i, -j, -k) + + call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + p_recv_size = p_recv_counts(i,j,k) * p_var_size + + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, 
position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + end do + + send_tag = send_tag + max(num_procs, n_neighbor) + recv_tag = recv_tag + max(num_procs, n_neighbor) + + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + position = 0 + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call 
MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - - partner = neighbor_ranks(i,j,k) - - send_tag = neighbor_tag(i, j, k) - recv_tag = neighbor_tag(-i, -j, -k) - - call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - p_recv_size = p_recv_counts(i,j,k) * p_var_size - - position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do - - send_tag = send_tag + max(num_procs, n_neighbor) - recv_tag = recv_tag + max(num_procs, n_neighbor) - - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & - 
p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - position = 0 - do l = 0, p_recv_counts(i,j,k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id - - end do - end if end do - end do + end if end do - - #:if rdma_mpi - !$acc end host_data - !$acc wait - #:else - call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & - !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) - call nvtxEndRange - #:endif - end if - #:endfor + end do + end do end subroutine s_mpi_sendrecv_particles From 2e89526218b9ff092066393c0971decb95602c83 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:59:24 -0400 Subject: [PATCH 40/62] improve EL I/O and silo output --- src/common/m_constants.fpp | 1 + src/post_process/m_data_output.fpp | 460 ++++++++++++++++++++--- src/post_process/m_global_parameters.fpp | 36 ++ src/post_process/m_mpi_proxy.fpp | 7 +- src/post_process/m_start_up.f90 | 9 +- 
src/simulation/m_bubbles_EL.fpp | 326 ++++++++++------ src/simulation/m_start_up.fpp | 1 - toolchain/mfc/run/case_dicts.py | 18 + 8 files changed, 670 insertions(+), 188 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 0c17e9789f..8e6a9a1c34 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -50,6 +50,7 @@ module m_constants ! Lagrange bubbles constants integer, parameter :: mapCells = 3 !< Number of cells around the bubble where the smoothening function will have effect real(wp), parameter :: R_uni = 8314._wp ! Universal gas constant - J/kmol/K + integer, parameter :: lag_io_vars = 21 ! Number of variables per particle for MPI_IO ! Strang Splitting constants real(wp), parameter :: dflt_adap_dt_tol = 1e-4_wp !< Default tolerance for adaptive step size diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 382f8387ae..c1dde087f5 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -30,7 +30,8 @@ module m_data_output s_open_energy_data_file, & s_write_grid_to_formatted_database_file, & s_write_variable_to_formatted_database_file, & - s_write_lag_bubbles_results, & + s_write_lag_bubbles_results_to_text, & + s_write_lag_bubbles_to_formatted_database_file, & s_write_intf_data_file, & s_write_energy_data_file, & s_close_formatted_database_file, & @@ -276,12 +277,14 @@ contains end if if (bubbles_lagrange) then !Lagrangian solver - dbdir = trim(case_dir)//'/lag_bubbles_post_process' - file_loc = trim(dbdir)//'/.' - call my_inquire(file_loc, dir_check) + if (lag_txt_wrt) then + dbdir = trim(case_dir)//'/lag_bubbles_post_process' + file_loc = trim(dbdir)//'/.' + call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(dbdir)) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(dbdir)) + end if end if end if @@ -1088,7 +1091,7 @@ contains !> Subroutine that writes the post processed results in the folder 'lag_bubbles_data' !! @param t_step Current time step - impure subroutine s_write_lag_bubbles_results(t_step) + impure subroutine s_write_lag_bubbles_results_to_text(t_step) integer, intent(in) :: t_step @@ -1106,31 +1109,65 @@ contains logical :: lg_bub_file, file_exist integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer :: ifile, ierr, tot_data + integer :: ifile, ierr + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part integer :: i - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then - if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - read (9) tot_data, time_real - close (9) - end if - else - print '(A)', trim(file_loc)//' is missing. Exiting.' - call s_mpi_abort + ! Check if file exists + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if - call MPI_BCAST(tot_data, 1, MPI_integer, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(time_real, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate(proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + lsizes(1) = file_tot_part + lsizes(2) = lag_io_vars start_idx_part(1) = 0 start_idx_part(2) = 0 @@ -1138,59 +1175,362 @@ contains MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + + allocate (MPI_IO_DATA_lg_bubbles(file_tot_part, 1:lag_io_vars)) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, lag_io_vars*file_tot_part, & + mpi_p, status, ierr) + + write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' + file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + + if (proc_rank == 0) then + open (unit=29, file=file_loc, form='formatted', position='rewind') + + if (lag_header) then + write(29, '(A)', advance='no') + if (lag_id_wrt) write(29, '(A8)', advance='no') 'id, ' + if (lag_pos_wrt) write(29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' + if (lag_pos_prev_wrt) write(29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' + if (lag_vel_wrt) write(29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' + if (lag_rad_wrt) write(29, '(A17)', advance='no') 'radius, ' + if (lag_rvel_wrt) write(29, '(A17)', advance='no') 'rvel, ' + if (lag_r0_wrt) write(29, '(A17)', advance='no') 'r0, ' + if (lag_rmax_wrt) write(29, '(A17)', advance='no') 'rmax, ' + if (lag_rmin_wrt) write(29, '(A17)', advance='no') 'rmin, ' + if (lag_dphidt_wrt) write(29, '(A17)', advance='no') 'dphidt, ' + if (lag_pres_wrt) write(29, '(A17)', advance='no') 'pressure, ' + if (lag_mv_wrt) write(29, '(A17)', advance='no') 'mv, ' + if (lag_mg_wrt) write(29, '(A17)', advance='no') 'mg, ' + if (lag_betaT_wrt) write(29, '(A17)', advance='no') 'betaT, ' + if (lag_betaC_wrt) write(29, '(A17)', advance='no') 'betaC, ' + write(29, 
'(A15)') 'time' + end if + + do i = 1, file_tot_part + id = int(MPI_IO_DATA_lg_bubbles(i, 1)) + inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) + if (id > 0) then + write(29, '(100(A))', advance='no') '' + + if (lag_id_wrt) write(29, '(I6, A)', advance='no') id, ', ' + if (lag_pos_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' + if (lag_pos_prev_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' + if (lag_vel_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' + if (lag_rad_wrt) write(29, '(E15.7, A)', advance='no') inputvals(10), ', ' + if (lag_rvel_wrt) write(29, '(E15.7, A)', advance='no') inputvals(11), ', ' + if (lag_r0_wrt) write(29, '(E15.7, A)', advance='no') inputvals(12), ', ' + if (lag_rmax_wrt) write(29, '(E15.7, A)', advance='no') inputvals(13), ', ' + if (lag_rmin_wrt) write(29, '(E15.7, A)', advance='no') inputvals(14), ', ' + if (lag_dphidt_wrt) write(29, '(E15.7, A)', advance='no') inputvals(15), ', ' + if (lag_pres_wrt) write(29, '(E15.7, A)', advance='no') inputvals(16), ', ' + if (lag_mv_wrt) write(29, '(E15.7, A)', advance='no') inputvals(17), ', ' + if (lag_mg_wrt) write(29, '(E15.7, A)', advance='no') inputvals(18), ', ' + if (lag_betaT_wrt) write(29, '(E15.7, A)', advance='no') inputvals(19), ', ' + if (lag_betaC_wrt) write(29, '(E15.7, A)', advance='no') inputvals(20), ', ' + write(29, '(E15.7)') time_real + end if + end do + close (29) + end if + + deallocate (MPI_IO_DATA_lg_bubbles) + + call s_mpi_barrier() + + call MPI_FILE_CLOSE(ifile, ierr) +#endif + + end subroutine s_write_lag_bubbles_results_to_text + + impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) + + integer, intent(in) :: t_step + + character(len=len_trim(case_dir) + 3*name_len) :: file_loc + + integer :: id + +#ifdef MFC_MPI + real(wp), dimension(20) :: inputvals + real(wp) :: time_real + integer, dimension(MPI_STATUS_SIZE) :: status + integer(KIND=MPI_OFFSET_KIND) :: disp + integer :: view + + logical :: lg_bub_file, file_exist + + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer :: ifile, ierr, tot_data, valid_data, nBub + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + character(LEN=4*name_len), dimension(num_procs) :: meshnames + integer, dimension(num_procs) :: meshtypes + + integer :: i, j + + real(wp), dimension(:), allocatable :: bub_id + real(wp), dimension(:), allocatable :: px, py, pz, ppx, ppy, ppz, vx, vy, vz + real(wp), dimension(:), allocatable :: radius, rvel, rnot, rmax, rmin, dphidt + real(wp), dimension(:), allocatable :: pressure, mv, mg, betaT, betaC + + dummy = 0._wp + + ! Construct file path + write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=lg_bub_file) - if (lg_bub_file) then + ! Check if file exists + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') + end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate(proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate (MPI_IO_DATA_lg_bubbles(tot_data, 1:21)) + ! Set time variables from file - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, 21*tot_data, & - mpi_p, status, ierr) + nBub = proc_bubble_counts(proc_rank + 1) - write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' - file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + start_idx_part(2) = 0 + lsizes(1) = nBub + lsizes(2) = lag_io_vars + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + + if (nBub > 0) then + + allocate(bub_id(nBub)) + allocate(px(nBub)) + allocate(py(nBub)) + allocate(pz(nBub)) + allocate(ppx(nBub)) + allocate(ppy(nBub)) + allocate(ppz(nBub)) + allocate(vx(nBub)) + allocate(vy(nBub)) + allocate(vz(nBub)) + allocate(radius(nBub)) + allocate(rvel(nBub)) + allocate(rnot(nBub)) + allocate(rmax(nBub)) + allocate(rmin(nBub)) + allocate(dphidt(nBub)) + allocate(pressure(nBub)) + allocate(mv(nBub)) + allocate(mg(nBub)) + allocate(betaT(nBub)) + allocate(betaC(nBub)) + allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, & + lag_io_vars * nBub, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + ! Extract data from MPI_IO_DATA_lg_bubbles array + ! 
Adjust these indices based on your actual data layout + bub_id(:) = MPI_IO_DATA_lg_bubbles(:, 1) + px(:) = MPI_IO_DATA_lg_bubbles(:, 2) + py(:) = MPI_IO_DATA_lg_bubbles(:, 3) + pz(:) = MPI_IO_DATA_lg_bubbles(:, 4) + ppx(:) = MPI_IO_DATA_lg_bubbles(:, 5) + ppy(:) = MPI_IO_DATA_lg_bubbles(:, 6) + ppz(:) = MPI_IO_DATA_lg_bubbles(:, 7) + vx(:) = MPI_IO_DATA_lg_bubbles(:, 8) + vy(:) = MPI_IO_DATA_lg_bubbles(:, 9) + vz(:) = MPI_IO_DATA_lg_bubbles(:, 10) + radius(:) = MPI_IO_DATA_lg_bubbles(:, 11) + rvel(:) = MPI_IO_DATA_lg_bubbles(:, 12) + rnot(:) = MPI_IO_DATA_lg_bubbles(:, 13) + rmax(:) = MPI_IO_DATA_lg_bubbles(:, 14) + rmin(:) = MPI_IO_DATA_lg_bubbles(:, 15) + dphidt(:) = MPI_IO_DATA_lg_bubbles(:, 16) + pressure(:) = MPI_IO_DATA_lg_bubbles(:, 17) + mv(:) = MPI_IO_DATA_lg_bubbles(:, 18) + mg(:) = MPI_IO_DATA_lg_bubbles(:, 19) + betaT(:) = MPI_IO_DATA_lg_bubbles(:, 20) + betaC(:) = MPI_IO_DATA_lg_bubbles(:, 21) + + ! Next, the root processor proceeds to record all of the spatial + ! extents in the formatted database master file. In addition, it + ! also records a sub-domain connectivity map so that the entire + ! grid may be reassembled by looking at the master file. if (proc_rank == 0) then - open (unit=29, file=file_loc, form='formatted', position='rewind') - !write(29,*) 'lg_bubID, x, y, z, xPrev, yPrev, zPrev, xVel, yVel, ', & - ! 'zVel, radius, interfaceVelocity, equilibriumRadius', & - ! 'Rmax, Rmin, dphidt, pressure, mv, mg, betaT, betaC, time' - do i = 1, tot_data - id = int(MPI_IO_DATA_lg_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) - if (id > 0) then - write (29, 6) int(id), inputvals(1), inputvals(2), & - inputvals(3), inputvals(4), inputvals(5), inputvals(6), inputvals(7), & - inputvals(8), inputvals(9), inputvals(10), inputvals(11), & - inputvals(12), inputvals(13), inputvals(14), inputvals(15), & - inputvals(16), inputvals(17), inputvals(18), inputvals(19), & - inputvals(20), time_real -6 format(I6, 21(1x, E15.7)) - end if + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH end do - close (29) + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) end if - deallocate (MPI_IO_DATA_lg_bubbles) + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + px, py, pz, nBub, & + DB_DOUBLE, DB_F77NULL, ierr) - end if + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', bub_id, nBub, t_step) + end if - call s_mpi_barrier() + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', vx, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel2', vy, nBub, t_step) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', vz, nBub, t_step) + end if + end if - call MPI_FILE_CLOSE(ifile, ierr) + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', radius, nBub, t_step) + end if + + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', rvel, nBub, t_step) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', rnot, nBub, t_step) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', rmax, nBub, t_step) + end if + + if (lag_rmin_wrt) then + call 
s_write_lag_variable_to_formatted_database_file('part_rmin', rmin, nBub, t_step) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', dphidt, nBub, t_step) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', pressure, nBub, t_step) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', mv, nBub, t_step) + end if + + if (lag_mg_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mg', mg, nBub, t_step) + end if + + if (lag_betaT_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaT', betaT, nBub, t_step) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', betaC, nBub, t_step) + end if + + deallocate(bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & + rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & + betaT, betaC) + end if + + deallocate (MPI_IO_DATA_lg_bubbles) #endif - end subroutine s_write_lag_bubbles_results + end subroutine s_write_lag_bubbles_to_formatted_database_file + + subroutine s_write_lag_variable_to_formatted_database_file(varname, data, nBubs, t_step) + + character(len=*), intent(in) :: varname + real(wp), dimension(1:nBubs), intent(in) :: data + integer, intent(in) :: nBubs, t_step + + character(len=64), dimension(num_procs) :: var_names + integer, dimension(num_procs) :: var_types + integer :: i + + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + + end subroutine s_write_lag_variable_to_formatted_database_file + impure subroutine s_write_intf_data_file(q_prim_vf) type(scalar_field), dimension(sys_size), intent(IN) :: q_prim_vf diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index e667c7eeac..4d0dad06ac 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -256,6 +256,24 @@ module m_global_parameters logical :: ib logical :: chem_wrt_Y(1:num_species) logical :: chem_wrt_T + logical :: lag_header + logical :: lag_txt_wrt + logical :: lag_db_wrt + logical :: lag_id_wrt + logical :: lag_pos_wrt + logical :: lag_pos_prev_wrt + logical :: lag_vel_wrt + logical :: lag_rad_wrt + logical :: lag_rvel_wrt + logical :: lag_r0_wrt + logical :: lag_rmax_wrt + logical :: lag_rmin_wrt + logical :: lag_dphidt_wrt + logical :: lag_pres_wrt + logical :: lag_mv_wrt + logical :: lag_mg_wrt + logical :: lag_betaT_wrt + logical :: lag_betaC_wrt !> @} real(wp), dimension(num_fluids_max) :: schlieren_alpha !< @@ -428,6 +446,24 @@ contains schlieren_wrt = .false. sim_data = .false. cf_wrt = .false. + lag_txt_wrt = .true. + lag_header = .true. + lag_db_wrt = .false. + lag_id_wrt = .true. + lag_pos_wrt = .true. + lag_pos_prev_wrt = .false. + lag_vel_wrt = .true. + lag_rad_wrt = .true. + lag_rvel_wrt = .false. + lag_r0_wrt = .false. + lag_rmax_wrt = .false. + lag_rmin_wrt = .false. + lag_dphidt_wrt = .false. + lag_pres_wrt = .false. + lag_mv_wrt = .false. + lag_mg_wrt = .false. 
+ lag_betaT_wrt = .false. + lag_betaC_wrt = .false. ib = .false. schlieren_alpha = dflt_real diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index effe0b9bb3..3917d18b74 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -107,7 +107,12 @@ contains & 'file_per_process', 'relax', 'cf_wrt', & & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & - & 'output_partial_domain', 'relativity', 'cont_damage' ] + & 'output_partial_domain', 'relativity', 'cont_damage', & + & 'lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & + & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & + & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & + & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & + & 'lag_betaT_wrt', 'lag_betaC_wrt' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index db1697ebd2..7a0e6d276b 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -89,7 +89,11 @@ impure subroutine s_read_input_file cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, & sim_data, hyperelasticity, Bx0, relativity, cont_damage, & - num_bc_patches + num_bc_patches, lag_header, lag_txt_wrt, lag_db_wrt, & + lag_id_wrt, lag_pos_wrt, lag_pos_prev_wrt, lag_vel_wrt, & + lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, & + lag_rmin_wrt, lag_dphidt_wrt, lag_pres_wrt, lag_mv_wrt, & + lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -663,7 +667,8 @@ impure subroutine s_save_data(t_step, varname, pres, c, H) call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' - call s_write_lag_bubbles_results(t_step) !! Individual bubble evolution + if (lag_txt_wrt) call s_write_lag_bubbles_results_to_text(t_step) !! Individual bubble evolution + if (lag_db_wrt) call s_write_lag_bubbles_to_formatted_database_file(t_step) end if if (sim_data .and. proc_rank == 0) then diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 31d83c12ea..f91b2def3c 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -415,6 +415,8 @@ contains integer, intent(inout) :: bub_id, save_count character(LEN=path_len + 2*name_len) :: file_loc + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part #ifdef MFC_MPI real(wp), dimension(20) :: inputvals @@ -429,81 +431,146 @@ contains integer :: ifile, ierr, tot_data, id integer :: i - write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write(file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=file_exist) - - if (file_exist) then - if (proc_rank == 0) then - open (9, file=trim(file_loc), form='unformatted', status='unknown') - read (9) tot_data, mytime, dt - close (9) - print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt - end if - else - print '(a)', trim(file_loc)//' is missing. exiting.' - call s_mpi_abort + + ! 
Check if file exists + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if - call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + if (.not. parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate(proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! Set time variables from file + mytime = file_time + dt = file_dt + + bub_id = proc_bubble_counts(proc_rank + 1) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + start_idx_part(2) = 0 + lsizes(1) = bub_id + lsizes(2) = lag_io_vars - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars - ! Open the file to write all flow variables - write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=particle_file) + if (bub_id > 0) then + + allocate (MPI_IO_DATA_lag_bubbles(bub_id, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - if (particle_file) then - call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) - call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & - mpi_p, status, ierr) - do i = 1, tot_data - id = int(MPI_IO_DATA_lag_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain_physical(inputvals(1:3)) - if (indomain .and. (id > 0)) then - bub_id = bub_id + 1 - nBubs = bub_id ! local number of bubbles - lag_id(bub_id, 1) = id ! global ID - lag_id(bub_id, 2) = bub_id ! 
local ID - mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) - mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) - mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) - intfc_rad(bub_id, 1) = inputvals(10) - intfc_vel(bub_id, 1) = inputvals(11) - bub_R0(bub_id) = inputvals(12) - Rmax_stats(bub_id) = inputvals(13) - Rmin_stats(bub_id) = inputvals(14) - bub_dphidt(bub_id) = inputvals(15) - gas_p(bub_id, 1) = inputvals(16) - gas_mv(bub_id, 1) = inputvals(17) - gas_mg(bub_id) = inputvals(18) - gas_betaT(bub_id) = inputvals(19) - gas_betaC(bub_id) = inputvals(20) - cell = fd_number - buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - end if + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars * bub_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + nBubs = bub_id + + do i = 1, bub_id + lag_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) + mtn_pos(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 2:4) + mtn_posPrev(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 5:7) + mtn_vel(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 8:10) + intfc_rad(i, 1) = MPI_IO_DATA_lag_bubbles(i, 11) + intfc_vel(i, 1) = MPI_IO_DATA_lag_bubbles(i, 12) + bub_R0(i) = MPI_IO_DATA_lag_bubbles(i, 13) + Rmax_stats(i) = MPI_IO_DATA_lag_bubbles(i, 14) + Rmin_stats(i) = MPI_IO_DATA_lag_bubbles(i, 15) + bub_dphidt(i) = MPI_IO_DATA_lag_bubbles(i, 16) + gas_p(i, 1) = MPI_IO_DATA_lag_bubbles(i, 17) + gas_mv(i, 1) = MPI_IO_DATA_lag_bubbles(i, 18) + gas_mg(i) = MPI_IO_DATA_lag_bubbles(i, 19) + gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20) + gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21) end do + deallocate (MPI_IO_DATA_lag_bubbles) + + else + nBubs = 0 + + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + end if + + print*, proc_rank, lag_id + + if (proc_rank == 0) then + write(*,'(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write(*,'(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt end if - call MPI_FILE_CLOSE(ifile, ierr) + + deallocate(proc_bubble_counts) #endif end subroutine s_restart_bubbles @@ -1582,7 +1649,7 @@ contains character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist - integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn + integer :: bub_id, tot_part integer :: i, k #ifdef MFC_MPI @@ -1593,6 +1660,9 @@ contains integer :: view integer, dimension(2) :: gsizes, lsizes, start_idx_part integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp bub_id = 0._wp if (nBubs /= 0) then @@ -1605,78 +1675,60 @@ contains if (.not. parallel_io) return + lsizes(1) = bub_id + lsizes(2) = lag_io_vars + ! 
Total number of particles call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & MPI_SUM, MPI_COMM_WORLD, ierr) - ! Total number of particles written so far - call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - lsizes(1) = max(1, bub_id) - lsizes(2) = 21 + call MPI_ALLGATHER(bub_id, 1, MPI_INTEGER, proc_bubble_counts, 1, MPI_INTEGER, & + MPI_COMM_WORLD, ierr) - ! if the partcle number is zero, put 1 since MPI cannot deal with writing - ! zero particle - part_order(:) = 1 - part_order(proc_rank + 1) = max(1, bub_id) - - call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & - MPI_MAX, MPI_COMM_WORLD, ierr) - - gsizes(1) = sum(part_ord_mpi(1:num_procs)) - gsizes(2) = 21 - - start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) + ! Calculate starting index for this processor's particles + call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (proc_rank == 0) start_idx_part(1) = 0 start_idx_part(2) = 0 - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + gsizes(1) = tot_part + gsizes(2) = lag_io_vars + + write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - ! Writing down the total number of particles + ! Clean up existing file if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - write (9) gsizes(1), mytime, dt - close (9) + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (file_exist) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if end if - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) - - ! Open the file to write all flow variables - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Write header using MPI I/O for consistency + call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, mytime, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, dt, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, num_procs, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, proc_bubble_counts, num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) - - disp = 0._wp + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - - ! 
Cycle through list - i = 1 - - if (bub_id == 0) then - MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp - else + if (bub_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(max(1, bub_id), 1:lag_io_vars)) + i = 1 do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) @@ -1692,21 +1744,47 @@ contains MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - i = i + 1 - end if - end do - end if + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & - mpi_p, status, ierr) + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars * bub_id, mpi_p, status, ierr) - deallocate (MPI_IO_DATA_lag_bubbles) + call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (MPI_IO_DATA_lag_bubbles) + + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if #endif diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index dfa071ea67..8d9aa29a36 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1261,7 +1261,6 @@ contains call acc_present_dump() #endif - call s_initialize_mpi_common_module() call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index c28857c8d4..512c6fc0b6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -423,6 +423,24 @@ def analytic(self): 'pres_inf_wrt': ParamType.LOG, 'cons_vars_wrt': ParamType.LOG, 'prim_vars_wrt': ParamType.LOG, + 'lag_header': ParamType.LOG, + 'lag_txt_wrt': ParamType.LOG, + 'lag_db_wrt': ParamType.LOG, + 'lag_id_wrt': ParamType.LOG, + 'lag_pos_wrt': ParamType.LOG, + 'lag_pos_prev_wrt': ParamType.LOG, + 'lag_vel_wrt': ParamType.LOG, + 'lag_rad_wrt': ParamType.LOG, + 'lag_rvel_wrt': ParamType.LOG, + 'lag_r0_wrt': ParamType.LOG, + 'lag_rmax_wrt': ParamType.LOG, + 'lag_rmin_wrt': ParamType.LOG, + 'lag_dphidt_wrt': ParamType.LOG, + 'lag_pres_wrt': ParamType.LOG, + 'lag_mv_wrt': ParamType.LOG, + 'lag_mg_wrt': ParamType.LOG, + 'lag_betaT_wrt': ParamType.LOG, + 'lag_betaC_wrt': ParamType.LOG, 'c_wrt': ParamType.LOG, 'omega_wrt': ParamType.LOG, 'qbmm': ParamType.LOG, From 40009ab37721eaaf335c64a7cbcdc0a1f1c6a699 Mon Sep 17 00:00:00 2001 From: Ben 
Wilfong Date: Sun, 29 Jun 2025 14:43:01 -0400 Subject: [PATCH 41/62] empty array writing and perlin noise ICs --- src/post_process/m_data_output.fpp | 186 ++++++++++++++++++---- src/post_process/m_global_parameters.fpp | 2 +- src/post_process/m_mpi_proxy.fpp | 2 +- src/pre_process/include/3dHardcodedIC.fpp | 48 ++++++ src/pre_process/m_global_parameters.fpp | 8 + src/pre_process/m_mpi_proxy.fpp | 3 +- src/pre_process/m_start_up.fpp | 3 +- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 53 +++--- src/simulation/m_time_steppers.fpp | 12 +- toolchain/mfc/run/case_dicts.py | 5 + 11 files changed, 261 insertions(+), 63 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index c1dde087f5..eaecab573c 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1275,6 +1275,8 @@ contains real(wp), dimension(1:1, 1:lag_io_vars) :: dummy character(LEN=4*name_len), dimension(num_procs) :: meshnames integer, dimension(num_procs) :: meshtypes + real(wp) :: dummy_data + integer :: i, j @@ -1284,6 +1286,7 @@ contains real(wp), dimension(:), allocatable :: pressure, mv, mg, betaT, betaC dummy = 0._wp + dummy_data = 0._wp ! Construct file path write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' @@ -1437,97 +1440,212 @@ contains DB_DOUBLE, DB_F77NULL, ierr) if (lag_id_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_id', bub_id, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_id', t_step, bub_id, nBub) end if if (lag_vel_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_vel1', vx, nBub, t_step) - call s_write_lag_variable_to_formatted_database_file('part_vel2', vy, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step, vx, nBub) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step, vy, nBub) if (p > 0) then - call s_write_lag_variable_to_formatted_database_file('part_vel3', vz, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step, vz, nBub) end if end if if (lag_rad_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_radius', radius, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step, radius, nBub) end if if (lag_rvel_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_rdot', rvel, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step, rvel, nBub) end if if (lag_r0_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_r0', rnot, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step, rnot, nBub) end if if (lag_rmax_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_rmax', rmax, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step, rmax, nBub) end if if (lag_rmin_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_rmin', rmin, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step, rmin, nBub) end if if (lag_dphidt_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_dphidt', dphidt, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step, dphidt, nBub) end if if (lag_pres_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_pressure', pressure, nBub, t_step) + call 
s_write_lag_variable_to_formatted_database_file('part_pressure', t_step, pressure, nBub) end if if (lag_mv_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_mv', mv, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step, mv, nBub) end if if (lag_mg_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_mg', mg, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_mg', t_step, mg, nBub) end if if (lag_betaT_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_betaT', betaT, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step, betaT, nBub) end if if (lag_betaC_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_betaC', betaC, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step, betaC, nBub) end if deallocate(bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & betaT, betaC) - end if + deallocate (MPI_IO_DATA_lg_bubbles) + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - deallocate (MPI_IO_DATA_lg_bubbles) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + if (proc_rank == 0) then + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH + end do + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + dummy_data, dummy_data, dummy_data, 0, & + DB_DOUBLE, DB_F77NULL, ierr) + + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', t_step) + end if + + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step) + end if + end if + + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step) + end if + + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step) + end if + + if (lag_rmin_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', t_step) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step) + end if + + if (lag_mg_wrt) then + call 
s_write_lag_variable_to_formatted_database_file('part_mg', t_step) + end if + + if (lag_betaT_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step) + end if + + end if #endif end subroutine s_write_lag_bubbles_to_formatted_database_file - subroutine s_write_lag_variable_to_formatted_database_file(varname, data, nBubs, t_step) + subroutine s_write_lag_variable_to_formatted_database_file(varname, t_step, data, nBubs) character(len=*), intent(in) :: varname - real(wp), dimension(1:nBubs), intent(in) :: data - integer, intent(in) :: nBubs, t_step + integer, intent(in) :: t_step + real(wp), dimension(1:nBubs), intent(in), optional :: data + integer, intent(in), optional :: nBubs character(len=64), dimension(num_procs) :: var_names integer, dimension(num_procs) :: var_types + real(wp) :: dummy_data integer :: i - if (proc_rank == 0) then - do i = 1, num_procs - write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & - '/', t_step, '.silo:'//trim(varname) - var_types(i) = DB_POINTVAR - end do - err = DBSET2DSTRLEN(len(var_names(1))) - err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & - num_procs, var_names, & - len_trim(var_names), & - var_types, DB_F77NULL, ierr) - end if + dummy_data = 0._wp - err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & - 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + if (present(nBubs) .and. present(data)) then + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + else + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBSETEMPTYOK(1) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) + end if end subroutine s_write_lag_variable_to_formatted_database_file diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 4d0dad06ac..22d5739bc9 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -446,7 +446,7 @@ contains schlieren_wrt = .false. sim_data = .false. cf_wrt = .false. - lag_txt_wrt = .true. + lag_txt_wrt = .false. lag_header = .true. lag_db_wrt = .false. lag_id_wrt = .true. 
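The per-variable switches above are plain post_process namelist logicals, so enabling the new Silo particle output only requires toggling them from the case file. Below is a minimal sketch, assuming the usual MFC Python case dictionary and its 'T'/'F' logical convention; only the Lagrangian-output keys are shown and the rest of the case is omitted.

    # Hedged sketch (not part of the patch): select Silo database output for the
    # Lagrangian bubbles and a few per-particle variables; the keys match the
    # case_dicts.py entries added in this series.
    lag_output = {
        'bubbles_lagrange': 'T',
        'lag_db_wrt'      : 'T',   # write bubbles to the formatted (Silo) database
        'lag_txt_wrt'     : 'F',   # per-bubble text output now defaults to off
        'lag_id_wrt'      : 'T',   # particle IDs
        'lag_vel_wrt'     : 'T',   # particle velocities
        'lag_rad_wrt'     : 'T',   # radii
        'lag_pres_wrt'    : 'T',   # internal bubble pressure
    }

Ranks that hold no bubbles still pass through the writer: DBSETEMPTYOK(1) together with a zero-length DBPUTPM/DBPUTPV1 call registers an empty point mesh or variable, so the multimesh and multivar entries in the root file stay consistent across processors.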
diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 3917d18b74..cfe43ed80f 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -112,7 +112,7 @@ contains & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & - & 'lag_betaT_wrt', 'lag_betaC_wrt' ] + & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index e0018598a7..1dc1969392 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -5,7 +5,38 @@ real(wp) :: eps + real(wp) :: pres + real(wp), dimension(0:m_glb, 0:p_glb) :: ih + integer :: i, j, pos, start, end + character(len=10000) :: line + character(len=25) :: value + + if (interface_file /= '.') then + open(unit=10, file=trim(interface_file), status="old", action="read") + do i = 0, m_glb + read(10, '(A)') line ! Read a full line as a string + start = 1 + + do j = 0, p_glb + end = index(line(start:), ',') ! Find the next comma + if (end == 0) then + value = trim(adjustl(line(start:))) ! Last value in the line + else + value = trim(adjustl(line(start:start+end-2))) ! Extract substring + start = start + end ! Move to next value + end if + read(value, *) ih(i, j) ! Convert string to numeric value + if (.not. f_is_default(normMag)) ih(i,j )= ih(i,j) * normMag + if (.not. f_is_default(normFac)) ih(i,j) = ih(i,j) / normFac + end do + end do + close(10) + + print*, "Interface file "//trim(interface_file)//" read" + end if + eps = 1e-9_wp + #:enddef #:def Hardcoded3D() @@ -56,6 +87,23 @@ q_prim_vf(advxe)%sf(i, j, k) = patch_icpp(1)%alpha(2) end if + case (302) ! (3D Perlin Noise Interface) + + alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & + tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*100._wp)) + + q_prim_vf(advxb)%sf(i,j,k) = alph + q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph + + q_prim_vf(contxb)%sf(i,j,k) = q_prim_vf(advxb)%sf(i,j,k) * 1._wp + q_prim_vf(contxe)%sf(i,j,k) = q_prim_vf(advxe)%sf(i,j,k) * (1._wp / 950._wp) + + q_prim_vf(E_idx)%sf(i,j,k) = p0 + & + (q_prim_vf(contxb)%sf(i,j,k) + q_prim_vf(contxe)%sf(i,j,k)) * g0 * & + (ih(start_idx(1) + i, start_idx(3) + k) - y_cc(j)) + + if (surface_tension) q_prim_vf(c_idx)%sf(i,j,k) = alph + ! Put your variable assignments here case default call s_int_to_str(patch_id, iStr) diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 5c34086d5a..a8a5192be2 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -28,6 +28,9 @@ module m_global_parameters logical :: old_ic, non_axis_sym !< Use existing IC data integer :: t_step_old, t_step_start !< Existing IC/grid folder + character(LEN=path_len) :: interface_file + real(wp) :: normFac, normMag, g0, p0 + logical :: cfl_adap_dt, cfl_const_dt, cfl_dt integer :: n_start, n_start_old @@ -314,6 +317,11 @@ contains old_ic = .false. t_step_old = dflt_int t_step_start = dflt_int + interface_file = '.' + normFac = dflt_real + normMag = dflt_real + g0 = dflt_real + p0 = dflt_real cfl_adap_dt = .false. cfl_const_dt = .false. 
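For the new hardcoded IC, interface_file is parsed one row per global x-index (i = 0..m_glb), each row holding p_glb + 1 comma-separated heights ih(i,j); normMag and normFac rescale the heights, and g0/p0 feed the hydrostatic pressure of patch 302. A minimal generator sketch follows, assuming NumPy and using smoothed random noise purely as a stand-in for the Perlin noise named in the commit message; the file name and grid sizes are placeholders.

    # Hedged sketch (not part of the patch): write an interface-height file in the
    # comma-separated layout read by the case-302 hardcoded IC
    # (rows i = 0..m_glb, columns j = 0..p_glb).
    import numpy as np

    m_glb, p_glb = 199, 199                      # placeholder global cell counts
    rng = np.random.default_rng(0)
    ih = rng.standard_normal((m_glb + 1, p_glb + 1))

    # Crude neighbour averaging as a stand-in for Perlin noise.
    for _ in range(10):
        ih = 0.25*(np.roll(ih, 1, axis=0) + np.roll(ih, -1, axis=0)
                   + np.roll(ih, 1, axis=1) + np.roll(ih, -1, axis=1))

    # One line per i, p_glb + 1 comma-separated values per line, short enough
    # for the 10000-character line buffer and 25-character value field used
    # by the reader above.
    np.savetxt('interface.dat', ih, fmt='%.10e', delimiter=',')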
diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index c7c7a23add..92f9e0f78a 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -41,6 +41,7 @@ contains ! Logistics call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(interface_file, len(interface_file), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) #:for VAR in ['t_step_old', 't_step_start', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & & 'loops_x', 'loops_y', 'loops_z', 'model_eqns', 'num_fluids', & @@ -70,7 +71,7 @@ contains & 'perturb_flow_mag', 'pref', 'rhoref', 'poly_sigma', 'R0ref', & & 'Web', 'Ca', 'Re_inv', 'sigR', 'sigV', 'rhoRV', 'palpha_eps', & & 'ptgalpha_eps', 'sigma', 'pi_fac', 'mixlayer_vel_coef', & - & 'mixlayer_domain', 'Bx0' ] + & 'mixlayer_domain', 'Bx0', 'normMag', 'normFac', 'g0', 'p0' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 30c38cdfdf..8ee3867cbc 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,7 +149,8 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & - patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params + patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params, & + interface_file, normFac, normMag, g0, p0 ! Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ef74cbcee..fe84ec68d4 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -98,7 +98,7 @@ contains !! so the system can be advanced in time !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables - subroutine s_compute_body_forces_rhs(q_cons_vf, q_prim_vf, rhs_vf) + subroutine s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index ee45c77e6e..2ca4e2da7c 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1097,8 +1097,9 @@ contains !> This subroutine updates the Lagrange variables using the tvd RK time steppers. !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. !! 
@param stage Current tvd RK stage - impure subroutine s_update_lagrange_tdv_rk(stage) + impure subroutine s_update_lagrange_tdv_rk(q_cons_vf, stage) + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: stage integer :: k @@ -1116,7 +1117,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1141,7 +1142,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) @@ -1156,7 +1157,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1182,7 +1183,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) @@ -1197,7 +1198,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) elseif (stage == 3) then !$acc parallel loop gang vector default(present) private(k) @@ -1212,7 +1213,8 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1230,16 +1232,20 @@ contains !> This subroutine enforces reflective and wall boundary conditions for EL bubbles !! @param dest Destination for the bubble position update - impure subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest) + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: dest integer :: k, i, patch_id, offset integer, dimension(3) :: cell + real(wp), dimension(3) :: scoord !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs keep_bubble(k) = 1 + ! Relocate bubbles at solid boundaries and delete bubbles that leave + ! buffer regions if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) @@ -1278,20 +1284,31 @@ contains end if end if - if (ib) then - cell = fd_number - buff_size - call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + if (keep_bubble(k) == 1) then + ! 
Remove bubbles that are no longer in a liquid + !cell = fd_number - buff_size + !call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then - patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + !if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + !keep_bubble(k) = 0 + !end if - do i = 1, num_dims - mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & - levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), patch_id) - end do + ! Move bubbles back to surface of IB + if (ib) then cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then + patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + + do i = 1, num_dims + mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & + levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & + * levelset%sf(cell(1), cell(2), cell(3), patch_id) + end do + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + end if end if end if end do diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index b1c338b5c9..b10e66badf 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -375,7 +375,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -477,7 +477,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -549,7 +549,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -659,7 +659,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -731,7 +731,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -804,7 +804,7 @@ contains ! Stage 3 of 3 call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=3) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 512c6fc0b6..1e18dcf69b 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -102,6 +102,11 @@ def analytic(self): 'bubbles_lagrange': ParamType.LOG, 'lag_params%vel_model': ParamType.INT, 'fd_order': ParamType.INT, + 'normFac': ParamType.REAL, + 'interface_file': ParamType.STR, + 'normMag': ParamType.REAL, + 'g0': ParamType.REAL, + 'p0': ParamType.REAL, }) for ib_id in range(1, 10+1): From 50e977247071c7a2bc1caefbfa8dab063f4b8946 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 8 Jul 2025 17:50:36 -0400 Subject: [PATCH 42/62] gpu macros and minro fixes --- src/post_process/m_data_output.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 39 ++++++++++++------------- src/simulation/m_bubbles_EL_kernels.fpp | 4 +-- src/simulation/m_mpi_proxy.fpp | 3 +- src/simulation/m_surface_tension.fpp | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 639e376512..8a85f47cc3 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1601,7 +1601,7 @@ contains character(len=*), intent(in) :: varname integer, intent(in) :: t_step - real(wp), dimension(1:nBubs), intent(in), optional :: data + real(wp), dimension(1:), intent(in), optional :: data integer, intent(in), optional :: nBubs character(len=64), dimension(num_procs) :: var_names diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 382d08b279..3720a6a195 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -81,7 +81,7 @@ module m_bubbles_EL integer, allocatable, dimension(:) :: keep_bubble, prefix_sum integer :: active_bubs - !$acc declare create(keep_bubble, prefix_sum, active_bubs) + $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') contains @@ -903,7 +903,7 @@ contains !! @param f_pinfl Driving pressure !! @param cell Bubble cell !! @param Romega Control volume radius - pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) + impure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) $:GPU_ROUTINE(function_name='s_get_pinf',parallelism='[seq]', & & cray_inline=True) @@ -1242,9 +1242,8 @@ contains integer, intent(in) :: dest integer :: k, i, patch_id, offset integer, dimension(3) :: cell - real(wp), dimension(3) :: scoord - !$acc parallel loop gang vector default(present) private(cell) + $:GPU_PARALLEL_LOOP(private='[cell]') do k = 1, nBubs keep_bubble(k) = 1 @@ -1290,12 +1289,12 @@ contains if (keep_bubble(k) == 1) then ! Remove bubbles that are no longer in a liquid - !cell = fd_number - buff_size - !call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - !if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then - !keep_bubble(k) = 0 - !end if + if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + keep_bubble(k) = 0 + end if ! 
Move bubbles back to surface of IB if (ib) then @@ -1305,11 +1304,13 @@ contains if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & * levelset%sf(cell(1), cell(2), cell(3), patch_id) end do + cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) end if @@ -1318,12 +1319,11 @@ contains end do call nvtxStartRange("LAG-BC-DEV2HOST") - !$acc update host(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, & - !$acc keep_bubble, nBubs) + $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, keep_bubble, & + & nBubs]') call nvtxEndRange do k = 1, nBubs @@ -1380,11 +1380,10 @@ contains end if call nvtxStartRange("LAG-BC-HOST2DEV") - !$acc update device(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts]') call nvtxEndRange end subroutine s_enforce_EL_bubbles_boundary_conditions diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 9083012bd3..cbc0ca5d4b 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -433,7 +433,7 @@ contains !! @param q_prim_vf Eulerian field with primitive variables !! @return v Interpolated velocity at the position of the bubble pure function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) -!$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pos integer, dimension(3), intent(in) :: cell integer, intent(in) :: i @@ -489,7 +489,7 @@ contains !! @param q_prim_vf Eulerian field with primitive variables !! 
@return a Acceleration of the bubble in direction i pure function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) -!$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho integer, dimension(3), intent(in) :: cell integer, intent(in) :: i diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 8aad16be9e..0c9488ed57 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -57,7 +57,8 @@ module m_mpi_proxy character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff type(bounds_info), dimension(3) :: comm_coords integer :: p_buff_size, p_var_size - !$acc declare create(p_send_counts, comm_coords) + + $:GPU_DECLARE(create='[p_send_counts, comm_coords]') contains diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 30b6fcec2b..67c31cf74e 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -280,7 +280,7 @@ contains do k = 0, n do j = 0, m c_divs(num_dims + 1)%sf(j, k, l) = 0._wp - !s$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims c_divs(num_dims + 1)%sf(j, k, l) = & c_divs(num_dims + 1)%sf(j, k, l) + & From 35f6b01120851c499c1abc8187bd60fa98d0a689 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 11 Jul 2025 13:54:30 -0400 Subject: [PATCH 43/62] adap dt moving bubbles and 4th order differences --- src/pre_process/m_mpi_proxy.fpp | 3 +- src/simulation/m_bubbles.fpp | 28 ++- src/simulation/m_bubbles_EL.fpp | 237 ++++++++++++++++++------ src/simulation/m_bubbles_EL_kernels.fpp | 186 ++++++++++++++----- src/simulation/m_mpi_proxy.fpp | 3 +- src/simulation/m_time_steppers.fpp | 12 +- 6 files changed, 354 insertions(+), 115 deletions(-) diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 3e175c768c..5901f253eb 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -48,7 +48,8 @@ contains & 'weno_order', 'precision', 'perturb_flow_fluid', & & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& & 'R0_type', 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & - & 'num_bc_patches', 'lag_params%vel_model', 'mixlayer_perturb_nk' ] + & 'num_bc_patches', 'lag_params%vel_model', 'mixlayer_perturb_nk', & + & 'fd_order' ] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index db04cef7d7..190bab2033 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -17,6 +17,8 @@ module m_bubbles use m_helper_basic !< Functions to compare floating point numbers + use m_bubbles_EL_kernels + implicit none real(wp) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -467,7 +469,8 @@ contains pure subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, & bub_id, fmass_v, fmass_n, fbeta_c, & - fbeta_t, fCson, adap_dt_stop) + fbeta_t, fCson, adap_dt_stop, fRe, fPos, & + fVel, cell, q_prim_vf) $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', & & cray_inline=True) @@ -477,14 +480,18 @@ contains integer, intent(in) :: bub_id real(wp), intent(in) :: fmass_n, fbeta_c, fbeta_t, fCson integer, intent(inout) :: adap_dt_stop + real(wp), intent(inout), dimension(3), optional :: fPos, fVel + real(wp), intent(in), optional :: fRe + integer, intent(in), dimension(3), optional :: cell + type(scalar_field), 
intent(in), dimension(sys_size), optional :: q_prim_vf real(wp), dimension(5) :: err !< Error estimates for adaptive time stepping real(wp) :: t_new !< Updated time step size real(wp) :: h !< Time step size real(wp), dimension(4) :: myR_tmp1, myV_tmp1, myR_tmp2, myV_tmp2 !< Bubble radius, radial velocity, and radial acceleration for the inner loop real(wp), dimension(4) :: myPb_tmp1, myMv_tmp1, myPb_tmp2, myMv_tmp2 !< Gas pressure and vapor mass for the inner loop (EL) - real(wp) :: fR2, fV2, fpb2, fmass_v2 - integer :: iter_count + real(wp) :: fR2, fV2, fpb2, fmass_v2, vTemp, aTemp + integer :: l, iter_count call s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, fCson, h) @@ -555,6 +562,21 @@ contains ! Update pb and mass_v fpb = myPb_tmp1(4) fmass_v = myMv_tmp1(4) + + if (lag_params%vel_model == 1) then + do l = 1, num_dims + vTemp = f_interpolate_velocity(fR, cell, l, q_prim_vf) + fPos(l) = fPos(l) + h * vTemp + fVel(l) = vTemp + end do + elseif (lag_params%vel_model == 2) then + do l = 1, num_dims + aTemp = f_get_acceleration(fPos(l), fR, fVel(l), fmass_n, fmass_v, & + fRe, fRho, cell, l, q_prim_vf) + fPos(l) = fPos(l) + h * fVel(l) + fVel(l) = fVel(l) + h * aTemp + end do + end if end if ! Update step size for the next sub-step diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 3720a6a195..d92a70b239 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -605,15 +605,15 @@ contains real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho + real(wp), dimension(3) :: myPos, myVel real(wp) :: gamma, pi_inf, qv real(wp), dimension(contxe) :: myalpha_rho, myalpha real(wp), dimension(2) :: Re integer, dimension(3) :: cell - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines - integer :: i, k, l + integer :: i, j, k, l call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") @@ -641,7 +641,7 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell,myPos,myVel]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs @@ -655,6 +655,8 @@ contains myBeta_c = gas_betaC(k) myBeta_t = gas_betaT(k) myR0 = bub_R0(k) + myPos = mtn_pos(k,:,2) + myVel = mtn_vel(k,:,2) ! Vapor and heat fluxes call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) @@ -681,7 +683,8 @@ contains call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & dmntait, dmBtait, dm_bub_adv_src, dm_divu, & k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) + myBeta_t, myCson, adap_dt_stop, Re(1), & + myPos, myVel, cell, q_prim_vf) ! Update bubble state intfc_rad(k, 1) = myR @@ -689,6 +692,16 @@ contains gas_p(k, 1) = myPb gas_mv(k, 1) = myMass_v + if (lag_params%vel_model == 1) then + mtn_posPrev(k,:,1) = mtn_pos(k,:,2) + mtn_pos(k,:,1) = myPos + mtn_vel(k,:,1) = myVel + elseif (lag_params%vel_model == 2) then + mtn_posPrev(k,:,1) = mtn_pos(k,:,2) + mtn_pos(k,:,1) = myPos + mtn_vel(k,:,1) = myVel + end if + else ! 
Radial acceleration from bubble models intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & @@ -701,21 +714,20 @@ contains do l = 1, num_dims if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & + mtn_dposdt(k, l, stage) = f_interpolate_velocity(myPos(l), & cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,2), mtn_vel(k,l,2), & - gas_mg(k), gas_mv(k, 2), & + mtn_dposdt(k, l, stage) = myVel(l) + mtn_dveldt(k, l, stage) = f_get_acceleration(myPos(l), & + myR, myVel(l), & + myMass_n, myMass_v, & Re(1), myRho, cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp end if end do - end if adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) @@ -724,6 +736,10 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") + if (adap_dt .and. lag_params%vel_model > 0) then + call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) + end if + call nvtxEndRange end subroutine s_compute_bubble_EL_dynamics @@ -913,7 +929,8 @@ contains integer, dimension(3), intent(out) :: cell real(wp), intent(out), optional :: preterm1, term2, Romega - real(wp), dimension(3) :: scoord, psi + real(wp), dimension(3) :: scoord, psi_pos, psi_x, psi_y, psi_z + real(wp), xi, eta, zeta real(wp) :: dc, vol, aux real(wp) :: volgas, term1, Rbeq, denom real(wp) :: charvol, charpres, charvol2, charpres2 @@ -940,46 +957,149 @@ contains if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation - !< Getting the cell volulme as Omega - if (p > 0) then - vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - else - if (cyl_coord) then - vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + if (fd_order == 2) then ! Bilinear interpolation + + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) else - vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if end if - end if - !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. - psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) + !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = abs((psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = abs((psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - if (p > 0) then - psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - else - psi(3) = 0._wp - end if + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = abs((psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) + else + psi_pos(3) = 0._wp + end if + + ! Calculate bilinear basis functions for each direction + ! 
For normalized coordinate xi in [0, 1], the two basis functions are: + ! phi_0(xi) = 1 - xi, phi_1(xi) = xi + + ! X-direction basis functions + psi_x(1) = 1._wp - psi_pos(1) ! Left basis function + psi_x(2) = psi_pos(1) ! Right basis function + + ! Y-direction basis functions + psi_y(1) = 1._wp - psi_pos(2) ! Left basis function + psi_y(2) = psi_pos(2) ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + psi_z(1) = 1._wp - psi_pos(3) ! Left basis function + psi_z(2) = psi_pos(3) ! Right basis function + else + psi_z(1) = 1._wp + psi_z(2) = 0._wp + end if + + !< Perform bilinear interpolation + f_pinfl = 0._wp + + if (p == 0) then !2D - 4 point interpolation (2x2) + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3)) * & + psi_x(i) * psi_y(j) + end do + end do + else !3D - 8 point interpolation (2x2x2) + do k = 1, 2 + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1) * & + psi_x(i) * psi_y(j) * psi_z(k) + end do + end do + end do + end if + + elseif (fd_order == 4) then ! Biquadratic interpolation - !< Perform bilinear interpolation - if (p == 0) then !2D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) - else !3D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + else + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if + end if + + !< Obtain biquadratic interpolation coefficients, based on the current location of the bubble. + ! For biquadratic interpolation, we need coefficients for 3 points in each direction + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = (psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1))) + + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = (psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2))) + + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = (psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3))) + else + psi_pos(3) = 0._wp + end if + + ! Calculate biquadratic basis functions for each direction + ! 
For normalized coordinate xi in [-1, 1], the three basis functions are: + ! phi_0(xi) = xi*(xi-1)/2, phi_1(xi) = (1-xi)*(1+xi), phi_2(xi) = xi*(xi+1)/2 + + ! X-direction basis functions + xi = 2._wp*psi_pos(1) - 1._wp ! Convert to [-1, 1] range + psi_x(1) = xi*(xi - 1._wp)/2._wp ! Left basis function + psi_x(2) = (1._wp - xi)*(1._wp + xi) ! Center basis function + psi_x(3) = xi*(xi + 1._wp)/2._wp ! Right basis function + + ! Y-direction basis functions + eta = 2._wp*psi_pos(2) - 1._wp ! Convert to [-1, 1] range + psi_y(1) = eta*(eta - 1._wp)/2._wp ! Left basis function + psi_y(2) = (1._wp - eta)*(1._wp + eta) ! Center basis function + psi_y(3) = eta*(eta + 1._wp)/2._wp ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + zeta = 2._wp*psi_pos(3) - 1._wp ! Convert to [-1, 1] range + psi_z(1) = zeta*(zeta - 1._wp)/2._wp ! Left basis function + psi_z(2) = (1._wp - zeta)*(1._wp + zeta) ! Center basis function + psi_z(3) = zeta*(zeta + 1._wp)/2._wp ! Right basis function + else + psi_z(1) = 0._wp + psi_z(2) = 1._wp + psi_z(3) = 0._wp + end if + + !< Perform biquadratic interpolation + f_pinfl = 0._wp + + if (p == 0) then !2D - 9 point interpolation (3x3) + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3)) * & + psi_x(i) * psi_y(j) + end do + end do + else !3D - 27 point interpolation (3x3x3) + do k = 1, 3 + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2) * & + psi_x(i) * psi_y(j) * psi_z(k) + end do + end do + end do + end if end if !R_Omega @@ -1101,9 +1221,9 @@ contains !> This subroutine updates the Lagrange variables using the tvd RK time steppers. !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. !! 
@param stage Current tvd RK stage - impure subroutine s_update_lagrange_tdv_rk(q_cons_vf, stage) + impure subroutine s_update_lagrange_tdv_rk(q_prim_vf, stage) - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf integer, intent(in) :: stage integer :: k @@ -1121,7 +1241,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1146,7 +1266,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=2) elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1161,7 +1281,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1187,7 +1307,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=2) elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1202,7 +1322,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=2) elseif (stage == 3) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1217,8 +1337,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) - + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1236,9 +1355,9 @@ contains !> This subroutine enforces reflective and wall boundary conditions for EL bubbles !! 
@param dest Destination for the bubble position update - impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest) + impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest) - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf integer, intent(in) :: dest integer :: k, i, patch_id, offset integer, dimension(3) :: cell @@ -1292,7 +1411,7 @@ contains cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + if (q_prim_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then keep_bubble(k) = 0 end if @@ -1322,7 +1441,7 @@ contains $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, keep_bubble, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, & & nBubs]') call nvtxEndRange @@ -1383,7 +1502,7 @@ contains $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts]') + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs]') call nvtxEndRange end subroutine s_enforce_EL_bubbles_boundary_conditions diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index cbc0ca5d4b..3424e4081f 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -440,38 +440,86 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp) :: v - real(wp) :: L1, L2, L3 - real(wp) :: x1, x2, x3 - real(wp) :: y1, y2, y3 - - if (i == 1) then - x1 = x_cc(cell(1) - 1) - y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) - x2 = x_cc(cell(1)) - y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) - x3 = x_cc(cell(1) + 1) - y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) - elseif (i == 2) then - x1 = y_cc(cell(2) - 1) - y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) - x2 = y_cc(cell(2)) - y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) - x3 = y_cc(cell(2) + 1) - y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - elseif (i == 3) then - x1 = z_cc(cell(3) - 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) - x2 = z_cc(cell(3)) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) - x3 = z_cc(cell(3) + 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - endif - - L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) - L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) - L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) - - v = L1*y1 + L2*y2 + L3*y3 + real(wp), dimension(fd_order + 1) :: xi, eta, L + + if (fd_order == 2) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 1) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(2) = x_cc(cell(1)) + eta(2) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + xi(3) = x_cc(cell(1) + 1) + eta(3) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 1) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(2) = y_cc(cell(2)) 
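+                ! Descriptive note (added for clarity, not part of the original patch):
+                ! the stencil coordinates xi(:) and velocity samples eta(:) gathered in
+                ! this branch feed the three-point Lagrange interpolant evaluated at pos
+                ! below, i.e. v = sum_k L_k(pos)*eta(k) with
+                ! L_k(pos) = prod_{m /= k} (pos - xi(m))/(xi(k) - xi(m)).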
+ eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + xi(3) = y_cc(cell(2) + 1) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 1) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(2) = z_cc(cell(3)) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + xi(3) = z_cc(cell(3) + 1) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + endif + + L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + elseif (fd_order == 4) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 2) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 2, cell(2), cell(3)) + xi(2) = x_cc(cell(1) - 1) + eta(2) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(3) = x_cc(cell(1)) + eta(3) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + xi(4) = x_cc(cell(1) + 1) + eta(4) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + xi(5) = x_cc(cell(1) + 2) + eta(5) = q_prim_vf(momxb)%sf(cell(1) + 2, cell(2), cell(3)) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 2) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 2, cell(3)) + xi(2) = y_cc(cell(2) - 1) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(3) = y_cc(cell(2)) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + xi(4) = y_cc(cell(2) + 1) + eta(4) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + xi(5) = y_cc(cell(2) + 2) + eta(5) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 2, cell(3)) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 2) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 2) + xi(2) = z_cc(cell(3) - 1) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(3) = z_cc(cell(3)) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + xi(4) = z_cc(cell(3) + 1) + eta(4) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + xi(5) = z_cc(cell(3) + 2) + eta(5) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 2) + endif + + L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + ((xi(1) - xi(2))*(xi(1) - xi(3))*(xi(1) - xi(3))*(xi(2) - xi(5))) + L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + ((xi(2) - xi(1))*(xi(2) - xi(3))*(xi(2) - xi(3))*(xi(2) - xi(5))) + L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5))) / & + ((xi(3) - xi(1))*(xi(3) - xi(2))*(xi(3) - xi(4))*(xi(3) - xi(5))) + L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + ((xi(4) - xi(1))*(xi(4) - xi(2))*(xi(4) - xi(3))*(xi(4) - xi(5))) + L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + ((xi(5) - xi(1))*(xi(5) - xi(2))*(xi(5) - xi(3))*(xi(5) - xi(4))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + L(4)*eta(4) + L(5)*eta(5) + end if end function f_interpolate_velocity @@ -497,19 +545,69 @@ contains real(wp) :: a real(wp) :: dp, vol, force, v_rel + real(wp), dimension(fd_order - 1) :: xi, eta, L + + if (fd_order == 2) then + if (i == 1) then + dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + elseif (i == 2) then + dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + elseif (i == 3) then + dp = 
(q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + end if + elseif (fd_order == 4) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 1) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 2,cell(2),cell(3))) / & + (x_cc(cell(1)) - x_cc(cell(1) - 2)) + xi(2) = x_cc(cell(1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + xi(3) = x_cc(cell(1) + 1) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1) + 2,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3))) / & + (x_cc(cell(1) + 2) - x_cc(cell(1))) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 1) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 2,cell(3))) / & + (y_cc(cell(2)) - y_cc(cell(2) - 2)) + xi(2) = y_cc(cell(2)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + xi(3) = y_cc(cell(2) + 1) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 2,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3))) / & + (y_cc(cell(2) + 2) - y_cc(cell(2))) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 1) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) ) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 2)) / & + (z_cc(cell(3)) - z_cc(cell(3) - 2)) + xi(2) = y_cc(cell(3)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + xi(3) = y_cc(cell(3) + 1) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 2) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) )) / & + (z_cc(cell(3) + 2) - z_cc(cell(3))) + end if + + L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) - if (i == 1) then - dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & - (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) - elseif (i == 2) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) - elseif (i == 3) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & - (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + dp = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) end if vol = (4._wp/3._wp) * pi * rad**3._wp diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 0c9488ed57..56c19034ff 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -145,7 +145,7 @@ contains & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches', 'fd_order', 'lag_params%vel_model'] + & 'num_bc_patches', 'lag_params%vel_model'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -755,7 +755,6 @@ contains end subroutine s_mpi_sendrecv_particles - !! This function returns a unique tag for each neighbor based on its position !! 
relative to the current process. !! @param i, j, k Indices of the neighbor in the range [-1, 1] diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index e6ade24be1..66dd1cf978 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -375,7 +375,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=1) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -477,7 +477,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=1) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -549,7 +549,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=2) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -659,7 +659,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=1) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -731,7 +731,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=2) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -804,7 +804,7 @@ contains ! Stage 3 of 3 call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=3) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=3) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size From b4e027233b02e02ec00f1725ba82c43d3bd7907b Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 11 Jul 2025 13:59:32 -0400 Subject: [PATCH 44/62] bug fix --- src/simulation/m_bubbles_EL.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index d92a70b239..ac2e166a93 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -930,7 +930,7 @@ contains real(wp), intent(out), optional :: preterm1, term2, Romega real(wp), dimension(3) :: scoord, psi_pos, psi_x, psi_y, psi_z - real(wp), xi, eta, zeta + real(wp) :: xi, eta, zeta real(wp) :: dc, vol, aux real(wp) :: volgas, term1, Rbeq, denom real(wp) :: charvol, charpres, charvol2, charpres2 From 679926aef6882d39299cc8e15cc7f754f3c638af Mon Sep 17 00:00:00 2001 From: Benjamin Wilfong Date: Sun, 27 Jul 2025 12:36:38 -0500 Subject: [PATCH 45/62] finish merge and test --- src/common/m_mpi_common.fpp | 4 ++-- src/post_process/m_data_output.fpp | 2 ++ src/post_process/m_mpi_proxy.fpp | 5 ----- src/pre_process/m_start_up.fpp | 6 +----- src/simulation/m_mpi_proxy.fpp | 10 ++++------ 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index a7cfb97995..6650b01047 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -413,6 +413,7 @@ contains integer, intent(inout) :: var_loc #ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors ! Temporary storage variable that holds the reduced sum value integer :: var_glb @@ -423,7 +424,6 @@ contains MPI_SUM, 0, MPI_COMM_WORLD, ierr) var_loc = var_glb - #endif end subroutine s_mpi_reduce_int_sum @@ -1102,7 +1102,7 @@ contains integer :: recon_order !< reconstruction order - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators integer :: ierr !< Generic flag used to identify and report MPI errors ! 
temp array to store neighbor rank coordinates diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 3114da7771..51a1827e1c 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1614,6 +1614,8 @@ contains character(len=64), dimension(num_procs) :: var_names integer, dimension(num_procs) :: var_types real(wp) :: dummy_data + + integer :: ierr !< Generic flag used to identify and report database errors integer :: i dummy_data = 0._wp diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 5c3bffad2c..fb535cd955 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -104,17 +104,12 @@ contains & 'file_per_process', 'relax', 'cf_wrt', 'igr', & & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & -<<<<<<< HEAD & 'output_partial_domain', 'relativity', 'cont_damage', & & 'lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io' ] -======= - & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io' ] - ->>>>>>> upstream/master call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 1f70e04e45..8f41a0d0d5 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,12 +149,8 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & -<<<<<<< HEAD patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params, & - interface_file, normFac, normMag, g0, p0 -======= - patch_bc, Bx0, relativity, cont_damage, igr, igr_order ->>>>>>> upstream/master + interface_file, normFac, normMag, g0, p0, igr, igr_order ! 
Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 7a2b8d5fa1..9e4412ba81 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -90,6 +90,7 @@ contains integer :: i, j, k integer :: real_size, int_size, nReal, lag_num_ts + integer :: ierr !< Generic flag used to identify and report MPI errors call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) @@ -140,13 +141,9 @@ contains & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & -<<<<<<< HEAD - & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches', 'lag_params%vel_model'] -======= & 'num_source','relax_model','num_ibs','n_start','num_igr_iters', & - & 'num_bc_patches','num_igr_warm_start_iters','adap_dt_max_iters' ] ->>>>>>> upstream/master + & 'num_bc_patches','num_igr_warm_start_iters','adap_dt_max_iters', & + & 'lag_params%vel_model'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -667,6 +664,7 @@ contains integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size integer :: i, j, k, l, q + integer :: ierr !< Generic flag used to identify and report MPI errors do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end From ca56a8fec386305065e76dd8b6a66f5a0d8feab3 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 1 Sep 2025 12:40:01 -0400 Subject: [PATCH 46/62] clean merge --- src/common/m_helper_basic.fpp | 2 +- src/common/m_mpi_common.fpp | 6 +- src/post_process/m_data_output.fpp | 177 +++++++++++------------ src/post_process/m_global_parameters.fpp | 2 +- src/pre_process/m_global_parameters.fpp | 2 +- src/simulation/m_bubbles.fpp | 6 +- src/simulation/m_bubbles_EL.fpp | 135 +++++++++-------- src/simulation/m_bubbles_EL_kernels.fpp | 124 ++++++++-------- src/simulation/m_global_parameters.fpp | 12 +- src/simulation/m_mpi_proxy.fpp | 52 +++---- toolchain/mfc/run/case_dicts.py | 2 +- 11 files changed, 259 insertions(+), 261 deletions(-) diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index db5ebc256c..8bd6674eec 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -10,7 +10,7 @@ module m_helper_basic implicit none - private; + private; public :: f_approx_equal, & f_approx_in_array, & f_is_default, & diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index ee71af35ac..5eae8fcabb 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1641,8 +1641,8 @@ contains end if @:ALLOCATE(neighbor_ranks(nidx(1)%beg:nidx(1)%end, & - nidx(2)%beg:nidx(2)%end, & - nidx(3)%beg:nidx(3)%end)) + nidx(2)%beg:nidx(2)%end, & + nidx(3)%beg:nidx(3)%end)) do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end @@ -1652,7 +1652,7 @@ contains if (num_dims > 1) neighbor_coords(2) = proc_coords(2) + j if (num_dims > 2) neighbor_coords(3) = proc_coords(3) + k call MPI_CART_RANK(MPI_COMM_CART, neighbor_coords, & - neighbor_ranks(i, j, k), ierr) + neighbor_ranks(i, j, k), ierr) end if end do end do diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 51a1827e1c..2b491f7df2 100644 --- a/src/post_process/m_data_output.fpp +++ 
b/src/post_process/m_data_output.fpp @@ -1126,11 +1126,11 @@ contains dummy = 0._wp ! Construct file path - write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Check if file exists - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (.not. file_exist) then call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if @@ -1139,7 +1139,7 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) @@ -1154,15 +1154,15 @@ contains call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate(proc_bubble_counts(file_num_procs)) + allocate (proc_bubble_counts(file_num_procs)) if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) ! Skip to processor counts position disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & - MPI_OFFSET_KIND) + MPI_OFFSET_KIND) call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) @@ -1186,7 +1186,7 @@ contains mpi_info_int, ifile, ierr) disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & 'native', mpi_info_null, ierr) @@ -1202,47 +1202,47 @@ contains open (unit=29, file=file_loc, form='formatted', position='rewind') if (lag_header) then - write(29, '(A)', advance='no') - if (lag_id_wrt) write(29, '(A8)', advance='no') 'id, ' - if (lag_pos_wrt) write(29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' - if (lag_pos_prev_wrt) write(29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' - if (lag_vel_wrt) write(29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' - if (lag_rad_wrt) write(29, '(A17)', advance='no') 'radius, ' - if (lag_rvel_wrt) write(29, '(A17)', advance='no') 'rvel, ' - if (lag_r0_wrt) write(29, '(A17)', advance='no') 'r0, ' - if (lag_rmax_wrt) write(29, '(A17)', advance='no') 'rmax, ' - if (lag_rmin_wrt) write(29, '(A17)', advance='no') 'rmin, ' - if (lag_dphidt_wrt) write(29, '(A17)', advance='no') 'dphidt, ' - if (lag_pres_wrt) write(29, '(A17)', advance='no') 'pressure, ' - if (lag_mv_wrt) write(29, '(A17)', advance='no') 'mv, ' - if (lag_mg_wrt) write(29, '(A17)', advance='no') 'mg, ' - if (lag_betaT_wrt) write(29, '(A17)', advance='no') 'betaT, ' - if (lag_betaC_wrt) write(29, '(A17)', advance='no') 'betaC, ' - write(29, '(A15)') 'time' + write (29, '(A)', advance='no') + if (lag_id_wrt) write (29, '(A8)', advance='no') 'id, ' + if (lag_pos_wrt) write (29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' + if (lag_pos_prev_wrt) write (29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' + if (lag_vel_wrt) write (29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' + if (lag_rad_wrt) write (29, '(A17)', advance='no') 'radius, ' + if (lag_rvel_wrt) write (29, '(A17)', advance='no') 'rvel, ' + if (lag_r0_wrt) write (29, 
'(A17)', advance='no') 'r0, ' + if (lag_rmax_wrt) write (29, '(A17)', advance='no') 'rmax, ' + if (lag_rmin_wrt) write (29, '(A17)', advance='no') 'rmin, ' + if (lag_dphidt_wrt) write (29, '(A17)', advance='no') 'dphidt, ' + if (lag_pres_wrt) write (29, '(A17)', advance='no') 'pressure, ' + if (lag_mv_wrt) write (29, '(A17)', advance='no') 'mv, ' + if (lag_mg_wrt) write (29, '(A17)', advance='no') 'mg, ' + if (lag_betaT_wrt) write (29, '(A17)', advance='no') 'betaT, ' + if (lag_betaC_wrt) write (29, '(A17)', advance='no') 'betaC, ' + write (29, '(A15)') 'time' end if do i = 1, file_tot_part id = int(MPI_IO_DATA_lg_bubbles(i, 1)) inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) if (id > 0) then - write(29, '(100(A))', advance='no') '' - - if (lag_id_wrt) write(29, '(I6, A)', advance='no') id, ', ' - if (lag_pos_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' - if (lag_pos_prev_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' - if (lag_vel_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' - if (lag_rad_wrt) write(29, '(E15.7, A)', advance='no') inputvals(10), ', ' - if (lag_rvel_wrt) write(29, '(E15.7, A)', advance='no') inputvals(11), ', ' - if (lag_r0_wrt) write(29, '(E15.7, A)', advance='no') inputvals(12), ', ' - if (lag_rmax_wrt) write(29, '(E15.7, A)', advance='no') inputvals(13), ', ' - if (lag_rmin_wrt) write(29, '(E15.7, A)', advance='no') inputvals(14), ', ' - if (lag_dphidt_wrt) write(29, '(E15.7, A)', advance='no') inputvals(15), ', ' - if (lag_pres_wrt) write(29, '(E15.7, A)', advance='no') inputvals(16), ', ' - if (lag_mv_wrt) write(29, '(E15.7, A)', advance='no') inputvals(17), ', ' - if (lag_mg_wrt) write(29, '(E15.7, A)', advance='no') inputvals(18), ', ' - if (lag_betaT_wrt) write(29, '(E15.7, A)', advance='no') inputvals(19), ', ' - if (lag_betaC_wrt) write(29, '(E15.7, A)', advance='no') inputvals(20), ', ' - write(29, '(E15.7)') time_real + write (29, '(100(A))', advance='no') '' + + if (lag_id_wrt) write (29, '(I6, A)', advance='no') id, ', ' + if (lag_pos_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' + if (lag_pos_prev_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' + if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' + if (lag_rad_wrt) write (29, '(E15.7, A)', advance='no') inputvals(10), ', ' + if (lag_rvel_wrt) write (29, '(E15.7, A)', advance='no') inputvals(11), ', ' + if (lag_r0_wrt) write (29, '(E15.7, A)', advance='no') inputvals(12), ', ' + if (lag_rmax_wrt) write (29, '(E15.7, A)', advance='no') inputvals(13), ', ' + if (lag_rmin_wrt) write (29, '(E15.7, A)', advance='no') inputvals(14), ', ' + if (lag_dphidt_wrt) write (29, '(E15.7, A)', advance='no') inputvals(15), ', ' + if (lag_pres_wrt) write (29, '(E15.7, A)', advance='no') inputvals(16), ', ' + if (lag_mv_wrt) write (29, '(E15.7, A)', advance='no') inputvals(17), ', ' + if (lag_mg_wrt) write (29, '(E15.7, A)', advance='no') inputvals(18), ', ' + if (lag_betaT_wrt) write (29, '(E15.7, A)', advance='no') inputvals(19), ', ' + if (lag_betaC_wrt) write (29, '(E15.7, A)', advance='no') inputvals(20), ', ' + write (29, '(E15.7)') time_real end if end do close (29) @@ -1284,7 +1284,6 @@ contains integer, dimension(num_procs) :: meshtypes real(wp) :: dummy_data - 
integer :: i, j real(wp), dimension(:), allocatable :: bub_id @@ -1296,11 +1295,11 @@ contains dummy_data = 0._wp ! Construct file path - write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Check if file exists - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (.not. file_exist) then call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if @@ -1309,7 +1308,7 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) @@ -1324,15 +1323,15 @@ contains call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate(proc_bubble_counts(file_num_procs)) + allocate (proc_bubble_counts(file_num_procs)) if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) ! Skip to processor counts position disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & - MPI_OFFSET_KIND) + MPI_OFFSET_KIND) call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) @@ -1359,31 +1358,31 @@ contains if (nBub > 0) then - allocate(bub_id(nBub)) - allocate(px(nBub)) - allocate(py(nBub)) - allocate(pz(nBub)) - allocate(ppx(nBub)) - allocate(ppy(nBub)) - allocate(ppz(nBub)) - allocate(vx(nBub)) - allocate(vy(nBub)) - allocate(vz(nBub)) - allocate(radius(nBub)) - allocate(rvel(nBub)) - allocate(rnot(nBub)) - allocate(rmax(nBub)) - allocate(rmin(nBub)) - allocate(dphidt(nBub)) - allocate(pressure(nBub)) - allocate(mv(nBub)) - allocate(mg(nBub)) - allocate(betaT(nBub)) - allocate(betaC(nBub)) + allocate (bub_id(nBub)) + allocate (px(nBub)) + allocate (py(nBub)) + allocate (pz(nBub)) + allocate (ppx(nBub)) + allocate (ppy(nBub)) + allocate (ppz(nBub)) + allocate (vx(nBub)) + allocate (vy(nBub)) + allocate (vz(nBub)) + allocate (radius(nBub)) + allocate (rvel(nBub)) + allocate (rnot(nBub)) + allocate (rmax(nBub)) + allocate (rmin(nBub)) + allocate (dphidt(nBub)) + allocate (pressure(nBub)) + allocate (mv(nBub)) + allocate (mg(nBub)) + allocate (betaT(nBub)) + allocate (betaC(nBub)) allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) + MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & @@ -1391,11 +1390,11 @@ contains ! 
Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, & - lag_io_vars * nBub, mpi_p, status, ierr) + lag_io_vars*nBub, mpi_p, status, ierr) call MPI_FILE_CLOSE(ifile, ierr) call MPI_TYPE_FREE(view, ierr) @@ -1443,8 +1442,8 @@ contains end if err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & - px, py, pz, nBub, & - DB_DOUBLE, DB_F77NULL, ierr) + px, py, pz, nBub, & + DB_DOUBLE, DB_F77NULL, ierr) if (lag_id_wrt) then call s_write_lag_variable_to_formatted_database_file('part_id', t_step, bub_id, nBub) @@ -1502,9 +1501,9 @@ contains call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step, betaC, nBub) end if - deallocate(bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & - rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & - betaT, betaC) + deallocate (bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & + rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & + betaT, betaC) deallocate (MPI_IO_DATA_lg_bubbles) else call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) @@ -1515,7 +1514,7 @@ contains ! Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) @@ -1539,8 +1538,8 @@ contains err = DBSETEMPTYOK(1) err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & - dummy_data, dummy_data, dummy_data, 0, & - DB_DOUBLE, DB_F77NULL, ierr) + dummy_data, dummy_data, dummy_data, 0, & + DB_DOUBLE, DB_F77NULL, ierr) if (lag_id_wrt) then call s_write_lag_variable_to_formatted_database_file('part_id', t_step) @@ -1629,13 +1628,13 @@ contains end do err = DBSET2DSTRLEN(len(var_names(1))) err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & - num_procs, var_names, & - len_trim(var_names), & - var_types, DB_F77NULL, ierr) + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) end if err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & - 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) else if (proc_rank == 0) then do i = 1, num_procs @@ -1646,14 +1645,14 @@ contains err = DBSET2DSTRLEN(len(var_names(1))) err = DBSETEMPTYOK(1) err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & - num_procs, var_names, & - len_trim(var_names), & - var_types, DB_F77NULL, ierr) + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) end if err = DBSETEMPTYOK(1) err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & - 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) + 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) end if end subroutine s_write_lag_variable_to_formatted_database_file diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 485df39053..cf07cc287e 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -180,7 +180,7 @@ module m_global_parameters type(int_bounds_info), dimension(3) :: nidx - integer, allocatable, 
dimension(:,:,:) :: neighbor_ranks + integer, allocatable, dimension(:, :, :) :: neighbor_ranks !! Neighbor ranks for lagrangian particle communication integer, allocatable, dimension(:) :: start_idx !< diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 2f3dadfaa4..6d8a5504a9 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -195,7 +195,7 @@ module m_global_parameters type(int_bounds_info), dimension(3) :: nidx - integer, allocatable, dimension(:,:,:) :: neighbor_ranks + integer, allocatable, dimension(:, :, :) :: neighbor_ranks !! Neighbor ranks for lagrangian particle communication integer, allocatable, dimension(:) :: start_idx !< diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 5e3a2ead80..a936254ac2 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -566,15 +566,15 @@ contains if (lag_params%vel_model == 1) then do l = 1, num_dims vTemp = f_interpolate_velocity(fR, cell, l, q_prim_vf) - fPos(l) = fPos(l) + h * vTemp + fPos(l) = fPos(l) + h*vTemp fVel(l) = vTemp end do elseif (lag_params%vel_model == 2) then do l = 1, num_dims aTemp = f_get_acceleration(fPos(l), fR, fVel(l), fmass_n, fmass_v, & fRe, fRho, cell, l, q_prim_vf) - fPos(l) = fPos(l) + h * fVel(l) - fVel(l) = fVel(l) + h * aTemp + fPos(l) = fPos(l) + h*fVel(l) + fVel(l) = fVel(l) + h*aTemp end do end if end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 4d266a001a..269b096058 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -78,7 +78,6 @@ module m_bubbles_EL $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') - integer, allocatable, dimension(:) :: keep_bubble, prefix_sum integer :: active_bubs $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') @@ -272,10 +271,10 @@ contains if (num_procs > 1) then call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) end if $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') @@ -450,11 +449,11 @@ contains dummy = 0._wp ! Construct file path - write(file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Check if file exists - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (.not. 
file_exist) then call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if @@ -463,7 +462,7 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) @@ -478,15 +477,15 @@ contains call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate(proc_bubble_counts(file_num_procs)) + allocate (proc_bubble_counts(file_num_procs)) if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) ! Skip to processor counts position disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & - MPI_OFFSET_KIND) + MPI_OFFSET_KIND) call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) @@ -518,7 +517,7 @@ contains allocate (MPI_IO_DATA_lag_bubbles(bub_id, 1:lag_io_vars)) call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) + MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & @@ -526,11 +525,11 @@ contains ! Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, & - lag_io_vars * bub_id, mpi_p, status, ierr) + lag_io_vars*bub_id, mpi_p, status, ierr) call MPI_FILE_CLOSE(ifile, ierr) call MPI_TYPE_FREE(view, ierr) @@ -568,7 +567,7 @@ contains ! Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) @@ -577,14 +576,14 @@ contains call MPI_TYPE_FREE(view, ierr) end if - print*, proc_rank, lag_id + print *, proc_rank, lag_id if (proc_rank == 0) then - write(*,'(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count - write(*,'(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt + write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt end if - deallocate(proc_bubble_counts) + deallocate (proc_bubble_counts) #endif end subroutine s_restart_bubbles @@ -655,8 +654,8 @@ contains myBeta_c = gas_betaC(k) myBeta_t = gas_betaT(k) myR0 = bub_R0(k) - myPos = mtn_pos(k,:,2) - myVel = mtn_vel(k,:,2) + myPos = mtn_pos(k, :, 2) + myVel = mtn_vel(k, :, 2) ! 
Vapor and heat fluxes call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) @@ -693,13 +692,13 @@ contains gas_mv(k, 1) = myMass_v if (lag_params%vel_model == 1) then - mtn_posPrev(k,:,1) = mtn_pos(k,:,2) - mtn_pos(k,:,1) = myPos - mtn_vel(k,:,1) = myVel + mtn_posPrev(k, :, 1) = mtn_pos(k, :, 2) + mtn_pos(k, :, 1) = myPos + mtn_vel(k, :, 1) = myVel elseif (lag_params%vel_model == 2) then - mtn_posPrev(k,:,1) = mtn_pos(k,:,2) - mtn_pos(k,:,1) = myPos - mtn_vel(k,:,1) = myVel + mtn_posPrev(k, :, 1) = mtn_pos(k, :, 2) + mtn_pos(k, :, 1) = myPos + mtn_vel(k, :, 1) = myVel end if else @@ -715,14 +714,14 @@ contains do l = 1, num_dims if (lag_params%vel_model == 1) then mtn_dposdt(k, l, stage) = f_interpolate_velocity(myPos(l), & - cell, l, q_prim_vf) + cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = myVel(l) mtn_dveldt(k, l, stage) = f_get_acceleration(myPos(l), & - myR, myVel(l), & - myMass_n, myMass_v, & - Re(1), myRho, cell, l, q_prim_vf) + myR, myVel(l), & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -1010,16 +1009,16 @@ contains if (p == 0) then !2D - 4 point interpolation (2x2) do j = 1, 2 do i = 1, 2 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3)) * & - psi_x(i) * psi_y(j) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3))* & + psi_x(i)*psi_y(j) end do end do else !3D - 8 point interpolation (2x2x2) do k = 1, 2 do j = 1, 2 do i = 1, 2 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1) * & - psi_x(i) * psi_y(j) * psi_z(k) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1)* & + psi_x(i)*psi_y(j)*psi_z(k) end do end do end do @@ -1086,16 +1085,16 @@ contains if (p == 0) then !2D - 9 point interpolation (3x3) do j = 1, 3 do i = 1, 3 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3)) * & - psi_x(i) * psi_y(j) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3))* & + psi_x(i)*psi_y(j) end do end do else !3D - 27 point interpolation (3x3x3) do k = 1, 3 do j = 1, 3 do i = 1, 3 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2) * & - psi_x(i) * psi_y(j) * psi_z(k) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2)* & + psi_x(i)*psi_y(j)*psi_z(k) end do end do end do @@ -1369,11 +1368,11 @@ contains ! Relocate bubbles at solid boundaries and delete bubbles that leave ! buffer regions if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) + .and. mtn_pos(k, 1, dest) < x_cb(-1) + intfc_rad(k, dest)) then + mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k, dest) elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + .and. 
mtn_pos(k, 1, dest) > x_cb(m) - intfc_rad(k, dest)) then + mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k, dest) elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then @@ -1381,11 +1380,11 @@ contains end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) + .and. mtn_pos(k, 2, dest) < y_cb(-1) + intfc_rad(k, dest)) then + mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k, dest) else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + .and. mtn_pos(k, 2, dest) > y_cb(n) - intfc_rad(k, dest)) then + mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k, dest) elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then @@ -1394,11 +1393,11 @@ contains if (p > 0) then if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) + .and. mtn_pos(k, 3, dest) < z_cb(-1) + intfc_rad(k, dest)) then + mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k, dest) else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + .and. mtn_pos(k, 3, dest) > z_cb(p) - intfc_rad(k, dest)) then + mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k, dest) elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then @@ -1427,7 +1426,7 @@ contains do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), patch_id) + *levelset%sf(cell(1), cell(2), cell(3), patch_id) end do cell = fd_number - buff_size @@ -1449,7 +1448,7 @@ contains if (k == 1) then prefix_sum(k) = keep_bubble(k) else - prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) + prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) end if if (k == nBubs) active_bubs = prefix_sum(k) end do @@ -1491,10 +1490,10 @@ contains call nvtxStartRange("LAG-BC-SENDRECV") call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) call nvtxEndRange end if @@ -1881,7 +1880,7 @@ contains MPI_SUM, MPI_COMM_WORLD, ierr) call MPI_ALLGATHER(bub_id, 1, MPI_INTEGER, proc_bubble_counts, 1, MPI_INTEGER, & - MPI_COMM_WORLD, ierr) + MPI_COMM_WORLD, ierr) ! 
Calculate starting index for this processor's particles call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) @@ -1891,12 +1890,12 @@ contains gsizes(1) = tot_part gsizes(2) = lag_io_vars - write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Clean up existing file if (proc_rank == 0) then - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) end if @@ -1906,8 +1905,8 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & - ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) ! Write header using MPI I/O for consistency call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) @@ -1947,7 +1946,7 @@ contains end do call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) + MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & @@ -1956,11 +1955,11 @@ contains ! Skip header (written by rank 0) disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & - num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & - lag_io_vars * bub_id, mpi_p, status, ierr) + lag_io_vars*bub_id, mpi_p, status, ierr) call MPI_FILE_CLOSE(ifile, ierr) @@ -1976,7 +1975,7 @@ contains ! 
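! The displacement passed to MPI_FILE_SET_VIEW below has to equal the exact
! byte length of the header written by rank 0 (particle total, two time
! records, processor count, and one bubble count per processor). A small
! sketch of that byte count using the standard storage_size intrinsic rather
! than the vendor sizeof extension; the rank count and the assumption that
! wp is double precision are illustrative only:
program header_bytes_sketch
    use mpi
    implicit none
    integer(kind=MPI_OFFSET_KIND) :: header_bytes
    integer :: n_ranks, int_bytes, real_bytes
    n_ranks    = 4                      ! illustrative processor count
    int_bytes  = storage_size(0)/8      ! bytes per default integer
    real_bytes = storage_size(0.0d0)/8  ! bytes per double-precision real
    header_bytes = int(2*int_bytes + 2*real_bytes + n_ranks*int_bytes, &
                       MPI_OFFSET_KIND)
    print *, 'header occupies', header_bytes, 'bytes'
end program header_bytes_sketch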
Skip header (written by rank 0) disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & - num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) @@ -2063,8 +2062,8 @@ contains intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - mtn_dposdt(i,1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) - mtn_dveldt(i,1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) + mtn_dposdt(i, 1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) + mtn_dveldt(i, 1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) end do nBubs = nBubs - 1 diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 3424e4081f..3ba776c368 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -447,28 +447,28 @@ contains xi(1) = x_cc(cell(1) - 1) eta(1) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) xi(2) = x_cc(cell(1)) - eta(2) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + eta(2) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) xi(3) = x_cc(cell(1) + 1) eta(3) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) elseif (i == 2) then xi(1) = y_cc(cell(2) - 1) eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) xi(2) = y_cc(cell(2)) - eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) xi(3) = y_cc(cell(2) + 1) eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) elseif (i == 3) then xi(1) = z_cc(cell(3) - 1) eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) xi(2) = z_cc(cell(3)) - eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) xi(3) = z_cc(cell(3) + 1) eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - endif + end if - L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) - L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) - L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) + L(1) = ((pos - xi(2))*(pos - xi(3)))/((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3)))/((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2)))/((xi(3) - xi(1))*(xi(3) - xi(2))) v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) elseif (fd_order == 4) then @@ -478,7 +478,7 @@ contains xi(2) = x_cc(cell(1) - 1) eta(2) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) xi(3) = x_cc(cell(1)) - eta(3) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + eta(3) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) xi(4) = x_cc(cell(1) + 1) eta(4) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) xi(5) = x_cc(cell(1) + 2) @@ -489,7 +489,7 @@ contains xi(2) = y_cc(cell(2) - 1) eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) xi(3) = y_cc(cell(2)) - eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) xi(4) = y_cc(cell(2) + 1) eta(4) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) xi(5) = y_cc(cell(2) + 2) @@ -500,22 +500,22 @@ contains xi(2) = z_cc(cell(3) - 1) eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) xi(3) = 
z_cc(cell(3)) - eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) xi(4) = z_cc(cell(3) + 1) eta(4) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) xi(5) = z_cc(cell(3) + 2) eta(5) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 2) - endif + end if - L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/ & ((xi(1) - xi(2))*(xi(1) - xi(3))*(xi(1) - xi(3))*(xi(2) - xi(5))) - L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/ & ((xi(2) - xi(1))*(xi(2) - xi(3))*(xi(2) - xi(3))*(xi(2) - xi(5))) - L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5))) / & + L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5)))/ & ((xi(3) - xi(1))*(xi(3) - xi(2))*(xi(3) - xi(4))*(xi(3) - xi(5))) - L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/ & ((xi(4) - xi(1))*(xi(4) - xi(2))*(xi(4) - xi(3))*(xi(4) - xi(5))) - L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/ & ((xi(5) - xi(1))*(xi(5) - xi(2))*(xi(5) - xi(3))*(xi(5) - xi(4))) v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + L(4)*eta(4) + L(5)*eta(5) @@ -536,7 +536,7 @@ contains !! @param i Direction of the velocity (1: x, 2: y, 3: z) !! @param q_prim_vf Eulerian field with primitive variables !! @return a Acceleration of the bubble in direction i - pure function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) + pure function f_get_acceleration(pos, rad, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(a) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho integer, dimension(3), intent(in) :: cell @@ -549,79 +549,79 @@ contains if (fd_order == 2) then if (i == 1) then - dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & - (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + dp = (q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)))/ & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) elseif (i == 2) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + dp = (q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2) - 1, cell(3)))/ & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) elseif (i == 3) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & - (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + dp = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 1))/ & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) end if elseif (fd_order == 4) then if (i == 1) then xi(1) = x_cc(cell(1) - 1) - eta(1) = (q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 2,cell(2),cell(3))) / & - (x_cc(cell(1)) - x_cc(cell(1) - 2)) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 2, cell(2), cell(3)))/ & + (x_cc(cell(1)) - x_cc(cell(1) - 2)) xi(2) = x_cc(cell(1)) - eta(2) = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / 
& - (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)))/ & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) xi(3) = x_cc(cell(1) + 1) - eta(3) = (q_prim_vf(E_idx)%sf(cell(1) + 2,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3))) / & - (x_cc(cell(1) + 2) - x_cc(cell(1))) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1) + 2, cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & + (x_cc(cell(1) + 2) - x_cc(cell(1))) elseif (i == 2) then xi(1) = y_cc(cell(2) - 1) - eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 2,cell(3))) / & - (y_cc(cell(2)) - y_cc(cell(2) - 2)) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2) - 2, cell(3)))/ & + (y_cc(cell(2)) - y_cc(cell(2) - 2)) xi(2) = y_cc(cell(2)) - eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2) - 1, cell(3)))/ & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) xi(3) = y_cc(cell(2) + 1) - eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 2,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3))) / & - (y_cc(cell(2) + 2) - y_cc(cell(2))) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1), cell(2) + 2, cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & + (y_cc(cell(2) + 2) - y_cc(cell(2))) elseif (i == 3) then xi(1) = z_cc(cell(3) - 1) - eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) ) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 2)) / & - (z_cc(cell(3)) - z_cc(cell(3) - 2)) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 2))/ & + (z_cc(cell(3)) - z_cc(cell(3) - 2)) xi(2) = y_cc(cell(3)) - eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & - (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 1))/ & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) xi(3) = y_cc(cell(3) + 1) - eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 2) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) )) / & - (z_cc(cell(3) + 2) - z_cc(cell(3))) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 2) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & + (z_cc(cell(3) + 2) - z_cc(cell(3))) end if - L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) - L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) - L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) + L(1) = ((pos - xi(2))*(pos - xi(3)))/((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3)))/((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2)))/((xi(3) - xi(1))*(xi(3) - xi(2))) dp = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) end if - vol = (4._wp/3._wp) * pi * rad**3._wp - force = -1._wp * vol * dp + vol = (4._wp/3._wp)*pi*rad**3._wp + force = -1._wp*vol*dp - v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + v_rel = vel - f_interpolate_velocity(pos, cell, i, q_prim_vf) if (lag_params%drag_model == 1) then ! 
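! The force balance assembled here is, schematically,
!   (mg + mv) * a = -V_b * dp/dx_i - c * pi * R * (v_b - u_liq)_i / Re,
! with V_b = (4/3) pi R^3, c = 4 for the free-slip and c = 6 for the no-slip
! Stokes drag law. A compact standalone sketch of that balance; the program
! and argument names are illustrative, not the MFC interface:
program accel_sketch
    implicit none
    integer, parameter :: wp = kind(1.d0)
    print *, 'a =', f_accel_sketch(1.0e3_wp, 1.0e-4_wp, 0.1_wp, 100._wp, 1.0e-9_wp, 2)
contains
    pure function f_accel_sketch(dpdx, rad, v_rel, Re, mass, drag_model) result(a)
        real(wp), intent(in) :: dpdx, rad, v_rel, Re, mass
        integer, intent(in) :: drag_model
        real(wp) :: a, pi, vol, force
        pi = acos(-1._wp)
        vol = (4._wp/3._wp)*pi*rad**3                                  ! bubble volume
        force = -vol*dpdx                                              ! pressure-gradient force
        if (drag_model == 1) force = force - (4._wp*pi*rad*v_rel)/Re  ! free-slip Stokes drag
        if (drag_model == 2) force = force - (6._wp*pi*rad*v_rel)/Re  ! no-slip Stokes drag
        a = force/mass
    end function f_accel_sketch
end program accel_sketch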
Free slip Stokes drag - force = force - (4._wp * pi * rad * v_rel) / Re + force = force - (4._wp*pi*rad*v_rel)/Re else if (lag_params%drag_model == 2) then ! No slip Stokes drag - force = force - (6._wp * pi * rad * v_rel) / Re + force = force - (6._wp*pi*rad*v_rel)/Re end if - a = force / (mg + mv) + a = force/(mg + mv) end function f_get_acceleration diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 49fac90142..09ee8e3e8e 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -252,9 +252,9 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM - type(int_bounds_info), dimension(3) :: nidx !< Indicies for neighboring processors + type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors - integer, allocatable, dimension(:,:,:) :: neighbor_ranks + integer, allocatable, dimension(:, :, :) :: neighbor_ranks !! Neighbor ranks for lagrangian particle communication integer, allocatable, dimension(:) :: start_idx !< @@ -1352,7 +1352,7 @@ contains @:PREFER_GPU(x_cc) @:PREFER_GPU(dx) - if (n == 0) return; + if (n == 0) return; @:ALLOCATE(y_cb(-1 - buff_size:n + buff_size)) @:ALLOCATE(y_cc(-buff_size:n + buff_size)) @:ALLOCATE(dy(-buff_size:n + buff_size)) @@ -1360,7 +1360,7 @@ contains @:PREFER_GPU(y_cc) @:PREFER_GPU(dy) - if (p == 0) return; + if (p == 0) return; @:ALLOCATE(z_cb(-1 - buff_size:p + buff_size)) @:ALLOCATE(z_cc(-buff_size:p + buff_size)) @:ALLOCATE(dz(-buff_size:p + buff_size)) @@ -1446,10 +1446,10 @@ contains ! Deallocating grid variables for the x-, y- and z-directions @:DEALLOCATE(x_cb, x_cc, dx) - if (n == 0) return; + if (n == 0) return; @:DEALLOCATE(y_cb, y_cc, dy) - if (p == 0) return; + if (p == 0) return; @:DEALLOCATE(z_cb, z_cc, dz) end subroutine s_finalize_global_parameters_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index d1eb2f3679..549b9f4429 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -48,7 +48,7 @@ module m_mpi_proxy $:GPU_DECLARE(create='[i_halo_size]') integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts - integer, dimension(:,:,:,:), allocatable :: p_send_ids + integer, dimension(:, :, :, :), allocatable :: p_send_ids character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff type(bounds_info), dimension(3) :: comm_coords integer :: p_buff_size, p_var_size @@ -96,8 +96,8 @@ contains call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) nReal = 7 + 16*2 + 10*lag_num_ts - p_var_size = (nReal * real_size + int_size) - p_buff_size = lag_params%nBubs_glb * p_var_size + p_var_size = (nReal*real_size + int_size) + p_buff_size = lag_params%nBubs_glb*p_var_size @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) @@ -449,15 +449,15 @@ contains !! 
for communication of initial condition) impure subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) - real(wp), dimension(:,:) :: pos - real(wp), dimension(:,:), optional :: posPrev + real(wp), dimension(:, :) :: pos + real(wp), dimension(:, :), optional :: posPrev integer :: bubID, nbub integer :: i, j, k do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end - p_send_counts(i,j,k) = 0 + p_send_counts(i, j, k) = 0 end do end do end do @@ -593,8 +593,8 @@ contains logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev) integer, intent(in) :: particle_id, dir, loc - real(wp), dimension(:,:), intent(in) :: pos - real(wp), dimension(:,:), optional, intent(in) :: posPrev + real(wp), dimension(:, :), intent(in) :: pos + real(wp), dimension(:, :), optional, intent(in) :: posPrev if (loc == -1) then ! Beginning of the domain if (nidx(dir)%beg == 0) then @@ -603,10 +603,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id,dir) > comm_coords(dir)%beg .and. & - pos(particle_id,dir) < comm_coords(dir)%beg) + f_crosses_boundary = (posPrev(particle_id, dir) > comm_coords(dir)%beg .and. & + pos(particle_id, dir) < comm_coords(dir)%beg) else - f_crosses_boundary = (pos(particle_id,dir) < comm_coords(dir)%beg) + f_crosses_boundary = (pos(particle_id, dir) < comm_coords(dir)%beg) end if elseif (loc == 1) then ! End of the domain if (nidx(dir)%end == 0) then @@ -615,10 +615,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id,dir) < comm_coords(dir)%end .and. & - pos(particle_id,dir) > comm_coords(dir)%end) + f_crosses_boundary = (posPrev(particle_id, dir) < comm_coords(dir)%end .and. & + pos(particle_id, dir) > comm_coords(dir)%end) else - f_crosses_boundary = (pos(particle_id,dir) > comm_coords(dir)%end) + f_crosses_boundary = (pos(particle_id, dir) > comm_coords(dir)%end) end if end if @@ -662,12 +662,12 @@ contains !! @param lag_num_ts Number of stages in time-stepping scheme !! 
@param nBubs Local number of bubbles impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & - rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & - dgasmv, dpos, dvel, lag_num_ts, nbubs) + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts, nbubs) real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt - integer, dimension(:,:) :: lag_id + integer, dimension(:, :) :: lag_id real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size @@ -680,20 +680,20 @@ contains do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then - partner = neighbor_ranks(i,j,k) + partner = neighbor_ranks(i, j, k) send_tag = neighbor_tag(i, j, k) recv_tag = neighbor_tag(-i, -j, -k) - call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + call MPI_Sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - p_recv_size = p_recv_counts(i,j,k) * p_var_size + p_recv_size = p_recv_counts(i, j, k)*p_var_size position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) + do l = 0, p_send_counts(i, j, k) - 1 + bub_id = p_send_ids(i, j, k, l) call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) @@ -725,12 +725,12 @@ contains send_tag = send_tag + max(num_procs, n_neighbor) recv_tag = recv_tag + max(num_procs, n_neighbor) - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) position = 0 - do l = 0, p_recv_counts(i,j,k) - 1 + do l = 0, p_recv_counts(i, j, k) - 1 nbubs = nbubs + 1 bub_id = nbubs call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index bb6e6484e5..7521249edc 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -329,7 +329,7 @@ def analytic(self): 'write_bubbles', 'write_bubbles_stats' ]: SIMULATION[f'lag_params%{var}'] = ParamType.LOG -for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', +for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', 'vel_model', 'drag_model']: SIMULATION[f'lag_params%{var}'] = ParamType.INT From fff74d879f7384360c541deb01eea75c7326fc06 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 2 Sep 2025 13:39:16 -0400 Subject: [PATCH 47/62] add perturbed IC stuff --- src/post_process/m_mpi_proxy.fpp | 1 + src/pre_process/include/3dHardcodedIC.fpp | 56 +++++++++++++++++++++++ src/pre_process/m_mpi_proxy.fpp | 2 +- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git 
a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index b096549ddc..053d96d457 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -110,6 +110,7 @@ contains & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor call MPI_BCAST(flux_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index 39fb581c0d..5096ec806c 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -7,6 +7,45 @@ ! Case 302 - IGR Jet real(wp) :: r, ux_th, ux_am, p_th, p_am, rho_th, rho_am, y_th, z_th, r_th, eps_smooth + real(wp), allocatable, dimension(:, :) :: ih + integer :: i, j, pos, start, end + logical :: file_exist + character(len=10000) :: line + character(len=25) :: value + + if (patch_icpp(patch_id)%hcid == 303) then + allocate(ih(0:m_glb, 0:p_glb)) + + if (interface_file == '.') then + call s_mpi_abort("Error: interface_file must be specified for hcid=303") + else + inquire (file=trim(interface_file), exist=file_exist) + if (file_exist) then + open(unit=10, file=trim(interface_file), status="old", action="read") + do i = 0, m_glb + read(10, '(A)') line ! Read a full line as a string + start = 1 + + do j = 0, p_glb + end = index(line(start:), ',') ! Find the next comma + if (end == 0) then + value = trim(adjustl(line(start:))) ! Last value in the line + else + value = trim(adjustl(line(start:start+end-2))) ! Extract substring + start = start + end ! Move to next value + end if + read(value, *) ih(i, j) ! Convert string to numeric value + if (.not. f_is_default(normMag)) ih(i,j )= ih(i,j) * normMag + if (.not. f_is_default(normFac)) ih(i,j) = ih(i,j) + normFac + end do + end do + close(10) + else + call s_mpi_abort("Error: interface_file specified for hcid=303 does not exist") + end if + end if + end if + eps = 1e-9_wp #:enddef @@ -86,6 +125,23 @@ q_prim_vf(E_idx)%sf(i, j, k) = p_th*f_cut_on(r - r_th, eps_smooth)*f_cut_on(x_cc(i), eps_smooth) + p_am + case (303) ! 3D Interface from file + + alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & + tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*0.1_wp)) + + q_prim_vf(advxb)%sf(i,j,k) = alph + q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph + + q_prim_vf(contxb)%sf(i,j,k) = q_prim_vf(advxb)%sf(i,j,k) * 1._wp + q_prim_vf(contxe)%sf(i,j,k) = q_prim_vf(advxe)%sf(i,j,k) * (1._wp / 950._wp) + + q_prim_vf(E_idx)%sf(i,j,k) = p0 + & + (q_prim_vf(contxb)%sf(i,j,k) + q_prim_vf(contxe)%sf(i,j,k)) * g0 * & + (ih(start_idx(1) + i, start_idx(3) + k) - y_cc(j)) + + if (surface_tension) q_prim_vf(c_idx)%sf(i,j,k) = alph + case (370) ! 
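! In hardcoded case 303 the first fluid's volume fraction is a tanh profile
! centred on the interface height h(x,z) read from interface_file, and the
! pressure is hydrostatic relative to that height using the mixture density.
! A single-point sketch of those two expressions; p0, g0, h and y are
! illustrative stand-ins for the broadcast case parameters, and the 0.1
! smoothing factor mirrors the patch:
program interface_ic_sketch
    implicit none
    integer, parameter :: wp = kind(1.d0)
    real(wp), parameter :: eps = 1e-9_wp
    real(wp) :: h, y, p0, g0, alph, rho_mix, pres
    h = 0.2_wp; y = 0.15_wp; p0 = 101325._wp; g0 = 9.81_wp
    alph = 0.5_wp*(1._wp + (1._wp - 2._wp*eps)*tanh((h - y)*0.1_wp))  ! smoothed volume fraction
    rho_mix = alph*1._wp + (1._wp - alph)*(1._wp/950._wp)             ! partial densities as in the patch
    pres = p0 + rho_mix*g0*(h - y)                                    ! hydrostatic pressure
    print *, 'alpha_1 =', alph, ' p =', pres
end program interface_ic_sketch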
This hardcoded case extrudes a 2D profile to initialize a 3D simulation domain @: HardcodedReadValues() diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index a6b7c7f85a..d9540c6bc9 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -72,7 +72,7 @@ contains & 'perturb_flow_mag', 'pref', 'rhoref', 'poly_sigma', 'R0ref', & & 'Web', 'Ca', 'Re_inv', 'sigR', 'sigV', 'rhoRV', 'palpha_eps', & & 'ptgalpha_eps', 'sigma', 'pi_fac', 'mixlayer_vel_coef', 'Bx0', & - & 'mixlayer_perturb_k0'] + & 'mixlayer_perturb_k0', 'normFac', 'normMag', 'p0', 'g0'] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor From e9a652577c5b01b92694e65e29fbf0e6524cf30f Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:28:26 -0400 Subject: [PATCH 48/62] fix for GNU --- src/pre_process/include/3dHardcodedIC.fpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index 5096ec806c..4a64c01d88 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -8,7 +8,7 @@ real(wp) :: r, ux_th, ux_am, p_th, p_am, rho_th, rho_am, y_th, z_th, r_th, eps_smooth real(wp), allocatable, dimension(:, :) :: ih - integer :: i, j, pos, start, end + integer :: pos, start, end logical :: file_exist character(len=10000) :: line character(len=25) :: value @@ -35,8 +35,8 @@ start = start + end ! Move to next value end if read(value, *) ih(i, j) ! Convert string to numeric value - if (.not. f_is_default(normMag)) ih(i,j )= ih(i,j) * normMag - if (.not. f_is_default(normFac)) ih(i,j) = ih(i,j) + normFac + if (.not. f_is_default(normMag)) ih(i, j) = ih(i, j) * normMag + if (.not. f_is_default(normFac)) ih(i, j) = ih(i, j) + normFac end do end do close(10) From 69b51ea8b9b44c8c3867d50a45aa10ccc2781a9c Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 15 Sep 2025 11:10:45 -0400 Subject: [PATCH 49/62] periodic BCs mostly working --- src/common/m_boundary_common.fpp | 15 +++ src/simulation/m_bubbles_EL.fpp | 176 ++++++++++++++++--------- src/simulation/m_global_parameters.fpp | 19 +-- src/simulation/m_ibm.fpp | 20 +-- src/simulation/m_mpi_proxy.fpp | 125 +++++++++++++----- src/simulation/m_start_up.fpp | 8 +- toolchain/mfc/run/case_dicts.py | 4 - 7 files changed, 245 insertions(+), 122 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4958bad881..9aeb30eb24 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1942,6 +1942,21 @@ contains offset_x%beg = buff_size; offset_x%end = buff_size offset_y%beg = buff_size; offset_y%end = buff_size offset_z%beg = buff_size; offset_z%end = buff_size + +#ifdef MFC_MPI + ! 
Populate global domain boundaries with stretched grids + call s_mpi_allreduce_min(x_cb(-1), glb_bounds(1)%beg) + call s_mpi_allreduce_max(x_cb(m), glb_bounds(1)%end) + + if (n > 0) then + call s_mpi_allreduce_min(y_cb(-1), glb_bounds(2)%beg) + call s_mpi_allreduce_max(y_cb(n), glb_bounds(2)%end) + if (p > 0) then + call s_mpi_allreduce_min(z_cb(-1), glb_bounds(3)%beg) + call s_mpi_allreduce_max(z_cb(p), glb_bounds(3)%end) + end if + end if +#endif #endif #ifndef MFC_PRE_PROCESS diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 269b096058..8187b14638 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -79,8 +79,11 @@ module m_bubbles_EL $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') integer, allocatable, dimension(:) :: keep_bubble, prefix_sum + integer, allocatable, dimension(:) :: wrap_bubble_loc + integer, allocatable, dimension(:,:) :: wrap_bubble_dir integer :: active_bubs $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') + $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') contains @@ -110,6 +113,17 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if + pcomm_coords(1)%beg = x_cb(buff_size - fd_number - 1) + pcomm_coords(1)%end = x_cb(m - buff_size + fd_number) + if (n > 0) then + pcomm_coords(2)%beg = y_cb(buff_size - fd_number - 1) + pcomm_coords(2)%end = y_cb(n - buff_size + fd_number) + if (p > 0) then + pcomm_coords(3)%beg = z_cb(buff_size - fd_number - 1) + pcomm_coords(3)%end = z_cb(p - buff_size + fd_number) + end if + end if + $:GPU_UPDATE(device='[lag_num_ts, q_beta_idx]') @:ALLOCATE(q_beta%vf(1:q_beta_idx)) @@ -149,6 +163,7 @@ contains @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(keep_bubble(1:nBubs_glb), prefix_sum(1:nBubs_glb)) + @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol @@ -274,7 +289,8 @@ contains gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs, & + dest=1) end if $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') @@ -576,8 +592,6 @@ contains call MPI_TYPE_FREE(view, ierr) end if - print *, proc_rank, lag_id - if (proc_rank == 0) then write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt @@ -1358,12 +1372,16 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf integer, intent(in) :: dest - integer :: k, i, patch_id, offset + integer :: k, i, q + integer :: patch_id, newBubs, new_idx + real(wp) :: offset integer, dimension(3) :: cell $:GPU_PARALLEL_LOOP(private='[cell]') do k = 1, nBubs keep_bubble(k) = 1 + wrap_bubble_loc(k) = 0 + wrap_bubble_dir(k,:) = 0 ! Relocate bubbles at solid boundaries and delete bubbles that leave ! buffer regions @@ -1373,6 +1391,16 @@ contains elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k, 1, dest) > x_cb(m) - intfc_rad(k, dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k, dest) + elseif (bc_x%beg == BC_PERIODIC .and. mtn_pos(k, 1, dest) < pcomm_coords(1)%beg .and. 
& + mtn_posPrev(k, 1, dest) > pcomm_coords(1)%beg) then + !print*, "1, -1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%beg + wrap_bubble_dir(k,1) = 1 + wrap_bubble_loc(k) = -1 + elseif (bc_x%end == BC_PERIODIC .and. mtn_pos(k, 1, dest) > pcomm_coords(1)%end .and. & + mtn_posPrev(k, 1, dest) < pcomm_coords(1)%end) then + !print*, "1, 1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%end + wrap_bubble_dir(k,1) = 1 + wrap_bubble_loc(k) = 1 elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then @@ -1385,6 +1413,16 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k, 2, dest) > y_cb(n) - intfc_rad(k, dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k, dest) + elseif (bc_y%beg == BC_PERIODIC .and. mtn_pos(k, 2, dest) < pcomm_coords(2)%beg .and. & + mtn_posPrev(k, 2, dest) > pcomm_coords(2)%beg) then + !print*, "2, -1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%beg + wrap_bubble_dir(k,2) = 1 + wrap_bubble_loc(k) = -1 + elseif (bc_y%end == BC_PERIODIC .and. mtn_pos(k, 2, dest) > pcomm_coords(2)%end .and. & + mtn_posPrev(k, 2, dest) < pcomm_coords(2)%end) then + !print*, "2, 1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%end + wrap_bubble_dir(k,2) = 1 + wrap_bubble_loc(k) = 1 elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then @@ -1398,6 +1436,14 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k, 3, dest) > z_cb(p) - intfc_rad(k, dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k, dest) + elseif (bc_z%beg == BC_PERIODIC .and. mtn_pos(k, 3, dest) < pcomm_coords(3)%beg .and. & + mtn_posPrev(k, 3, dest) > pcomm_coords(3)%beg) then + wrap_bubble_dir(k,3) = 1 + wrap_bubble_loc(k) = -1 + elseif (bc_z%end == BC_PERIODIC .and. mtn_pos(k, 3, dest) > pcomm_coords(3)%end .and. & + mtn_posPrev(k, 3, dest) < pcomm_coords(3)%end) then + wrap_bubble_dir(k,3) = 1 + wrap_bubble_loc(k) = 1 elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then @@ -1436,6 +1482,7 @@ contains end if end do + call nvtxStartRange("LAG-BC") call nvtxStartRange("LAG-BC-DEV2HOST") $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & @@ -1444,6 +1491,7 @@ contains & nBubs]') call nvtxEndRange + ! 
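! The deletion pass that follows is a serial stream compaction: an inclusive
! prefix sum over keep_bubble gives every surviving bubble its packed index,
! and survivors are copied down in place. A minimal sketch of the same idea on
! a plain array (names and data are illustrative):
program compaction_sketch
    implicit none
    integer :: i, n_kept
    integer :: keep(6), psum(6)
    real :: val(6)
    val = [1., 2., 3., 4., 5., 6.]
    keep = [1, 0, 1, 1, 0, 1]        ! 1 = keep, 0 = bubble left the domain
    psum(1) = keep(1)
    do i = 2, size(keep)
        psum(i) = psum(i - 1) + keep(i)
    end do
    do i = 1, size(keep)
        if (keep(i) == 1) val(psum(i)) = val(i)   ! copy survivors down
    end do
    n_kept = psum(size(keep))
    print *, 'kept', n_kept, 'entries:', val(1:n_kept)
end program compaction_sketch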
Handle deletion of bubbles leaving local domain do k = 1, nBubs if (k == 1) then prefix_sum(k) = keep_bubble(k) @@ -1456,33 +1504,41 @@ contains do k = 1, nBubs if (keep_bubble(k) == 1) then if (prefix_sum(k) /= k) then - bub_R0(prefix_sum(k)) = bub_R0(k) - Rmax_stats(prefix_sum(k)) = Rmax_stats(k) - Rmin_stats(prefix_sum(k)) = Rmin_stats(k) - gas_mg(prefix_sum(k)) = gas_mg(k) - gas_betaT(prefix_sum(k)) = gas_betaT(k) - gas_betaC(prefix_sum(k)) = gas_betaC(k) - bub_dphidt(prefix_sum(k)) = bub_dphidt(k) - lag_id(prefix_sum(k), 1) = lag_id(k, 1) - gas_p(prefix_sum(k), 1:2) = gas_p(k, 1:2) - gas_mv(prefix_sum(k), 1:2) = gas_mv(k, 1:2) - intfc_rad(prefix_sum(k), 1:2) = intfc_rad(k, 1:2) - intfc_vel(prefix_sum(k), 1:2) = intfc_vel(k, 1:2) - mtn_pos(prefix_sum(k), 1:3, 1:2) = mtn_pos(k, 1:3, 1:2) - mtn_posPrev(prefix_sum(k), 1:3, 1:2) = mtn_posPrev(k, 1:3, 1:2) - mtn_vel(prefix_sum(k), 1:3, 1:2) = mtn_vel(k, 1:3, 1:2) - mtn_s(prefix_sum(k), 1:3, 1:2) = mtn_s(k, 1:3, 1:2) - intfc_draddt(prefix_sum(k), 1:lag_num_ts) = intfc_draddt(k, 1:lag_num_ts) - intfc_dveldt(prefix_sum(k), 1:lag_num_ts) = intfc_dveldt(k, 1:lag_num_ts) - gas_dpdt(prefix_sum(k), 1:lag_num_ts) = gas_dpdt(k, 1:lag_num_ts) - gas_dmvdt(prefix_sum(k), 1:lag_num_ts) = gas_dmvdt(k, 1:lag_num_ts) - mtn_dposdt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dposdt(k, 1:3, 1:lag_num_ts) - mtn_dveldt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dveldt(k, 1:3, 1:lag_num_ts) + call s_copy_lag_bubble(k, prefix_sum(k)) end if end if if (k == nBubs) nBubs = active_bubs end do + ! Handle periodic wrapping of bubbles on same processor + newBubs = 0 + do k = 1, nBubs + if (any(wrap_bubble_dir(k, :) == 1)) then + newBubs = newBubs + 1 + new_idx = nBubs + newBubs + call s_copy_lag_bubble(k, new_idx) + do i = 1, num_dims + if (wrap_bubble_dir(k, i) == 1) then + offset = glb_bounds(i)%end - glb_bounds(i)%beg + if (wrap_bubble_loc(k) == 1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) - offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) - offset + end do + else if (wrap_bubble_loc(k) == -1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) + offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) + offset + end do + end if + end if + end do + end if + end do + print*, nBubs, newBubs + nBubs = nBubs + newBubs + + ! Handle MPI transfer of bubbles going to another processor's local domain if (num_procs > 1) then call nvtxStartRange("LAG-BC-TRANSFER-LIST") call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, dest), mtn_posPrev(:, :, dest)) @@ -1493,7 +1549,8 @@ contains gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs, & + dest) call nvtxEndRange end if @@ -1503,6 +1560,7 @@ contains & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs]') call nvtxEndRange + call nvtxEndRange end subroutine s_enforce_EL_bubbles_boundary_conditions @@ -2034,42 +2092,34 @@ contains !> The purpose of this subroutine is to remove one specific particle if dt is too small. !! 
@param bub_id Particle id - impure subroutine s_remove_lag_bubble(bub_id) - - integer, intent(in) :: bub_id - - integer :: i - - $:GPU_LOOP(parallelism='[seq]') - do i = bub_id, nBubs - 1 - bub_R0(i) = bub_R0(i + 1) - Rmax_stats(i) = Rmax_stats(i + 1) - Rmin_stats(i) = Rmin_stats(i + 1) - gas_mg(i) = gas_mg(i + 1) - gas_betaT(i) = gas_betaT(i + 1) - gas_betaC(i) = gas_betaC(i + 1) - bub_dphidt(i) = bub_dphidt(i + 1) - lag_id(i, 1) = lag_id(i + 1, 1) - gas_p(i, 1:2) = gas_p(i + 1, 1:2) - gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) - intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) - intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) - mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) - mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) - mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) - mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) - intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) - intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) - gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) - gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - mtn_dposdt(i, 1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) - mtn_dveldt(i, 1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) - end do - - nBubs = nBubs - 1 - $:GPU_UPDATE(device='[nBubs]') - - end subroutine s_remove_lag_bubble + impure subroutine s_copy_lag_bubble(src, dest) + + integer, intent(in) :: src, dest + + bub_R0(dest) = bub_R0(src) + Rmax_stats(dest) = Rmax_stats(src) + Rmin_stats(dest) = Rmin_stats(src) + gas_mg(dest) = gas_mg(src) + gas_betaT(dest) = gas_betaT(src) + gas_betaC(dest) = gas_betaC(src) + bub_dphidt(dest) = bub_dphidt(src) + lag_id(dest, 1) = lag_id(src, 1) + gas_p(dest, 1:2) = gas_p(src, 1:2) + gas_mv(dest, 1:2) = gas_mv(src, 1:2) + intfc_rad(dest, 1:2) = intfc_rad(src, 1:2) + intfc_vel(dest, 1:2) = intfc_vel(src, 1:2) + mtn_vel(dest, 1:3, 1:2) = mtn_vel(src, 1:3, 1:2) + mtn_s(dest, 1:3, 1:2) = mtn_s(src, 1:3, 1:2) + intfc_draddt(dest, 1:lag_num_ts) = intfc_draddt(src, 1:lag_num_ts) + intfc_dveldt(dest, 1:lag_num_ts) = intfc_dveldt(src, 1:lag_num_ts) + gas_dpdt(dest, 1:lag_num_ts) = gas_dpdt(src, 1:lag_num_ts) + gas_dmvdt(dest, 1:lag_num_ts) = gas_dmvdt(src, 1:lag_num_ts) + mtn_dposdt(dest, 1:3, 1:lag_num_ts) = mtn_dposdt(src, 1:3, 1:lag_num_ts) + mtn_dveldt(dest, 1:3, 1:lag_num_ts) = mtn_dveldt(src, 1:3, 1:lag_num_ts) + mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) + mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) + + end subroutine s_copy_lag_bubble !> The purpose of this subroutine is to deallocate variables impure subroutine s_finalize_lagrangian_solver() diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 9e73462189..dbef5fcc98 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -63,6 +63,7 @@ module m_global_parameters !> @{ real(wp), target, allocatable, dimension(:) :: x_cb, y_cb, z_cb + type(bounds_info), dimension(3) :: glb_bounds !< !> @} !> @name Cell-center (CC) locations in the x-, y- and z-directions, respectively @@ -70,7 +71,6 @@ module m_global_parameters real(wp), target, allocatable, dimension(:) :: x_cc, y_cc, z_cc !> @} - !type(bounds_info) :: x_domain, y_domain, z_domain !< !! 
Locations of the domain bounds in the x-, y- and z-coordinate directions !> @name Cell-width distributions in the x-, y- and z-directions, respectively !> @{ @@ -231,6 +231,7 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io + logical, dimension(3) :: periodic_bc !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -239,10 +240,6 @@ module m_global_parameters $:GPU_DECLARE(create='[bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3]') $:GPU_DECLARE(create='[bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3]') - type(bounds_info) :: x_domain, y_domain, z_domain - real(wp) :: x_a, y_a, z_a - real(wp) :: x_b, y_b, z_b - logical :: parallel_io !< Format of the data files logical :: file_per_process !< shared file or not when using parallel io integer :: precision !< Precision of output files @@ -252,6 +249,9 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(bounds_info), allocatable, dimension(:) :: pcomm_coords + !! Coordinates for EL particle transfer + type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors integer, allocatable, dimension(:, :, :) :: neighbor_ranks @@ -652,6 +652,7 @@ contains num_bc_patches = 0 bc_io = .false. + periodic_bc = .false. bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int @@ -664,9 +665,9 @@ contains #:endfor #:endfor - x_domain%beg = dflt_int; x_domain%end = dflt_int - y_domain%beg = dflt_int; y_domain%end = dflt_int - z_domain%beg = dflt_int; z_domain%end = dflt_int + glb_bounds(1)%beg = dflt_int; glb_bounds(1)%end = dflt_int + glb_bounds(2)%beg = dflt_int; glb_bounds(2)%end = dflt_int + glb_bounds(3)%beg = dflt_int; glb_bounds(3)%end = dflt_int ! Fluids physical parameters do i = 1, num_fluids_max @@ -1388,6 +1389,7 @@ contains #:endif allocate (proc_coords(1:num_dims)) + allocate (pcomm_coords(1:num_dims)) if (parallel_io .neqv. .true.) return @@ -1424,6 +1426,7 @@ contains end if deallocate (proc_coords) + deallocate (pcomm_coords) if (parallel_io) then deallocate (start_idx) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 4d01608120..6fe6c3ac64 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -501,17 +501,17 @@ contains ghost_points_in(count)%slip = patch_ib(patch_id)%slip ! 
ghost_points(count)%rank = proc_rank - if ((x_cc(i) - dx(i)) < x_domain%beg) then + if ((x_cc(i) - dx(i)) < glb_bounds(1)%beg) then ghost_points_in(count)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then + else if ((x_cc(i) + dx(i)) > glb_bounds(1)%end) then ghost_points_in(count)%DB(1) = 1 else ghost_points_in(count)%DB(1) = 0 end if - if ((y_cc(j) - dy(j)) < y_domain%beg) then + if ((y_cc(j) - dy(j)) < glb_bounds(2)%beg) then ghost_points_in(count)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then + else if ((y_cc(j) + dy(j)) > glb_bounds(2)%end) then ghost_points_in(count)%DB(2) = 1 else ghost_points_in(count)%DB(2) = 0 @@ -543,25 +543,25 @@ contains ib_markers%sf(i, j, k) ghost_points_in(count)%slip = patch_ib(patch_id)%slip - if ((x_cc(i) - dx(i)) < x_domain%beg) then + if ((x_cc(i) - dx(i)) < glb_bounds(1)%beg) then ghost_points_in(count)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then + else if ((x_cc(i) + dx(i)) > glb_bounds(1)%end) then ghost_points_in(count)%DB(1) = 1 else ghost_points_in(count)%DB(1) = 0 end if - if ((y_cc(j) - dy(j)) < y_domain%beg) then + if ((y_cc(j) - dy(j)) < glb_bounds(2)%beg) then ghost_points_in(count)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then + else if ((y_cc(j) + dy(j)) > glb_bounds(2)%end) then ghost_points_in(count)%DB(2) = 1 else ghost_points_in(count)%DB(2) = 0 end if - if ((z_cc(k) - dz(k)) < z_domain%beg) then + if ((z_cc(k) - dz(k)) < glb_bounds(3)%beg) then ghost_points_in(count)%DB(3) = -1 - else if ((z_cc(k) + dz(k)) > z_domain%end) then + else if ((z_cc(k) + dz(k)) > glb_bounds(3)%end) then ghost_points_in(count)%DB(3) = 1 else ghost_points_in(count)%DB(3) = 0 diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 2b2f1745b9..27abce892c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -50,10 +50,9 @@ module m_mpi_proxy integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts integer, dimension(:, :, :, :), allocatable :: p_send_ids character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff - type(bounds_info), dimension(3) :: comm_coords integer :: p_buff_size, p_var_size - $:GPU_DECLARE(create='[p_send_counts, comm_coords]') + $:GPU_DECLARE(create='[p_send_counts]') contains @@ -92,6 +91,7 @@ contains integer :: real_size, int_size, nReal, lag_num_ts integer :: ierr !< Generic flag used to identify and report MPI errors +#ifdef MFC_MPI call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) @@ -100,19 +100,8 @@ contains p_buff_size = lag_params%nBubs_glb*p_var_size @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) - - comm_coords(1)%beg = x_cb(buff_size - fd_number - 1) - comm_coords(1)%end = x_cb(m - buff_size + fd_number) - if (n > 0) then - comm_coords(2)%beg = y_cb(buff_size - fd_number - 1) - comm_coords(2)%end = y_cb(n - buff_size + fd_number) - if (p > 0) then - comm_coords(3)%beg = z_cb(buff_size - fd_number - 1) - comm_coords(3)%end = z_cb(p - buff_size + fd_number) - end if - end if - @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) +#endif end subroutine s_initialize_particles_mpi @@ -195,11 +184,8 @@ contains & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 
'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & - & 'tau_star', 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', & - & 'ic_eps', 'ic_beta' ] + & 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', 'tau_star', & + & 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', 'ic_eps', 'ic_beta' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -285,6 +271,9 @@ contains call MPI_BCAST(nv_uvm_igr_temps_on_gpu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(nv_uvm_pref_gpu, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + ! Extra BC Variable + call MPI_BCAST(periodic_bc, 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #endif end subroutine s_mpi_bcast_user_inputs @@ -612,6 +601,7 @@ contains call s_add_particle_to_direction(k, 0, 0, 1) end if end if + end do contains @@ -629,10 +619,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id, dir) > comm_coords(dir)%beg .and. & - pos(particle_id, dir) < comm_coords(dir)%beg) + f_crosses_boundary = (posPrev(particle_id, dir) > pcomm_coords(dir)%beg .and. & + pos(particle_id, dir) < pcomm_coords(dir)%beg) else - f_crosses_boundary = (pos(particle_id, dir) < comm_coords(dir)%beg) + f_crosses_boundary = (pos(particle_id, dir) < pcomm_coords(dir)%beg) end if elseif (loc == 1) then ! End of the domain if (nidx(dir)%end == 0) then @@ -641,10 +631,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id, dir) < comm_coords(dir)%end .and. & - pos(particle_id, dir) > comm_coords(dir)%end) + f_crosses_boundary = (posPrev(particle_id, dir) < pcomm_coords(dir)%end .and. & + pos(particle_id, dir) > pcomm_coords(dir)%end) else - f_crosses_boundary = (pos(particle_id, dir) > comm_coords(dir)%end) + f_crosses_boundary = (pos(particle_id, dir) > pcomm_coords(dir)%end) end if end if @@ -690,31 +680,35 @@ contains impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & - dgasmv, dpos, dvel, lag_num_ts, nbubs) + dgasmv, dpos, dvel, lag_num_ts, nbubs, dest) real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt integer, dimension(:, :) :: lag_id real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel - integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size + integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size, dest integer :: i, j, k, l, q integer :: ierr !< Generic flag used to identify and report MPI errors +#ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - + if (abs(i) + abs(j) + abs(k) /= 0) then partner = neighbor_ranks(i, j, k) - send_tag = neighbor_tag(i, j, k) - recv_tag = neighbor_tag(-i, -j, -k) + recv_tag = send_tag !neighbor_tag(-i,-j,-k) - call MPI_Sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + call MPI_sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! 
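! Particle state is exchanged below as a byte stream: every field is appended
! with MPI_Pack, the packed extent is sent as MPI_PACKED, and the receiver
! replays the same sequence with MPI_Unpack. A self-contained sketch of that
! pattern, exchanging two fields with itself on MPI_COMM_SELF (names and the
! 64-byte buffer are illustrative only):
program pack_exchange_sketch
    use mpi
    implicit none
    integer :: ierr, pos, id_in, id_out
    double precision :: r_in, r_out
    character(len=1) :: sbuf(64), rbuf(64)
    call MPI_Init(ierr)
    id_in = 7; r_in = 1.25d0
    pos = 0
    call MPI_Pack(id_in, 1, MPI_INTEGER, sbuf, size(sbuf), pos, MPI_COMM_SELF, ierr)
    call MPI_Pack(r_in, 1, MPI_DOUBLE_PRECISION, sbuf, size(sbuf), pos, MPI_COMM_SELF, ierr)
    call MPI_Sendrecv(sbuf, pos, MPI_PACKED, 0, 0, rbuf, size(rbuf), MPI_PACKED, 0, 0, &
                      MPI_COMM_SELF, MPI_STATUS_IGNORE, ierr)
    pos = 0
    call MPI_Unpack(rbuf, size(rbuf), pos, id_out, 1, MPI_INTEGER, MPI_COMM_SELF, ierr)
    call MPI_Unpack(rbuf, size(rbuf), pos, r_out, 1, MPI_DOUBLE_PRECISION, MPI_COMM_SELF, ierr)
    print *, 'unpacked id and radius:', id_out, r_out
    call MPI_Finalize(ierr)
end program pack_exchange_sketch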
Skip if no particles to exchange + if (p_send_counts(i, j, k) == 0 .and. p_recv_counts(i, j, k) == 0) then + cycle + end if + p_recv_size = p_recv_counts(i, j, k)*p_var_size position = 0 @@ -751,7 +745,7 @@ contains send_tag = send_tag + max(num_procs, n_neighbor) recv_tag = recv_tag + max(num_procs, n_neighbor) - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + call MPI_sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) @@ -786,12 +780,16 @@ contains call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do lag_id(bub_id, 2) = bub_id - end do end if end do end do end do +#endif + + if (any(periodic_bc)) then + call s_wrap_particle_positions(pos, posPrev, nbubs, dest) + end if end subroutine s_mpi_sendrecv_particles @@ -807,6 +805,65 @@ contains end function neighbor_tag + subroutine s_wrap_particle_positions(pos, posPrev, nbubs, dest) + + real(wp), dimension(:, :, :) :: pos, posPrev + integer :: nbubs, dest + integer :: i, q + real :: offset + + do i = 1, nbubs + if (periodic_bc(1)) then + offset = glb_bounds(1)%end - glb_bounds(1)%beg + if (pos(i, 1, dest) > x_cb(m + buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) - offset + posPrev(i, 1, q) = posPrev(i, 1, q) - offset + end do + endif + if (pos(i, 1, dest) < x_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) + offset + posPrev(i, 1, q) = posPrev(i, 1, q) + offset + end do + endif + end if + + if (periodic_bc(2)) then + offset = glb_bounds(2)%end - glb_bounds(2)%beg + if (pos(i, 2, dest) > y_cb(n + buff_size)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) - offset + posPrev(i, 2, q) = posPrev(i, 2, q) - offset + end do + endif + if (pos(i, 2, dest) < y_cb(-buff_size - 1)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) + offset + posPrev(i, 2, q) = posPrev(i, 2, q) + offset + end do + endif + end if + + if (periodic_bc(3)) then + offset = glb_bounds(3)%end - glb_bounds(3)%beg + if (pos(i, 3, dest) > z_cb(p + buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) - offset + posPrev(i, 2, q) = posPrev(i, 2, q) - offset + end do + endif + if (pos(i, 3, dest) < z_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) + offset + posPrev(i, 2, q) = posPrev(i, 2, q) + offset + end do + endif + end if + end do + + end subroutine s_wrap_particle_positions + impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index bc0def7dcf..b4c6b0a3fa 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -155,8 +155,6 @@ contains rdma_mpi, teno_CT, mp_weno, weno_avg, & riemann_solver, low_Mach, wave_speeds, avg_state, & bc_x, bc_y, bc_z, & - x_a, y_a, z_a, x_b, y_b, z_b, & - x_domain, y_domain, z_domain, & hypoelasticity, & ib, num_ibs, patch_ib, & fluid_pp, probe_wrt, prim_vars_wrt, & @@ -225,11 +223,15 @@ contains if (cfl_adap_dt .or. cfl_const_dt) cfl_dt = .true. - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_DIRICHLET) .or. & num_bc_patches > 0) then bc_io = .true. endif + if (bc_x%beg == BC_PERIODIC .and. bc_x%end == BC_PERIODIC) periodic_bc(1) = .true. + if (bc_y%beg == BC_PERIODIC .and. 
bc_y%end == BC_PERIODIC) periodic_bc(2) = .true. + if (bc_z%beg == BC_PERIODIC .and. bc_z%end == BC_PERIODIC) periodic_bc(3) = .true. + else call s_mpi_abort(trim(file_path)//' is missing. Exiting.') end if diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 7521249edc..bf4f15d7a8 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -382,10 +382,6 @@ def analytic(self): SIMULATION[f'{var}_{cmp}'] = ParamType.REAL SIMULATION[f'bf_{cmp}'] = ParamType.LOG - - for prepend in ["domain%beg", "domain%end"]: - SIMULATION[f"{cmp}_{prepend}"] = ParamType.REAL - for probe_id in range(1,10+1): for cmp in ["x", "y", "z"]: SIMULATION[f'probe({probe_id})%{cmp}'] = ParamType.REAL From 58a0cba99d6bfe8b7937c86c9327466480acb50d Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:27:33 -0400 Subject: [PATCH 50/62] periodic BCs working with non blocking communication --- src/simulation/m_bubbles_EL.fpp | 120 +++++++------- src/simulation/m_mpi_proxy.fpp | 283 +++++++++++++++++++++----------- 2 files changed, 251 insertions(+), 152 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 8187b14638..5b6e59e6cc 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -79,8 +79,7 @@ module m_bubbles_EL $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') integer, allocatable, dimension(:) :: keep_bubble, prefix_sum - integer, allocatable, dimension(:) :: wrap_bubble_loc - integer, allocatable, dimension(:,:) :: wrap_bubble_dir + integer, allocatable, dimension(:,:) :: wrap_bubble_loc, wrap_bubble_dir integer :: active_bubs $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') @@ -163,7 +162,7 @@ contains @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(keep_bubble(1:nBubs_glb), prefix_sum(1:nBubs_glb)) - @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) + @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb, 1:num_dims), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol @@ -1380,7 +1379,7 @@ contains $:GPU_PARALLEL_LOOP(private='[cell]') do k = 1, nBubs keep_bubble(k) = 1 - wrap_bubble_loc(k) = 0 + wrap_bubble_loc(k,:) = 0 wrap_bubble_dir(k,:) = 0 ! Relocate bubbles at solid boundaries and delete bubbles that leave @@ -1393,14 +1392,12 @@ contains mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k, dest) elseif (bc_x%beg == BC_PERIODIC .and. mtn_pos(k, 1, dest) < pcomm_coords(1)%beg .and. & mtn_posPrev(k, 1, dest) > pcomm_coords(1)%beg) then - !print*, "1, -1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%beg wrap_bubble_dir(k,1) = 1 - wrap_bubble_loc(k) = -1 + wrap_bubble_loc(k,1) = -1 elseif (bc_x%end == BC_PERIODIC .and. mtn_pos(k, 1, dest) > pcomm_coords(1)%end .and. & mtn_posPrev(k, 1, dest) < pcomm_coords(1)%end) then - !print*, "1, 1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%end wrap_bubble_dir(k,1) = 1 - wrap_bubble_loc(k) = 1 + wrap_bubble_loc(k,1) = 1 elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then @@ -1415,14 +1412,12 @@ contains mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k, dest) elseif (bc_y%beg == BC_PERIODIC .and. mtn_pos(k, 2, dest) < pcomm_coords(2)%beg .and. 
& mtn_posPrev(k, 2, dest) > pcomm_coords(2)%beg) then - !print*, "2, -1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%beg wrap_bubble_dir(k,2) = 1 - wrap_bubble_loc(k) = -1 + wrap_bubble_loc(k,2) = -1 elseif (bc_y%end == BC_PERIODIC .and. mtn_pos(k, 2, dest) > pcomm_coords(2)%end .and. & mtn_posPrev(k, 2, dest) < pcomm_coords(2)%end) then - !print*, "2, 1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%end wrap_bubble_dir(k,2) = 1 - wrap_bubble_loc(k) = 1 + wrap_bubble_loc(k,2) = 1 elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then @@ -1439,11 +1434,11 @@ contains elseif (bc_z%beg == BC_PERIODIC .and. mtn_pos(k, 3, dest) < pcomm_coords(3)%beg .and. & mtn_posPrev(k, 3, dest) > pcomm_coords(3)%beg) then wrap_bubble_dir(k,3) = 1 - wrap_bubble_loc(k) = -1 + wrap_bubble_loc(k,3) = -1 elseif (bc_z%end == BC_PERIODIC .and. mtn_pos(k, 3, dest) > pcomm_coords(3)%end .and. & mtn_posPrev(k, 3, dest) < pcomm_coords(3)%end) then wrap_bubble_dir(k,3) = 1 - wrap_bubble_loc(k) = 1 + wrap_bubble_loc(k,3) = 1 elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then @@ -1491,52 +1486,57 @@ contains & nBubs]') call nvtxEndRange - ! Handle deletion of bubbles leaving local domain - do k = 1, nBubs - if (k == 1) then - prefix_sum(k) = keep_bubble(k) - else - prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) - end if - if (k == nBubs) active_bubs = prefix_sum(k) - end do + if (nBubs > 0) then + ! Handle deletion of bubbles leaving local domain + do k = 1, nBubs + if (k == 1) then + prefix_sum(k) = keep_bubble(k) + else + prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) + end if + end do - do k = 1, nBubs - if (keep_bubble(k) == 1) then - if (prefix_sum(k) /= k) then - call s_copy_lag_bubble(k, prefix_sum(k)) + active_bubs = prefix_sum(nBubs) + + do k = 1, nBubs + if (keep_bubble(k) == 1) then + if (prefix_sum(k) /= k) then + call s_copy_lag_bubble(prefix_sum(k), k) + wrap_bubble_dir(prefix_sum(k), :) = wrap_bubble_dir(k, :) + wrap_bubble_loc(prefix_sum(k), :) = wrap_bubble_loc(k, :) + end if end if - end if - if (k == nBubs) nBubs = active_bubs - end do + end do - ! Handle periodic wrapping of bubbles on same processor - newBubs = 0 - do k = 1, nBubs - if (any(wrap_bubble_dir(k, :) == 1)) then - newBubs = newBubs + 1 - new_idx = nBubs + newBubs - call s_copy_lag_bubble(k, new_idx) - do i = 1, num_dims - if (wrap_bubble_dir(k, i) == 1) then - offset = glb_bounds(i)%end - glb_bounds(i)%beg - if (wrap_bubble_loc(k) == 1) then - do q = 1, 2 - mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) - offset - mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) - offset - end do - else if (wrap_bubble_loc(k) == -1) then - do q = 1, 2 - mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) + offset - mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) + offset - end do + nBubs = active_bubs + + ! 
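! A bubble whose image crosses a periodic face is duplicated and the copy is
! shifted by the full domain extent, so both images stay consistent with the
! periodic Eulerian halo. A one-dimensional sketch of that shift (bounds and
! position are illustrative; in the patch the extent is
! glb_bounds(i)%end - glb_bounds(i)%beg):
program periodic_wrap_sketch
    implicit none
    integer, parameter :: wp = kind(1.d0)
    real(wp) :: x_beg, x_end, extent, pos
    x_beg = 0._wp; x_end = 1._wp
    extent = x_end - x_beg
    pos = 1.03_wp                        ! image that drifted past the +x face
    if (pos > x_end) pos = pos - extent  ! re-enters near the -x face
    if (pos < x_beg) pos = pos + extent
    print *, 'wrapped position:', pos
end program periodic_wrap_sketch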
Handle periodic wrapping of bubbles on same processor + newBubs = 0 + do k = 1, nBubs + if (any(wrap_bubble_dir(k, :) == 1)) then + newBubs = newBubs + 1 + new_idx = nBubs + newBubs + call s_copy_lag_bubble(new_idx, k) + do i = 1, num_dims + if (wrap_bubble_dir(k, i) == 1) then + offset = glb_bounds(i)%end - glb_bounds(i)%beg + if (wrap_bubble_loc(k,i) == 1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) - offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) - offset + end do + else if (wrap_bubble_loc(k,i) == -1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) + offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) + offset + end do + end if end if - end if - end do - end if - end do - print*, nBubs, newBubs - nBubs = nBubs + newBubs + end do + end if + end do + nBubs = nBubs + newBubs + end if ! Handle MPI transfer of bubbles going to another processor's local domain if (num_procs > 1) then @@ -1664,7 +1664,7 @@ contains end if ! 3D - if (p > 0) then + if (p > 1) then particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & @@ -2092,7 +2092,7 @@ contains !> The purpose of this subroutine is to remove one specific particle if dt is too small. !! @param bub_id Particle id - impure subroutine s_copy_lag_bubble(src, dest) + impure subroutine s_copy_lag_bubble(dest, src) integer, intent(in) :: src, dest @@ -2110,14 +2110,14 @@ contains intfc_vel(dest, 1:2) = intfc_vel(src, 1:2) mtn_vel(dest, 1:3, 1:2) = mtn_vel(src, 1:3, 1:2) mtn_s(dest, 1:3, 1:2) = mtn_s(src, 1:3, 1:2) + mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) + mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) intfc_draddt(dest, 1:lag_num_ts) = intfc_draddt(src, 1:lag_num_ts) intfc_dveldt(dest, 1:lag_num_ts) = intfc_dveldt(src, 1:lag_num_ts) gas_dpdt(dest, 1:lag_num_ts) = gas_dpdt(src, 1:lag_num_ts) gas_dmvdt(dest, 1:lag_num_ts) = gas_dmvdt(src, 1:lag_num_ts) mtn_dposdt(dest, 1:3, 1:lag_num_ts) = mtn_dposdt(src, 1:3, 1:lag_num_ts) mtn_dveldt(dest, 1:3, 1:lag_num_ts) = mtn_dveldt(src, 1:3, 1:lag_num_ts) - mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) - mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) end subroutine s_copy_lag_bubble diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 27abce892c..316cb541d0 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -52,12 +52,21 @@ module m_mpi_proxy character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff integer :: p_buff_size, p_var_size + !! EL Bubbles communication variables + integer, parameter :: MAX_NEIGHBORS = 27 + integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS) + integer :: recv_offsets(MAX_NEIGHBORS) + integer :: neighbor_list(MAX_NEIGHBORS, 3) + integer :: n_neighbors + $:GPU_DECLARE(create='[p_send_counts]') contains subroutine s_initialize_mpi_proxy_module() + integer :: i, j, k + #ifdef MFC_MPI if (ib) then if (n > 0) then @@ -101,6 +110,21 @@ contains @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) + + ! 
First, collect all neighbor information + n_neighbors = 0 + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) /= 0) then + n_neighbors = n_neighbors + 1 + neighbor_list(n_neighbors, 1) = i + neighbor_list(n_neighbors, 2) = j + neighbor_list(n_neighbors, 3) = k + end if + end do + end do + end do #endif end subroutine s_initialize_particles_mpi @@ -651,7 +675,7 @@ contains end subroutine s_add_particles_to_transfer_list - !> This subroutine performs the MPI communication for lagrangian particles/ + !> This subroutine performs the MPI communication for lagrangian particles/ !! bubbles. !! @param bub_R0 Initial radius of each bubble !! @param Rmax_stats Maximum radius of each bubble @@ -688,103 +712,178 @@ contains real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size, dest - integer :: i, j, k, l, q - integer :: ierr !< Generic flag used to identify and report MPI errors + integer :: i, j, k, l, q, r + integer :: req_send, req_recv, ierr !< Generic flag used to identify and report MPI errors + integer :: send_count, send_offset, recv_count, recv_offset #ifdef MFC_MPI - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) /= 0) then - partner = neighbor_ranks(i, j, k) - send_tag = neighbor_tag(i, j, k) - recv_tag = send_tag !neighbor_tag(-i,-j,-k) + ! Phase 1: Exchange particle counts using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives first + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + end do - call MPI_sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Post all sends + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + send_count = send_count + 1 + call MPI_Isend(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + end do - ! Skip if no particles to exchange - if (p_send_counts(i, j, k) == 0 .and. p_recv_counts(i, j, k) == 0) then - cycle - end if + ! 
Wait for all count exchanges to complete + if (recv_count > 0) then + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + end if + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if - p_recv_size = p_recv_counts(i, j, k)*p_var_size - - position = 0 - do l = 0, p_send_counts(i, j, k) - 1 - bub_id = p_send_ids(i, j, k, l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do + ! Phase 2: Exchange particle data using non-blocking communication + send_count = 0 + recv_count = 0 + + ! 
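A brief note on the staging of this exchange; the reasoning is standard non-blocking MPI practice rather than anything specific to this routine.

    ! Posting every MPI_Irecv before the matching MPI_Isend lets the library
    ! deliver the packed particle data directly into p_recv_buff instead of
    ! internal unexpected-message storage, and the pattern cannot deadlock
    ! because no rank blocks until all of its requests are posted.  The
    ! Waitall on the count exchange has to complete first: p_recv_counts
    ! sizes each Phase 2 receive and recv_offsets partitions the shared
    ! receive buffer between neighbors.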
Post all receives for particle data first + recv_offset = 1 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + p_recv_size = p_recv_counts(i, j, k) * p_var_size + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_buff(recv_offset), p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + recv_offsets(l) = recv_offset + recv_offset = recv_offset + p_recv_size + end if + end do - send_tag = send_tag + max(num_procs, n_neighbor) - recv_tag = recv_tag + max(num_procs, n_neighbor) - - call MPI_sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & - p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - position = 0 - do l = 0, p_recv_counts(i, j, k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id - end do - end if + ! 
Pack and send particle data + send_offset = 0 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_send_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + ! Pack data for sending + position = 0 + do q = 0, p_send_counts(i, j, k) - 1 + bub_id = p_send_ids(i, j, k, q) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Pack(gas_p(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Pack(drad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do end do - end do + + send_count = send_count + 1 + call MPI_Isend(p_send_buff(send_offset), position, MPI_PACKED, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + send_offset = send_offset + position + end if end do + + ! Wait for all recvs for contiguous data to complete + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + + ! 
Process received data as it arrives + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + p_recv_size = p_recv_counts(i, j, k) * p_var_size + recv_offset = recv_offsets(l) + + position = 0 + ! Unpack received data + do q = 0, p_recv_counts(i, j, k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_p(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mv(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rad(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rvel(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, pos(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, posPrev(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, vel(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, scoord(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drad(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drvel(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasp(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasmv(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dpos(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dvel(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id + end do + recv_offset = recv_offset + p_recv_size + end if + + end do + + ! 
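One more note, on correctness rather than speed.

    ! MPI_Isend only starts a transfer; the library may still be reading
    ! p_send_buff until its request completes.  Waiting on send_requests here
    ! ensures the packed send buffer is not repacked or freed while a message
    ! from this call is still in flight.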
Wait for all sends to complete + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if #endif if (any(periodic_bc)) then @@ -850,13 +949,13 @@ contains if (pos(i, 3, dest) > z_cb(p + buff_size)) then do q = 1, 2 pos(i, 3, q) = pos(i, 3, q) - offset - posPrev(i, 2, q) = posPrev(i, 2, q) - offset + posPrev(i, 3, q) = posPrev(i, 3, q) - offset end do endif if (pos(i, 3, dest) < z_cb(-1 - buff_size)) then do q = 1, 2 pos(i, 3, q) = pos(i, 3, q) + offset - posPrev(i, 2, q) = posPrev(i, 2, q) + offset + posPrev(i, 3, q) = posPrev(i, 3, q) + offset end do endif end if From 9b70f9d2ebfd25b2bb95f2936ee66f072bdaa1f6 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 17 Sep 2025 21:22:26 -0400 Subject: [PATCH 51/62] bug fixes for indexing and GPU communication --- src/simulation/m_bubbles_EL.fpp | 7 +++++-- src/simulation/m_bubbles_EL_kernels.fpp | 4 ++-- src/simulation/m_global_parameters.fpp | 5 +++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5b6e59e6cc..fc27ec7f09 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -114,12 +114,15 @@ contains pcomm_coords(1)%beg = x_cb(buff_size - fd_number - 1) pcomm_coords(1)%end = x_cb(m - buff_size + fd_number) + $:GPU_UPDATE(device='[pcomm_coords(1)]') if (n > 0) then pcomm_coords(2)%beg = y_cb(buff_size - fd_number - 1) pcomm_coords(2)%end = y_cb(n - buff_size + fd_number) + $:GPU_UPDATE(device='[pcomm_coords(2)]') if (p > 0) then pcomm_coords(3)%beg = z_cb(buff_size - fd_number - 1) pcomm_coords(3)%end = z_cb(p - buff_size + fd_number) + $:GPU_UPDATE(device='[pcomm_coords(3)]') end if end if @@ -1482,8 +1485,8 @@ contains $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, & - & nBubs]') + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, nBubs, & + & wrap_bubble_dir, wrap_bubble_loc]') call nvtxEndRange if (nBubs > 0) then diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 3ba776c368..35a7528792 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -593,11 +593,11 @@ contains eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 2))/ & (z_cc(cell(3)) - z_cc(cell(3) - 2)) - xi(2) = y_cc(cell(3)) + xi(2) = z_cc(cell(3)) eta(2) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1) - & q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 1))/ & (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) - xi(3) = y_cc(cell(3) + 1) + xi(3) = z_cc(cell(3) + 1) eta(3) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 2) - & q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & (z_cc(cell(3) + 2) - z_cc(cell(3))) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index dbef5fcc98..29870bb613 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -250,6 +250,7 @@ module m_global_parameters !! Processor coordinates in MPI_CART_COMM type(bounds_info), allocatable, dimension(:) :: pcomm_coords + $:GPU_DECLARE(create='[pcomm_coords]') !! 
Coordinates for EL particle transfer type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors @@ -1389,7 +1390,7 @@ contains #:endif allocate (proc_coords(1:num_dims)) - allocate (pcomm_coords(1:num_dims)) + @:ALLOCATE(pcomm_coords(1:num_dims)) if (parallel_io .neqv. .true.) return @@ -1426,7 +1427,7 @@ contains end if deallocate (proc_coords) - deallocate (pcomm_coords) + @:DEALLOCATE(pcomm_coords) if (parallel_io) then deallocate (start_idx) From 49c194f3de9dff1109daf640c7066fec19378ed0 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 24 Sep 2025 17:47:45 -0400 Subject: [PATCH 52/62] improve run_time.inf output --- src/common/m_mpi_common.fpp | 14 ++++- src/simulation/m_data_output.fpp | 102 +++++++++++++++++++++---------- src/simulation/m_sim_helpers.fpp | 56 ++++++++++++----- 3 files changed, 123 insertions(+), 49 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 5eae8fcabb..56bf14d5a1 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -417,17 +417,21 @@ contains impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & + ccfl_max_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb) + Rc_min_glb, & + ccfl_max_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc real(wp), intent(in) :: Rc_min_loc + real(wp), intent(in) :: ccfl_max_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb real(wp), intent(out) :: Rc_min_glb + real(wp), intent(out) :: ccfl_max_glb #ifdef MFC_SIMULATION #ifdef MFC_MPI @@ -448,6 +452,12 @@ contains MPI_COMM_WORLD, ierr) end if + if (surface_tension) then + call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & + mpi_p, MPI_MIN, 0, & + MPI_COMM_WORLD, ierr) + end if + #else icfl_max_glb = icfl_max_loc @@ -457,6 +467,8 @@ contains Rc_min_glb = Rc_min_loc end if + if (surface_tension) ccfl_max_glb = ccfl_max_loc + #endif #endif diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 5f5b79c481..a6009da7a7 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -157,24 +157,20 @@ contains write (3, '(A)') ''; write (3, '(A)') '' ! Generating table header for the stability criteria to be outputted - if (cfl_dt) then - if (viscous) then - write (3, '(A)') ' Time-steps dt = Time ICFL '// & - 'Max VCFL Max Rc Min =' - else - write (3, '(A)') ' Time-steps dt Time '// & - ' ICFL Max ' - end if - else - if (viscous) then - write (3, '(A)') ' Time-steps Time ICFL '// & - 'Max VCFL Max Rc Min ' - else - write (3, '(A)') ' Time-steps Time '// & - ' ICFL Max ' - end if + write (3, '(13X,A8,13X,A10,13X,A10,13X,A10,)', advance="no") & + trim('Time-steps'), trim('dt'), trim('Time'), trim('ICFL Max') + + if (viscous) then + write(3, '(13X,A10,13X,A16)', advance="no") & + trim('VCFL Max'), trim('Rc Min') end if + if (surface_tension) then + write(3, '(13X,A10)', advance="no") trim('CCFL Max') + end if + + write(3, *) ! 
new line + end subroutine s_open_run_time_information_file !> This opens a formatted data file where the root processor @@ -295,8 +291,12 @@ contains call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + if (viscous .and. surface_tension) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf=vcfl_sf, Rc_sf=Rc_sf, ccfl_sf=ccfl_sf) + elseif (viscous) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf=vcfl_sf, Rc_sf=Rc_sf) + elseif (surface_tension) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, ccfl_sf=ccfl_sf) else call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) end if @@ -311,17 +311,18 @@ contains #ifdef _CRAYFTN $:GPU_UPDATE(host='[icfl_sf]') - - if (viscous) then - $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]') - end if - icfl_max_loc = maxval(icfl_sf) if (viscous) then + $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]') vcfl_max_loc = maxval(vcfl_sf) Rc_min_loc = minval(Rc_sf) end if + + if (surface_tension) then + $:GPU_UPDATE(host='[ccfl_sf]') + ccfl_max_loc = maxval(ccfl_sf) + end if #else #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') icfl_max_loc = maxval(icfl_sf) @@ -332,6 +333,12 @@ contains Rc_min_loc = minval(Rc_sf) #:endcall GPU_PARALLEL end if + + if (surface_tension) then + #:call GPU_PARALLEL(copyout='[ccfl_max_loc]', copyin='[ccfl_sf]') + ccfl_max_loc = maxval(ccfl_sf) + #:endcall GPU_PARALLEL + end if #endif ! Determining global stability criteria extrema at current time-step @@ -339,13 +346,16 @@ contains call s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & + ccfl_max_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb) + Rc_min_glb, & + ccfl_max_glb) else icfl_max_glb = icfl_max_loc if (viscous) vcfl_max_glb = vcfl_max_loc if (viscous) Rc_min_glb = Rc_min_loc + if (surface_tension) ccfl_max_glb = ccfl_max_loc end if ! Determining the stability criteria extrema over all the time-steps @@ -356,18 +366,28 @@ contains if (Rc_min_glb < Rc_min) Rc_min = Rc_min_glb end if + if (surface_tension) then + if (ccfl_max_glb > ccfl_max) ccfl_max = ccfl_max_glb + end if + ! Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then + write (3, '(13X,I8,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & + t_step, dt, t_step*dt, icfl_max_glb + if (viscous) then - write (3, '(6X,I8,F10.6,6X,6X,F10.6,6X,F9.6,6X,F9.6,6X,F10.6)') & - t_step, dt, t_step*dt, icfl_max_glb, & + write (3, '(13X,F10.6,13X,ES16.6)', advance="no") & vcfl_max_glb, & Rc_min_glb - else - write (3, '(13X,I8,14X,F10.6,14X,F10.6,13X,F9.6)') & - t_step, dt, t_step*dt, icfl_max_glb end if + if (surface_tension) then + write (3, '(13X,F10.6)', advance="no") & + ccfl_max_glb + end if + + write(3, *) ! new line + if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. Exiting.') elseif (icfl_max_glb > 1._wp) then @@ -383,6 +403,15 @@ contains call s_mpi_abort('VCFL is greater than 1.0. Exiting.') end if end if + + if (surface_tension) then + if (.not. f_approx_equal(ccfl_max_glb, ccfl_max_glb)) then + call s_mpi_abort('CCFL is NaN. Exiting.') + elseif (ccfl_max_glb > 1._wp) then + print *, 'ccfl', ccfl_max_glb + call s_mpi_abort('CCFL is greater than 1.0. 
Exiting.') + end if + end if end if call s_mpi_barrier() @@ -1756,7 +1785,8 @@ contains write (3, '(A,F9.6)') 'ICFL Max: ', icfl_max if (viscous) write (3, '(A,F9.6)') 'VCFL Max: ', vcfl_max - if (viscous) write (3, '(A,F10.6)') 'Rc Min: ', Rc_min + if (viscous) write (3, '(A,ES16.6)') 'Rc Min: ', Rc_min + if (surface_tension) write (3, '(A,F10.6)') 'CCFL Max: ', ccfl_max call cpu_time(run_time) @@ -1805,7 +1835,12 @@ contains @:ALLOCATE(Rc_sf (0:m, 0:n, 0:p)) vcfl_max = 0._wp - Rc_min = 1.e3_wp + Rc_min = 1.e12_wp + end if + + if (surface_tension) then + @:ALLOCATE(ccfl_sf(0:m, 0:n, 0:p)) + ccfl_max = 0._wp end if end if @@ -1841,6 +1876,9 @@ contains if (viscous) then @:DEALLOCATE(vcfl_sf, Rc_sf) end if + if (surface_tension) then + @:DEALLOCATE(ccfl_sf) + end if end if if (down_sample) then diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index df8f6b9eae..c966354a1c 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -181,12 +181,12 @@ contains !! @param icfl_sf cell-centered inviscid cfl number !! @param vcfl_sf (optional) cell-centered viscous CFL number !! @param Rc_sf (optional) cell centered Rc - pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf, ccfl_sf) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf - real(wp), dimension(0:m, 0:n, 0:p), intent(inout), optional :: vcfl_sf, Rc_sf + real(wp), dimension(0:m, 0:n, 0:p), intent(inout), optional :: vcfl_sf, Rc_sf, ccfl_sf real(wp), dimension(2), intent(in) :: Re_l integer, intent(in) :: j, k, l @@ -203,8 +203,7 @@ contains ! Viscous calculations if (viscous) then - if (p > 0) then - !3D + if (p > 0) then ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) vcfl_sf(j, k, l) = maxval(dt/Re_l/rho) & @@ -221,19 +220,30 @@ contains dz(l)*(abs(vel(3)) + c)) & /maxval(1._wp/Re_l) end if - elseif (n > 0) then - !2D + elseif (n > 0) then ! 2D vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/min(dx(j), dy(k))**2._wp Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), & dy(k)*(abs(vel(2)) + c)) & /maxval(1._wp/Re_l) - else - !1D + else ! 1D vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/dx(j)**2._wp Rc_sf(j, k, l) = dx(j)*(abs(vel(1)) + c)/maxval(1._wp/Re_l) end if end if + if (surface_tension) then + if (p > 0) then ! 3D + if (grid_geometry == 3) then + fltr_dtheta = f_compute_filtered_dtheta(k, l) + ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + else + ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + end if + elseif (n > 0) then ! 2D + ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + end if + end if + end subroutine s_compute_stability_from_dt !> Computes dt for a specified CFL number @@ -252,7 +262,7 @@ contains real(wp), dimension(2), intent(in) :: Re_l integer, intent(in) :: j, k, l - real(wp) :: icfl_dt, vcfl_dt + real(wp) :: icfl_dt, vcfl_dt, ccfl_dt real(wp) :: fltr_dtheta ! Inviscid CFL calculation @@ -266,8 +276,7 @@ contains ! Viscous calculations if (viscous) then - if (p > 0) then - !3D + if (p > 0) then ! 
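A brief note on the scaling behind the new capillary (CCFL) quantities; the prefactor quoted from the literature is background only and is not part of this code.

    ! Explicit surface-tension terms are commonly limited by a capillary time
    ! step of the form dt_sigma ~ sqrt(rho * dx_min**3 / sigma); Brackbill,
    ! Kothe & Zemach (1992) use (rho1 + rho2)/(4*pi) in place of rho.  The
    ! CCFL written to run_time.inf is dt divided by sqrt(rho*dx_min**3/sigma)
    ! with the O(1) prefactor left to the user's CFL threshold, and ccfl_dt
    ! below inverts the same relation when dt is chosen from cfl_target.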
3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) vcfl_dt = cfl_target*(min(dx(j), dy(k), fltr_dtheta)**2._wp) & @@ -276,17 +285,32 @@ contains vcfl_dt = cfl_target*(min(dx(j), dy(k), dz(l))**2._wp) & /minval(1/(rho*Re_l)) end if - elseif (n > 0) then - !2D + elseif (n > 0) then ! 2D vcfl_dt = cfl_target*(min(dx(j), dy(k))**2._wp)/maxval((1/Re_l)/rho) - else - !1D + else ! 1D vcfl_dt = cfl_target*(dx(j)**2._wp)/minval(1/(rho*Re_l)) end if end if - if (any(Re_size > 0)) then + if (surface_tension) then + if (p > 0) then ! 3D + if (grid_geometry == 3) then + fltr_dtheta = f_compute_filtered_dtheta(k, l) + ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + else + ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + end if + elseif (n > 0) then ! 2D + ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + end if + end if + + if (any(Re_size > 0) .and. sigma > 0) then + max_dt(j, k, l) = min(icfl_dt, vcfl_dt, ccfl_dt) + elseif (any(Re_size > 0)) then max_dt(j, k, l) = min(icfl_dt, vcfl_dt) + elseif (sigma > 0) then + max_dt(j, k, l) = min(icfl_dt, ccfl_dt) else max_dt(j, k, l) = icfl_dt end if From 60375fb0877a7c6558677f373607861eab3190dd Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 24 Sep 2025 21:28:05 -0400 Subject: [PATCH 53/62] bug fixes for run_time.inf --- src/common/m_mpi_common.fpp | 4 +-- src/pre_process/include/3dHardcodedIC.fpp | 43 ++++++++++++++++++++-- src/pre_process/m_assign_variables.fpp | 2 +- src/simulation/m_data_output.fpp | 44 +++++++++++------------ src/simulation/m_sim_helpers.fpp | 12 +++---- 5 files changed, 72 insertions(+), 33 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 56bf14d5a1..a770ce1c01 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -453,8 +453,8 @@ contains end if if (surface_tension) then - call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & - mpi_p, MPI_MIN, 0, & + call MPI_REDUCE(ccfl_max_loc, ccfl_max_glb, 1, & + mpi_p, MPI_MAX, 0, & MPI_COMM_WORLD, ierr) end if diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index 4a64c01d88..d23db26630 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -46,6 +46,28 @@ end if end if + if (patch_icpp(patch_id)%hcid == 304) then + allocate(ih(0:n_glb, 0:0)) + if (interface_file == '.') then + call s_mpi_abort("Error: interface_file must be specified for hcid=304") + else + inquire (file=trim(interface_file), exist=file_exist) + if (file_exist) then + open(unit=10, file=trim(interface_file), status="old", action="read") + do i = 0, n_glb + read(10, '(A)') line ! Read a full line as a string + value = trim(line) + read(value, *) ih(i, 0) ! Convert string to numeric value + if (.not. f_is_default(normMag)) ih(i, 0) = ih(i, 0) * normMag + if (.not. f_is_default(normFac)) ih(i, 0) = ih(i, 0) + normFac + end do + close(10) + else + call s_mpi_abort("Error: interface_file specified for hcid=304 does not exist") + end if + end if + end if + eps = 1e-9_wp #:enddef @@ -125,10 +147,27 @@ q_prim_vf(E_idx)%sf(i, j, k) = p_th*f_cut_on(r - r_th, eps_smooth)*f_cut_on(x_cc(i), eps_smooth) + p_am - case (303) ! 3D Interface from file + case (303) ! 
3D Interface from file cartesian + + alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & + tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*(0.5_wp / dx))) + + q_prim_vf(advxb)%sf(i,j,k) = alph + q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph + + q_prim_vf(contxb)%sf(i,j,k) = q_prim_vf(advxb)%sf(i,j,k) * 1._wp + q_prim_vf(contxe)%sf(i,j,k) = q_prim_vf(advxe)%sf(i,j,k) * (1._wp / 950._wp) + + q_prim_vf(E_idx)%sf(i,j,k) = p0 + & + (q_prim_vf(contxb)%sf(i,j,k) + q_prim_vf(contxe)%sf(i,j,k)) * g0 * & + (ih(start_idx(1) + i, start_idx(3) + k) - y_cc(j)) + + if (surface_tension) q_prim_vf(c_idx)%sf(i,j,k) = alph + + case (304) ! 3D Interface from file axisymmetric alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & - tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*0.1_wp)) + tanh((ih(start_idx(2) + j,0) - x_cc(i))*(0.01_wp / dx))) q_prim_vf(advxb)%sf(i,j,k) = alph q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index f60573b1b9..ca0c7c1c16 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -684,7 +684,7 @@ contains if (surface_tension) then q_prim_vf(c_idx)%sf(j, k, l) = eta*patch_icpp(patch_id)%cf_val + & - (1._wp - eta)*patch_icpp(smooth_patch_id)%cf_val + (1._wp - eta)*orig_prim_vf(c_idx) end if ! Updating the patch identities bookkeeping variable diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index a6009da7a7..b500451c08 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -157,8 +157,8 @@ contains write (3, '(A)') ''; write (3, '(A)') '' ! Generating table header for the stability criteria to be outputted - write (3, '(13X,A8,13X,A10,13X,A10,13X,A10,)', advance="no") & - trim('Time-steps'), trim('dt'), trim('Time'), trim('ICFL Max') + write (3, '(13X,A9,13X,A10,13X,A10,13X,A10,)', advance="no") & + trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then write(3, '(13X,A10,13X,A16)', advance="no") & @@ -309,7 +309,7 @@ contains ! Determining local stability criteria extrema at current time-step -#ifdef _CRAYFTN +!#ifdef _CRAYFTN $:GPU_UPDATE(host='[icfl_sf]') icfl_max_loc = maxval(icfl_sf) @@ -323,23 +323,23 @@ contains $:GPU_UPDATE(host='[ccfl_sf]') ccfl_max_loc = maxval(ccfl_sf) end if -#else - #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') - icfl_max_loc = maxval(icfl_sf) - #:endcall GPU_PARALLEL - if (viscous) then - #:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) - #:endcall GPU_PARALLEL - end if - - if (surface_tension) then - #:call GPU_PARALLEL(copyout='[ccfl_max_loc]', copyin='[ccfl_sf]') - ccfl_max_loc = maxval(ccfl_sf) - #:endcall GPU_PARALLEL - end if -#endif +!#else + !#:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') + !icfl_max_loc = maxval(icfl_sf) + !#:endcall GPU_PARALLEL + !if (viscous) then + !#:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') + !vcfl_max_loc = maxval(vcfl_sf) + !Rc_min_loc = minval(Rc_sf) + !#:endcall GPU_PARALLEL + !end if + + !if (surface_tension) then + !#:call GPU_PARALLEL(copyout='[ccfl_max_loc]', copyin='[ccfl_sf]') + !ccfl_max_loc = maxval(ccfl_sf) + !#:endcall GPU_PARALLEL + !end if +!#endif ! Determining global stability criteria extrema at current time-step if (num_procs > 1) then @@ -372,8 +372,8 @@ contains ! 
Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then - write (3, '(13X,I8,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & - t_step, dt, t_step*dt, icfl_max_glb + write (3, '(13X,I9,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & + t_step, dt, mytime, icfl_max_glb if (viscous) then write (3, '(13X,F10.6,13X,ES16.6)', advance="no") & diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index c966354a1c..01fc7a9ec1 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -235,12 +235,12 @@ contains if (p > 0) then ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) - ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + ccfl_sf(j, k, l) = dt/sqrt(rho * min(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) else - ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + ccfl_sf(j, k, l) = dt/sqrt(rho * min(dx(j), dy(k), dz(l))**3._wp / sigma) end if elseif (n > 0) then ! 2D - ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + ccfl_sf(j, k, l) = dt/sqrt(rho * min(dx(j), dy(k))**3._wp / sigma) end if end if @@ -296,12 +296,12 @@ contains if (p > 0) then ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) - ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + ccfl_dt = cfl_target*sqrt(rho * min(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) else - ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + ccfl_dt = cfl_target*sqrt(rho * min(dx(j), dy(k), dz(l))**3._wp / sigma) end if elseif (n > 0) then ! 2D - ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + ccfl_dt = cfl_target*sqrt(rho * min(dx(j), dy(k))**3._wp / sigma) end if end if From 8ffc20cf0e28b0376bc4bd5095f06018cff1b360 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 7 Oct 2025 11:32:44 -0400 Subject: [PATCH 54/62] added options and drag models --- src/common/m_constants.fpp | 2 +- src/common/m_derived_types.fpp | 3 + src/common/m_mpi_common.fpp | 9 +-- src/simulation/m_body_forces.fpp | 13 ++-- src/simulation/m_bubbles.fpp | 19 ++++-- src/simulation/m_bubbles_EL.fpp | 81 +++++++++++++++---------- src/simulation/m_bubbles_EL_kernels.fpp | 51 ++++++++++++---- src/simulation/m_global_parameters.fpp | 3 + src/simulation/m_mpi_proxy.fpp | 3 +- toolchain/mfc/run/case_dicts.py | 5 +- 10 files changed, 121 insertions(+), 68 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index b987eb18a1..14ae48d6be 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -22,7 +22,7 @@ module m_constants integer, parameter :: fourier_rings = 5 !< Fourier filter ring limit integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation - integer, parameter :: num_patches_max = 10 + integer, parameter :: num_patches_max = 20 integer, parameter :: num_bc_patches_max = 10 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 75d91dd315..47bd6b4900 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -434,6 +434,9 @@ module m_derived_types integer :: nBubs_glb !< Global number of bubbles integer :: vel_model !< Particle velocity model integer :: drag_model !< Particle 
drag model + logical :: pressure_force !< Include pressure force translational motion + logical :: gravity_force !< Include gravity force in translational motion + logical :: momentum_transfer_force !< Include momentum transfer from radial dynamics in translational motion real(wp) :: c_d !< Drag coefficient real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index a770ce1c01..a50b78dcea 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -479,22 +479,19 @@ contains !! @param var_loc holds the local value to be reduced among !! all the processors in communicator. On output, the variable holds !! the sum, reduced amongst all of the local values. - subroutine s_mpi_reduce_int_sum(var_loc) + subroutine s_mpi_reduce_int_sum(var_loc, sum) integer, intent(inout) :: var_loc + integer, intent(inout) :: sum #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors - ! Temporary storage variable that holds the reduced sum value - integer :: var_glb - ! Performing reduction procedure and eventually storing its result ! into the variable that was initially inputted into the subroutine - call MPI_REDUCE(var_loc, var_glb, 1, MPI_INTEGER, & + call MPI_REDUCE(var_loc, sum, 1, MPI_INTEGER, & MPI_SUM, 0, MPI_COMM_WORLD, ierr) - var_loc = var_glb #endif end subroutine s_mpi_reduce_int_sum diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ea2c3be3b..aaf838d2bb 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -56,16 +56,13 @@ contains subroutine s_compute_acceleration(t) real(wp), intent(in) :: t + accel_bf(:) = 0._wp - if (m > 0) then - accel_bf(1) = g_x + k_x*sin(w_x*t - p_x) - if (n > 0) then - accel_bf(2) = g_y + k_y*sin(w_y*t - p_y) - if (p > 0) then - accel_bf(3) = g_z + k_z*sin(w_z*t - p_z) - end if + #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + if (bf_${XYZ}$) then + accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if - end if + #:endfor $:GPU_UPDATE(device='[accel_bf]') diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index a936254ac2..c2bb091598 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -450,7 +450,7 @@ contains !! @param fRho Current density !! @param fP Current driving pressure !! @param fR Current bubble radius - !! @param fV Current bubble velocity + !! @param fV Current bubble radial velocity !! @param fR0 Equilibrium bubble radius !! @param fpb Internal bubble pressure !! 
@param fpbdot Time-derivative of internal bubble pressure @@ -490,7 +490,7 @@ contains real(wp) :: h !< Time step size real(wp), dimension(4) :: myR_tmp1, myV_tmp1, myR_tmp2, myV_tmp2 !< Bubble radius, radial velocity, and radial acceleration for the inner loop real(wp), dimension(4) :: myPb_tmp1, myMv_tmp1, myPb_tmp2, myMv_tmp2 !< Gas pressure and vapor mass for the inner loop (EL) - real(wp) :: fR2, fV2, fpb2, fmass_v2, vTemp, aTemp + real(wp) :: fR2, fV2, fpb2, fmass_v2, vTemp, aTemp, f_bTemp integer :: l, iter_count call s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & @@ -571,10 +571,19 @@ contains end do elseif (lag_params%vel_model == 2) then do l = 1, num_dims - aTemp = f_get_acceleration(fPos(l), fR, fVel(l), fmass_n, fmass_v, & + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel, fmass_n, fmass_v, & + fRe, fRho, cell, l, q_prim_vf) + aTemp = f_bTemp/(fmass_n + fmass_v) + fPos(l) = fPos(l) + h * fVel(l) + fVel(l) = fVel(l) + h * aTemp + end do + elseif (lag_params%vel_model == 3) then + do l = 1, num_dims + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel, fmass_n, fmass_v, & fRe, fRho, cell, l, q_prim_vf) - fPos(l) = fPos(l) + h*fVel(l) - fVel(l) = fVel(l) + h*aTemp + aTemp = 2._wp * f_bTemp / (fmass_n + fmass_v) - 3 * fV * fVel(l) / fR + fPos(l) = fPos(l) + h * fVel(l) + fVel(l) = fVel(l) + h * aTemp end do end if end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index fc27ec7f09..78e33d9b3c 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -279,7 +279,7 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - call s_mpi_reduce_int_sum(bub_id) + call s_mpi_reduce_int_sum(bub_id, bub_id) if (proc_rank == 0) then if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') @@ -621,7 +621,7 @@ contains real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho real(wp), dimension(3) :: myPos, myVel - real(wp) :: gamma, pi_inf, qv + real(wp) :: gamma, pi_inf, qv, f_b real(wp), dimension(contxe) :: myalpha_rho, myalpha real(wp), dimension(2) :: Re integer, dimension(3) :: cell @@ -734,10 +734,18 @@ contains mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = myVel(l) - mtn_dveldt(k, l, stage) = f_get_acceleration(myPos(l), & - myR, myVel(l), & - myMass_n, myMass_v, & - Re(1), myRho, cell, l, q_prim_vf) + f_b = f_get_bubble_force(myPos(l), & + myR, myV, myVel, & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = f_b / (myMass_n + myMass_v) + elseif (lag_params%vel_model == 3) then + mtn_dposdt(k, l, stage) = myVel(l) + f_b = f_get_bubble_force(myPos(l), & + myR, myV, myVel, & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 2._wp * f_b / (myMass_n + myMass_v) - 3._wp * myV * myVel(l) / myR else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -776,6 +784,7 @@ contains if (lag_params%solver_approach == 2) then + ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=4) do k = 0, p @@ -813,6 +822,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) + ! (beta / (1 - beta)) * dP/dl source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -839,6 +849,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) + ! 
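A hedged reading of the three source blocks in s_compute_bubbles_EL_source, inferred from the loop comments added in this patch; taking beta to be the smeared bubble void fraction is an interpretation, not something stated here.

    ! The two-way coupling of the carrier phase with the Lagrangian bubbles
    ! enters through three contributions, matching the loop comments in this
    ! routine:
    !     q/(1 - beta) * d(beta)/dt       on every conserved variable q,
    !     beta/(1 - beta) * dp/dl         on the momentum in direction l,
    !     beta/(1 - beta) * d(p*u_l)/dl   on the energy,
    ! with the direction-l gradients formed by s_gradient_dir.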
(beta / (1 - beta)) * d(Pu)/dl source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -852,7 +863,6 @@ contains end do end do end do - end if end subroutine s_compute_bubbles_EL_source @@ -1541,6 +1551,13 @@ contains nBubs = nBubs + newBubs end if + if (run_time_info) then + call s_mpi_reduce_int_sum(nBubs, active_bubs) + if (proc_rank == 0 .and. active_bubs == 0) then + call s_mpi_abort('No bubbles remain in the domain. Simulation ending.') + end if + end if + ! Handle MPI transfer of bubbles going to another processor's local domain if (num_procs > 1) then call nvtxStartRange("LAG-BC-TRANSFER-LIST") @@ -1745,36 +1762,34 @@ contains end do end do end do - else - if (dir == 2) then - ! Gradient in y dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do + elseif (dir == 2) then + ! Gradient in y dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do - else - ! Gradient in z dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do + end do + elseif (dir == 3) then + ! Gradient in z dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do - end if + end do end if end subroutine s_gradient_dir diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 35a7528792..484a433da0 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -523,10 +523,11 @@ contains end function f_interpolate_velocity - !! This function calculates the acceleration of the bubble + !! This function calculates the force on a bubble !! based on the pressure gradient, velocity, and drag model. !! @param pos Position of the bubble in direction i !! @param rad Radius of the bubble + !! @param rdot Radial velocity of the bubble !! @param vel Velocity of the bubble !! @param mg Mass of the gas in the bubble !! @param mv Mass of the liquid in the bubble @@ -536,16 +537,18 @@ contains !! @param i Direction of the velocity (1: x, 2: y, 3: z) !! @param q_prim_vf Eulerian field with primitive variables !! 
@return a Acceleration of the bubble in direction i - pure function f_get_acceleration(pos, rad, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(a) + pure function f_get_bubble_force(pos, rad, rdot, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(force) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho + real(wp), intent(in) :: pos, rad, rdot, mg, mv, Re, rho + real(wp), intent(in), dimension(3) :: vel integer, dimension(3), intent(in) :: cell integer, intent(in) :: i type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - real(wp) :: a - real(wp) :: dp, vol, force, v_rel + real(wp) :: a, dp, vol, force, Re_b, C_d, v_rel_mag, rho_b + real(wp), dimension(3) :: v_rel real(wp), dimension(fd_order - 1) :: xi, eta, L + integer :: j if (fd_order == 2) then if (i == 1) then @@ -610,19 +613,43 @@ contains dp = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) end if - vol = (4._wp/3._wp)*pi*rad**3._wp - force = -1._wp*vol*dp + vol = (4._wp/3._wp) * pi * (rad**3._wp) + v_rel_mag = 0._wp - v_rel = vel - f_interpolate_velocity(pos, cell, i, q_prim_vf) + do j = 1, num_dims + v_rel(j) = vel(j) - f_interpolate_velocity(pos, cell, j, q_prim_vf) + v_rel_mag = v_rel_mag + v_rel(j)**2._wp + end do + + force = 0._wp if (lag_params%drag_model == 1) then ! Free slip Stokes drag - force = force - (4._wp*pi*rad*v_rel)/Re + force = force - (4._wp*pi*rad*v_rel(i))/Re else if (lag_params%drag_model == 2) then ! No slip Stokes drag - force = force - (6._wp*pi*rad*v_rel)/Re + force = force - (6._wp*pi*rad*v_rel(i))/Re + else if (lag_params%drag_model == 3) then ! Levich drag + force = force - (12._wp*pi*rad*v_rel(i))/Re + else if (lag_params%drag_model > 0) then ! Drag coefficient model + v_rel_mag = sqrt(v_rel_mag) + Re_b = max(1e-3, rho * v_rel_mag * 2._wp * rad * Re) + if (lag_params%drag_model == 4) then ! Mei et al. 1994 + C_d = 16._wp / Re_b * (1._wp + (8._wp / Re_b + 0.5_wp * (1._wp + 3.315_wp * Re_b**(-0.5_wp))) ** -1._wp) + end if + force = force - 0.5_wp * C_d * rho * pi * rad**2._wp * v_rel(i) * v_rel_mag + end if + + if (lag_params%pressure_force) then + force = force - vol * dp + end if + + if (lag_params%gravity_force) then + force = force + (mg + mv) * accel_bf(i) end if - a = force/(mg + mv) + if (lag_params%momentum_transfer_force) then + force = force - 4._wp * pi * rho * rad**2._wp * v_rel(i) * rdot + end if - end function f_get_acceleration + end function f_get_bubble_force end module m_bubbles_EL_kernels diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 29870bb613..c6ac933a93 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -816,6 +816,9 @@ contains lag_params%nBubs_glb = dflt_int lag_params%vel_model = dflt_int lag_params%drag_model = dflt_int + lag_params%pressure_force = .true. + lag_params%gravity_force = .false. + lag_params%momentum_transfer_force = .false. 
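A compact summary of the translational force options implemented in f_get_bubble_force above; the symbols are shorthand introduced here, with mu standing for the 1/Re nondimensional viscosity used in the code.

    ! With v_rel = v_bubble - u_liquid interpolated at the bubble position and
    ! V = (4/3)*pi*R**3:
    !   drag_model = 1 : F = -4*pi*mu*R*v_rel                 (free-slip Stokes)
    !   drag_model = 2 : F = -6*pi*mu*R*v_rel                 (no-slip Stokes)
    !   drag_model = 3 : F = -12*pi*mu*R*v_rel                (Levich)
    !   drag_model = 4 : F = -(1/2)*C_d*rho*pi*R**2*|v_rel|*v_rel,
    !       C_d  = (16/Re_b)*(1 + (8/Re_b + 0.5*(1 + 3.315*Re_b**(-0.5)))**(-1)),
    !       Re_b = max(1e-3, 2*rho*|v_rel|*R/mu)              (Mei et al., 1994)
    ! Optional additions: -V*dp/dl when pressure_force is set,
    ! (m_g + m_v)*accel_bf when gravity_force is set, and
    ! -4*pi*rho*R**2*Rdot*v_rel when momentum_transfer_force is set.
    ! vel_model = 2 then integrates dv/dt = F/(m_g + m_v), while vel_model = 3
    ! uses dv/dt = 2*F/(m_g + m_v) - 3*Rdot*v/R.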
lag_params%c_d = dflt_real lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 316cb541d0..380997948a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -188,7 +188,8 @@ contains if (bubbles_lagrange) then #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats', 'vel_model', 'drag_model'] + & 'write_bubbles', 'write_bubbles_stats', 'vel_model', 'drag_model', & + & 'pressure_force', 'gravity_force', 'momentum_transfer_force'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index bf4f15d7a8..22c5459b02 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -163,7 +163,7 @@ def analytic(self): PRE_PROCESS[f"patch_bc({bc_p_id})%radius"] = ParamType.REAL -for p_id in range(1, 10+1): +for p_id in range(1, 20+1): for attribute, ty in [("geometry", ParamType.INT), ("smoothen", ParamType.LOG), ("smooth_patch_id", ParamType.INT), ("hcid", ParamType.INT)]: PRE_PROCESS[f"patch_icpp({p_id})%{attribute}"] = ty @@ -326,7 +326,8 @@ def analytic(self): }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', - 'write_bubbles', 'write_bubbles_stats' ]: + 'write_bubbles', 'write_bubbles_stats', 'pressure_force', + 'gravity_force', 'momentum_transfer_force']: SIMULATION[f'lag_params%{var}'] = ParamType.LOG for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', From c23a919511eed92ad5c4cf52b485e8e6cf59a6bb Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:54:49 -0400 Subject: [PATCH 55/62] Add EL Silo I/O, improve run_time.inf, and post process terminal output --- docs/documentation/case.md | 24 +- examples/2D_lagrange_bubblescreen/case.py | 1 + examples/3D_lagrange_bubblescreen/case.py | 1 + src/common/m_constants.fpp | 1 + src/post_process/m_data_output.fpp | 539 +++++++++++++++++++--- src/post_process/m_global_parameters.fpp | 38 ++ src/post_process/m_mpi_proxy.fpp | 11 +- src/post_process/m_start_up.f90 | 17 +- src/post_process/p_main.fpp | 13 + src/pre_process/m_assign_variables.fpp | 2 +- src/simulation/m_body_forces.fpp | 12 +- src/simulation/m_bubbles.fpp | 5 +- src/simulation/m_bubbles_EL.fpp | 418 ++++++++++------- src/simulation/m_data_output.fpp | 37 +- toolchain/mfc/run/case_dicts.py | 18 + 15 files changed, 869 insertions(+), 268 deletions(-) diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 5f1d739530..9e0536f913 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -482,9 +482,9 @@ It is recommended to set `weno_eps` to $10^{-6}$ for WENO-JS, and to $10^{-40}$ - `mp_weno` activates monotonicity preservation in the WENO reconstruction (MPWENO) such that the values of reconstructed variables do not reside outside the range spanned by WENO stencil ([Balsara and Shu, 2000](references.md); [Suresh and Huynh, 1997](references.md)). -- `muscl_order` specifies the order of the MUSCL scheme that is used for spatial reconstruction of variables by an integer of 1, or 2, that corresponds to the 1st, and 2nd order respectively. When using `muscl_order = 2`, `muscl_lim` must be defined. 
+- `muscl_order` specifies the order of the MUSCL scheme that is used for spatial reconstruction of variables by an integer of 1, or 2, that corresponds to the 1st, and 2nd order respectively. When using `muscl_order = 2`, `muscl_lim` must be defined.
 
-- `muscl_lim` specifies the slope limiter that is used in 2nd order MUSCL Reconstruction by an integer from 1 through 5.
+- `muscl_lim` specifies the slope limiter that is used in 2nd order MUSCL Reconstruction by an integer from 1 through 5. `muscl_lim = 1`, `2`, `3`, `4`, and `5` correspond to minmod, monotonized central, Van Albada, Van Leer, and SUPERBEE, respectively.
 
 - `int_comp` activates interface compression using THINC used in MUSCL Reconstruction, with control parameters (`ic_eps`, and `ic_beta`).
 
@@ -599,6 +599,24 @@ To restart the simulation from $k$-th time step, see [Restarting Cases](running.
 | `output_partial_domain` | Logical | Output part of the domain |
 | `[x,y,z]_output%beg` | Real | Beginning of the output domain in the [x,y,z]-direction |
 | `[x,y,z]_output%end` | Real | End of the output domain in the [x,y,z]-direction |
+| `lag_txt_wrt` | Logical | Write Lagrangian bubble data to `.dat` files |
+| `lag_header` | Logical | Write header to Lagrangian bubble `.dat` files |
+| `lag_db_wrt` | Logical | Write Lagrangian bubble data to Silo/HDF5 database files |
+| `lag_id_wrt` | Logical | Add the global bubble ID to the database file |
+| `lag_pos_wrt` | Logical | Add the bubble position to the database file |
+| `lag_pos_prev_wrt` | Logical | Add the previous bubble position to the database file |
+| `lag_vel_wrt` | Logical | Add the bubble translational velocity to the database file |
+| `lag_rad_wrt` | Logical | Add the bubble radius to the database file |
+| `lag_rvel_wrt` | Logical | Add the bubble radial velocity to the database file |
+| `lag_r0_wrt` | Logical | Add the bubble initial radius to the database file |
+| `lag_rmax_wrt` | Logical | Add the bubble maximum radius to the database file |
+| `lag_rmin_wrt` | Logical | Add the bubble minimum radius to the database file |
+| `lag_dphidt_wrt` | Logical | Add the bubble subgrid velocity potential to the database file |
+| `lag_pres_wrt` | Logical | Add the bubble pressure to the database file |
+| `lag_mv_wrt` | Logical | Add the bubble vapor mass to the database file |
+| `lag_mg_wrt` | Logical | Add the bubble gas mass to the database file |
+| `lag_betaT_wrt` | Logical | Add the bubble heat flux model coefficient to the database file |
+| `lag_betaC_wrt` | Logical | Add the bubble mass flux model coefficient to the database file |
 
 The table lists formatted database output parameters. The parameters define variables that are outputted from simulation and file types and formats of data as well as options for post-processing.
@@ -628,7 +646,7 @@ If `file_per_process` is true, then pre_process, simulation, and post_process mu
 
 - `output_partial_domain` activates the output of part of the domain specified by `[x,y,z]_output%beg` and `[x,y,z]_output%end`. This is useful for large domains where only a portion of the domain is of interest.
-It is not supported when `precision = 1` and `format = 1`.
+It is not supported when `precision = 1` and `format = 1`. It also cannot be enabled with `flux_wrt`, `heat_ratio_wrt`, `pres_inf_wrt`, `c_wrt`, `omega_wrt`, `ib`, `schlieren_wrt`, `qm_wrt`, or `liutex_wrt`.
 
 ### 8.
Acoustic Source {#acoustic-source} diff --git a/examples/2D_lagrange_bubblescreen/case.py b/examples/2D_lagrange_bubblescreen/case.py index fb1dd1cf81..18fba87222 100644 --- a/examples/2D_lagrange_bubblescreen/case.py +++ b/examples/2D_lagrange_bubblescreen/case.py @@ -111,6 +111,7 @@ "precision": 2, "prim_vars_wrt": "T", "parallel_io": "T", + "lag_db_wrt": "T", # Patch 1: Water (left) "patch_icpp(1)%geometry": 3, "patch_icpp(1)%x_centroid": 0.0, diff --git a/examples/3D_lagrange_bubblescreen/case.py b/examples/3D_lagrange_bubblescreen/case.py index 1e0d81b81b..40a85eb022 100644 --- a/examples/3D_lagrange_bubblescreen/case.py +++ b/examples/3D_lagrange_bubblescreen/case.py @@ -120,6 +120,7 @@ "precision": 2, "prim_vars_wrt": "T", "parallel_io": "T", + "lag_db_wrt": "T", # Patch 1: Water (left) "patch_icpp(1)%geometry": 9, "patch_icpp(1)%x_centroid": 0.0, diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 1d2e53d206..114286f53b 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -61,6 +61,7 @@ module m_constants ! Lagrange bubbles constants integer, parameter :: mapCells = 3 !< Number of cells around the bubble where the smoothening function will have effect real(wp), parameter :: R_uni = 8314._wp !< Universal gas constant - J/kmol/K + integer, parameter :: lag_io_vars = 21 ! Number of variables per particle for MPI_IO ! Strang Splitting constants real(wp), parameter :: dflt_adap_dt_tol = 1.e-4_wp !< Default tolerance for adaptive step size diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 4c8867225e..d9b9fec3eb 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -30,7 +30,8 @@ module m_data_output s_open_energy_data_file, & s_write_grid_to_formatted_database_file, & s_write_variable_to_formatted_database_file, & - s_write_lag_bubbles_results, & + s_write_lag_bubbles_results_to_text, & + s_write_lag_bubbles_to_formatted_database_file, & s_write_intf_data_file, & s_write_energy_data_file, & s_close_formatted_database_file, & @@ -276,12 +277,14 @@ contains end if if (bubbles_lagrange) then !Lagrangian solver - dbdir = trim(case_dir)//'/lag_bubbles_post_process' - file_loc = trim(dbdir)//'/.' - call my_inquire(file_loc, dir_check) + if (lag_txt_wrt) then + dbdir = trim(case_dir)//'/lag_bubbles_post_process' + file_loc = trim(dbdir)//'/.' + call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(dbdir)) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(dbdir)) + end if end if end if @@ -1094,7 +1097,7 @@ contains !> Subroutine that writes the post processed results in the folder 'lag_bubbles_data' !! @param t_step Current time step - impure subroutine s_write_lag_bubbles_results(t_step) + impure subroutine s_write_lag_bubbles_results_to_text(t_step) integer, intent(in) :: t_step @@ -1112,32 +1115,66 @@ contains logical :: lg_bub_file, file_exist integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer :: ifile, tot_data + integer :: ifile integer :: ierr !< Generic flag used to identify and report MPI errors + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part integer :: i - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: lag_io_null + lag_io_null = 0._wp + + ! 
Construct file path + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + + ! Check if file exists inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') + end if - if (file_exist) then - if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - read (9) tot_data, time_real - close (9) - end if - else - print '(A)', trim(file_loc)//' is missing. Exiting.' - call s_mpi_abort + if (.not. parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) end if - call MPI_BCAST(tot_data, 1, MPI_integer, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(time_real, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + lsizes(1) = file_tot_part + lsizes(2) = lag_io_vars start_idx_part(1) = 0 start_idx_part(2) = 0 @@ -1145,59 +1182,451 @@ contains MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + + allocate (MPI_IO_DATA_lg_bubbles(file_tot_part, 1:lag_io_vars)) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, lag_io_vars*file_tot_part, & + mpi_p, status, ierr) + + write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' + file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + + if (proc_rank == 0) then + open (unit=29, file=file_loc, form='formatted', position='rewind') + + if (lag_header) then + write (29, '(A)', advance='no') + if (lag_id_wrt) write (29, '(A8)', advance='no') 'id, ' + if (lag_pos_wrt) write (29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' + if (lag_pos_prev_wrt) write (29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' + if (lag_vel_wrt) write (29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' + if (lag_rad_wrt) write 
(29, '(A17)', advance='no') 'radius, '
+                if (lag_rvel_wrt) write (29, '(A17)', advance='no') 'rvel, '
+                if (lag_r0_wrt) write (29, '(A17)', advance='no') 'r0, '
+                if (lag_rmax_wrt) write (29, '(A17)', advance='no') 'rmax, '
+                if (lag_rmin_wrt) write (29, '(A17)', advance='no') 'rmin, '
+                if (lag_dphidt_wrt) write (29, '(A17)', advance='no') 'dphidt, '
+                if (lag_pres_wrt) write (29, '(A17)', advance='no') 'pressure, '
+                if (lag_mv_wrt) write (29, '(A17)', advance='no') 'mv, '
+                if (lag_mg_wrt) write (29, '(A17)', advance='no') 'mg, '
+                if (lag_betaT_wrt) write (29, '(A17)', advance='no') 'betaT, '
+                if (lag_betaC_wrt) write (29, '(A17)', advance='no') 'betaC, '
+                write (29, '(A15)') 'time'
+            end if
+
+            do i = 1, file_tot_part
+                id = int(MPI_IO_DATA_lg_bubbles(i, 1))
+                inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21)
+                if (id > 0) then
+                    write (29, '(100(A))', advance='no') ''
+
+                    if (lag_id_wrt) write (29, '(I6, A)', advance='no') id, ', '
+                    if (lag_pos_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', '
+                    if (lag_pos_prev_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', '
+                    if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(9), ', '
+                    if (lag_rad_wrt) write (29, '(E15.7, A)', advance='no') inputvals(10), ', '
+                    if (lag_rvel_wrt) write (29, '(E15.7, A)', advance='no') inputvals(11), ', '
+                    if (lag_r0_wrt) write (29, '(E15.7, A)', advance='no') inputvals(12), ', '
+                    if (lag_rmax_wrt) write (29, '(E15.7, A)', advance='no') inputvals(13), ', '
+                    if (lag_rmin_wrt) write (29, '(E15.7, A)', advance='no') inputvals(14), ', '
+                    if (lag_dphidt_wrt) write (29, '(E15.7, A)', advance='no') inputvals(15), ', '
+                    if (lag_pres_wrt) write (29, '(E15.7, A)', advance='no') inputvals(16), ', '
+                    if (lag_mv_wrt) write (29, '(E15.7, A)', advance='no') inputvals(17), ', '
+                    if (lag_mg_wrt) write (29, '(E15.7, A)', advance='no') inputvals(18), ', '
+                    if (lag_betaT_wrt) write (29, '(E15.7, A)', advance='no') inputvals(19), ', '
+                    if (lag_betaC_wrt) write (29, '(E15.7, A)', advance='no') inputvals(20), ', '
+                    write (29, '(E15.7)') file_time
+                end if
+            end do
+            close (29)
+        end if
+
+        deallocate (MPI_IO_DATA_lg_bubbles)
+
+        call s_mpi_barrier()
+
+        call MPI_FILE_CLOSE(ifile, ierr)
+#endif
+
+    end subroutine s_write_lag_bubbles_results_to_text
+
+    impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step)
+
+        integer, intent(in) :: t_step
+
+        character(len=len_trim(case_dir) + 3*name_len) :: file_loc
+
+        integer :: id
+
+#ifdef MFC_MPI
+        real(wp), dimension(20) :: inputvals
+        real(wp) :: time_real
+        integer, dimension(MPI_STATUS_SIZE) :: status
+        integer(KIND=MPI_OFFSET_KIND) :: disp
+        integer :: view
+
+        logical :: lg_bub_file, file_exist
+
+        integer, dimension(2) :: gsizes, lsizes, start_idx_part
+        integer :: ifile, ierr, tot_data, valid_data, nBub
+        real(wp) :: file_time, file_dt
+        integer :: file_num_procs, file_tot_part
+        integer, dimension(:), allocatable :: proc_bubble_counts
+        real(wp), dimension(1:1, 1:lag_io_vars) :: dummy
+        character(LEN=4*name_len), dimension(num_procs) :: meshnames
+        integer, dimension(num_procs) :: meshtypes
+        real(wp) :: dummy_data
+
+        integer :: i, j
+
+        real(wp), dimension(:), allocatable :: bub_id
+        real(wp), dimension(:), allocatable :: px, py, pz, ppx, ppy, ppz, vx, vy, vz
+        real(wp), dimension(:), allocatable :: radius, rvel, rnot, rmax, rmin, dphidt
+        real(wp), dimension(:), allocatable :: 
pressure, mv, mg, betaT, betaC + + dummy = 0._wp + dummy_data = 0._wp + + ! Construct file path write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=lg_bub_file) - if (lg_bub_file) then + ! Check if file exists + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') + end if + + if (.not. parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! Set time variables from file + + nBub = proc_bubble_counts(proc_rank + 1) + + start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + + start_idx_part(2) = 0 + lsizes(1) = nBub + lsizes(2) = lag_io_vars + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + + if (nBub > 0) then + + #:for VAR in ['bub_id', 'px', 'py', 'pz', 'ppx', 'ppy', 'ppz', 'vx', 'vy', 'vz', & + 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & + 'pressure', 'mv', 'mg', 'betaT', 'betaC'] + allocate(${VAR}$(nBub)) + #:endfor + allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) - allocate (MPI_IO_DATA_lg_bubbles(tot_data, 1:21)) + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, & + lag_io_vars*nBub, mpi_p, status, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, 21*tot_data, & - mpi_p, status, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) - write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' - file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + ! 
Extract data from MPI_IO_DATA_lg_bubbles array + ! Adjust these indices based on your actual data layout + #:for VAR, IDX in [('bub_id', 1), ('px', 2), ('py',3), ('pz',4), ('ppx',5), ('ppy',6), ('ppz',7), & + ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & + ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & + ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] + ${VAR}$(:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) + #:endfor + ! Next, the root processor proceeds to record all of the spatial + ! extents in the formatted database master file. In addition, it + ! also records a sub-domain connectivity map so that the entire + ! grid may be reassembled by looking at the master file. if (proc_rank == 0) then - open (unit=29, file=file_loc, form='formatted', position='rewind') - !write(29,*) 'lg_bubID, x, y, z, xPrev, yPrev, zPrev, xVel, yVel, ', & - ! 'zVel, radius, interfaceVelocity, equilibriumRadius', & - ! 'Rmax, Rmin, dphidt, pressure, mv, mg, betaT, betaC, time' - do i = 1, tot_data - id = int(MPI_IO_DATA_lg_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) - if (id > 0) then - write (29, 6) int(id), inputvals(1), inputvals(2), & - inputvals(3), inputvals(4), inputvals(5), inputvals(6), inputvals(7), & - inputvals(8), inputvals(9), inputvals(10), inputvals(11), & - inputvals(12), inputvals(13), inputvals(14), inputvals(15), & - inputvals(16), inputvals(17), inputvals(18), inputvals(19), & - inputvals(20), time_real -6 format(I6, 21(1x, E15.7)) - end if + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH end do - close (29) + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) + end if + + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + px, py, pz, nBub, & + DB_DOUBLE, DB_F77NULL, ierr) + + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', t_step, bub_id, nBub) + end if + + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step, vx, nBub) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step, vy, nBub) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step, vz, nBub) + end if + end if + + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step, radius, nBub) end if + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step, rvel, nBub) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step, rnot, nBub) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step, rmax, nBub) + end if + + if (lag_rmin_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step, rmin, nBub) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step, dphidt, nBub) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', t_step, pressure, nBub) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step, mv, nBub) + end if + + if (lag_mg_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mg', t_step, mg, nBub) + end if + + if (lag_betaT_wrt) 
then + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step, betaT, nBub) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step, betaC, nBub) + end if + + deallocate (bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & + rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & + betaT, betaC) deallocate (MPI_IO_DATA_lg_bubbles) + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - end if + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) - call s_mpi_barrier() + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + if (proc_rank == 0) then + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH + end do + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + dummy_data, dummy_data, dummy_data, 0, & + DB_DOUBLE, DB_F77NULL, ierr) + + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', t_step) + end if + + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step) + end if + end if + + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step) + end if + + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step) + end if + + if (lag_rmin_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', t_step) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step) + end if + + if (lag_mg_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mg', t_step) + end if + + if (lag_betaT_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step) + end if + + end if #endif - end subroutine s_write_lag_bubbles_results + end subroutine s_write_lag_bubbles_to_formatted_database_file + + subroutine s_write_lag_variable_to_formatted_database_file(varname, t_step, data, nBubs) + + character(len=*), intent(in) :: varname + integer, intent(in) :: t_step + real(wp), dimension(1:), intent(in), optional :: data + 
integer, intent(in), optional :: nBubs + + character(len=64), dimension(num_procs) :: var_names + integer, dimension(num_procs) :: var_types + real(wp) :: dummy_data + + integer :: ierr !< Generic flag used to identify and report database errors + integer :: i + + dummy_data = 0._wp + + if (present(nBubs) .and. present(data)) then + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + else + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBSETEMPTYOK(1) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) + end if + + end subroutine s_write_lag_variable_to_formatted_database_file + impure subroutine s_write_intf_data_file(q_prim_vf) type(scalar_field), dimension(sys_size), intent(IN) :: q_prim_vf diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 134d900f91..2400b13ad1 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -258,6 +258,24 @@ module m_global_parameters logical :: ib logical :: chem_wrt_Y(1:num_species) logical :: chem_wrt_T + logical :: lag_header + logical :: lag_txt_wrt + logical :: lag_db_wrt + logical :: lag_id_wrt + logical :: lag_pos_wrt + logical :: lag_pos_prev_wrt + logical :: lag_vel_wrt + logical :: lag_rad_wrt + logical :: lag_rvel_wrt + logical :: lag_r0_wrt + logical :: lag_rmax_wrt + logical :: lag_rmin_wrt + logical :: lag_dphidt_wrt + logical :: lag_pres_wrt + logical :: lag_mv_wrt + logical :: lag_mg_wrt + logical :: lag_betaT_wrt + logical :: lag_betaC_wrt !> @} real(wp), dimension(num_fluids_max) :: schlieren_alpha !< @@ -329,6 +347,8 @@ module m_global_parameters real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) + real(wp) :: wall_time, wall_time_avg !< Wall time measurements + contains !> Assigns default values to user inputs prior to reading @@ -438,6 +458,24 @@ contains sim_data = .false. cf_wrt = .false. ib = .false. + lag_txt_wrt = .false. + lag_header = .true. + lag_db_wrt = .false. + lag_id_wrt = .true. + lag_pos_wrt = .true. + lag_pos_prev_wrt = .false. + lag_vel_wrt = .true. + lag_rad_wrt = .true. + lag_rvel_wrt = .false. + lag_r0_wrt = .false. + lag_rmax_wrt = .false. + lag_rmin_wrt = .false. + lag_dphidt_wrt = .false. + lag_pres_wrt = .false. + lag_mv_wrt = .false. + lag_mg_wrt = .false. + lag_betaT_wrt = .false. + lag_betaC_wrt = .false. 
schlieren_alpha = dflt_real diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 8699b97fe4..7adb31a433 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -106,10 +106,19 @@ contains & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & & 'down_sample' ] - call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor + if (bubbles_lagrange) then + #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & + & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & + & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & + & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & + & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + call MPI_BCAST(flux_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(omega_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(mom_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index c5e7fe31be..c9b681f8f9 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -90,7 +90,11 @@ impure subroutine s_read_input_file cfl_target, surface_tension, bubbles_lagrange, & sim_data, hyperelasticity, Bx0, relativity, cont_damage, & num_bc_patches, igr, igr_order, down_sample, recon_type, & - muscl_order + muscl_order, lag_header, lag_txt_wrt, lag_db_wrt, & + lag_id_wrt, lag_pos_wrt, lag_pos_prev_wrt, lag_vel_wrt, & + lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, & + lag_rmin_wrt, lag_dphidt_wrt, lag_pres_wrt, lag_mv_wrt, & + lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -176,15 +180,15 @@ impure subroutine s_perform_time_step(t_step) integer, intent(inout) :: t_step if (proc_rank == 0) then if (cfl_dt) then - print '(" [", I3, "%] Saving ", I8, " of ", I0, "")', & + print '(" [", I3, "%] Saving ", I8, " of ", I0, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & int(ceiling(100._wp*(real(t_step - n_start)/(n_save)))), & - t_step, n_save + t_step, n_save, wall_time_avg, wall_time else - print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I0, "")', & + print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I0, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & int(ceiling(100._wp*(real(t_step - t_step_start)/(t_step_stop - t_step_start + 1)))), & (t_step - t_step_start)/t_step_save + 1, & (t_step_stop - t_step_start)/t_step_save + 1, & - t_step + t_step, wall_time_avg, wall_time end if end if @@ -719,7 +723,8 @@ impure subroutine s_save_data(t_step, varname, pres, c, H) call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' - call s_write_lag_bubbles_results(t_step) !! Individual bubble evolution + if (lag_txt_wrt) call s_write_lag_bubbles_results_to_text(t_step) ! text output + if (lag_db_wrt) call s_write_lag_bubbles_to_formatted_database_file(t_step) ! silo file output end if if (sim_data .and. 
proc_rank == 0) then diff --git a/src/post_process/p_main.fpp b/src/post_process/p_main.fpp index 59b69e3b07..12d6353c67 100644 --- a/src/post_process/p_main.fpp +++ b/src/post_process/p_main.fpp @@ -26,6 +26,7 @@ program p_main real(wp) :: pres real(wp) :: c real(wp) :: H + real(wp) :: start, finish call s_initialize_mpi_domain() @@ -49,10 +50,22 @@ program p_main ! available step. To avoid this, we force synchronization here. call s_mpi_barrier() + call cpu_time(start) + call s_perform_time_step(t_step) call s_save_data(t_step, varname, pres, c, H) + call cpu_time(finish) + + wall_time = abs(finish - start) + + if (t_step >= 2) then + wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1) + else + wall_time_avg = 0._wp + end if + if (cfl_dt) then if (t_step == n_save - 1) then exit diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index f60573b1b9..ca0c7c1c16 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -684,7 +684,7 @@ contains if (surface_tension) then q_prim_vf(c_idx)%sf(j, k, l) = eta*patch_icpp(patch_id)%cf_val + & - (1._wp - eta)*patch_icpp(smooth_patch_id)%cf_val + (1._wp - eta)*orig_prim_vf(c_idx) end if ! Updating the patch identities bookkeeping variable diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ea2c3be3b..9c094c8998 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -57,15 +57,11 @@ contains real(wp), intent(in) :: t - if (m > 0) then - accel_bf(1) = g_x + k_x*sin(w_x*t - p_x) - if (n > 0) then - accel_bf(2) = g_y + k_y*sin(w_y*t - p_y) - if (p > 0) then - accel_bf(3) = g_z + k_z*sin(w_z*t - p_z) - end if + #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + if (bf_${XYZ}$) then + accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if - end if + #:endfor $:GPU_UPDATE(device='[accel_bf]') diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 0ec758dc22..23c255428a 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -72,6 +72,9 @@ contains ! Rayleigh-Plesset bubbles fCpbw = f_cpbw_KM(fR0, fR, fV, fpb) f_rddot = f_rddot_RP(fP, fRho, fR, fV, fCpbw) + else + ! Default: No bubble dynamics + f_rddot = 0._wp end if end function f_rddot @@ -445,7 +448,7 @@ contains !! @param fRho Current density !! @param fP Current driving pressure !! @param fR Current bubble radius - !! @param fV Current bubble velocity + !! @param fV Current bubble radial velocity !! @param fR0 Equilibrium bubble radius !! @param fpb Internal bubble pressure !! @param fpbdot Time-derivative of internal bubble pressure diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index bacd19497d..cebffbeb96 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -226,7 +226,7 @@ contains do while (ios == 0) read (94, *, iostat=ios) (inputBubble(i), i=1, 8) if (ios /= 0) cycle - indomain = particle_in_domain(inputBubble(1:3)) + indomain = particle_in_domain_physical(inputBubble(1:3)) id = id + 1 if (id > lag_params%nBubs_glb .and. proc_rank == 0) then call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") @@ -324,25 +324,25 @@ contains cell = -buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - ! Check if the bubble is located in the ghost cell of a symmetric boundary - if ((bc_x%beg == BC_REFLECTIVE .and. cell(1) < 0) .or. 
& - (bc_x%end == BC_REFLECTIVE .and. cell(1) > m) .or. & - (bc_y%beg == BC_REFLECTIVE .and. cell(2) < 0) .or. & - (bc_y%end == BC_REFLECTIVE .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + ! Check if the bubble is located in the ghost cell of a symmetric, or wall boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if if (p > 0) then - if ((bc_z%beg == BC_REFLECTIVE .and. cell(3) < 0) .or. & - (bc_z%end == BC_REFLECTIVE .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if end if ! If particle is in the ghost cells, find the closest non-ghost cell - cell(1) = min(max(cell(1), 0), m) - cell(2) = min(max(cell(2), 0), n) - if (p > 0) cell(3) = min(max(cell(3), 0), p) + !cell(1) = min(max(cell(1), 0), m) + !cell(2) = min(max(cell(2), 0), n) + !if (p > 0) cell(3) = min(max(cell(3), 0), p) call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) dynP = 0._wp @@ -405,6 +405,8 @@ contains integer, intent(inout) :: bub_id, save_count character(LEN=path_len + 2*name_len) :: file_loc + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part #ifdef MFC_MPI real(wp), dimension(20) :: inputvals @@ -419,81 +421,144 @@ contains integer :: ifile, ierr, tot_data, id integer :: i - write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write (file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=file_exist) - - if (file_exist) then - if (proc_rank == 0) then - open (9, file=trim(file_loc), form='unformatted', status='unknown') - read (9) tot_data, mytime, dt - close (9) - print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt - end if - else - print '(a)', trim(file_loc)//' is missing. exiting.' - call s_mpi_abort + + ! Check if file exists + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if - call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! Set time variables from file + mytime = file_time + dt = file_dt + + bub_id = proc_bubble_counts(proc_rank + 1) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + start_idx_part(2) = 0 + lsizes(1) = bub_id + lsizes(2) = lag_io_vars - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars - ! Open the file to write all flow variables - write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=particle_file) + if (bub_id > 0) then + + allocate (MPI_IO_DATA_lag_bubbles(bub_id, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - if (particle_file) then - call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) - call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & - mpi_p, status, ierr) - do i = 1, tot_data - id = int(MPI_IO_DATA_lag_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain(inputvals(1:3)) - if (indomain .and. (id > 0)) then - bub_id = bub_id + 1 - nBubs = bub_id ! local number of bubbles - lag_id(bub_id, 1) = id ! global ID - lag_id(bub_id, 2) = bub_id ! 
local ID - mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) - mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) - mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) - intfc_rad(bub_id, 1) = inputvals(10) - intfc_vel(bub_id, 1) = inputvals(11) - bub_R0(bub_id) = inputvals(12) - Rmax_stats(bub_id) = inputvals(13) - Rmin_stats(bub_id) = inputvals(14) - bub_dphidt(bub_id) = inputvals(15) - gas_p(bub_id, 1) = inputvals(16) - gas_mv(bub_id, 1) = inputvals(17) - gas_mg(bub_id) = inputvals(18) - gas_betaT(bub_id) = inputvals(19) - gas_betaC(bub_id) = inputvals(20) - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - end if + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars*bub_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + nBubs = bub_id + + do i = 1, bub_id + lag_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) + mtn_pos(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 2:4) + mtn_posPrev(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 5:7) + mtn_vel(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 8:10) + intfc_rad(i, 1) = MPI_IO_DATA_lag_bubbles(i, 11) + intfc_vel(i, 1) = MPI_IO_DATA_lag_bubbles(i, 12) + bub_R0(i) = MPI_IO_DATA_lag_bubbles(i, 13) + Rmax_stats(i) = MPI_IO_DATA_lag_bubbles(i, 14) + Rmin_stats(i) = MPI_IO_DATA_lag_bubbles(i, 15) + bub_dphidt(i) = MPI_IO_DATA_lag_bubbles(i, 16) + gas_p(i, 1) = MPI_IO_DATA_lag_bubbles(i, 17) + gas_mv(i, 1) = MPI_IO_DATA_lag_bubbles(i, 18) + gas_mg(i) = MPI_IO_DATA_lag_bubbles(i, 19) + gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20) + gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21) end do + deallocate (MPI_IO_DATA_lag_bubbles) + + else + nBubs = 0 + + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + end if + + if (proc_rank == 0) then + write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt end if - call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (proc_bubble_counts) #endif end subroutine s_restart_bubbles @@ -649,6 +714,7 @@ contains if (lag_params%solver_approach == 2) then + ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=4) do k = 0, p @@ -686,6 +752,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) + ! (q / (1 - beta)) * d(beta)/dt source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -712,6 +779,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) + ! (beta / (1 - beta)) * d(Pu)/dl source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -1234,25 +1302,24 @@ contains (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) end if - ! 
For symmetric boundary condition - if (bc_x%beg == BC_REFLECTIVE) then + ! For symmetric and wall boundary condition + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) >= x_cb(-1))) end if - if (bc_x%end == BC_REFLECTIVE) then + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) end if - if (bc_y%beg == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) end if - if (bc_y%end == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) end if - if (p > 0) then - if (bc_z%beg == BC_REFLECTIVE) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) end if - if (bc_z%end == BC_REFLECTIVE) then + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p))) end if end if @@ -1303,41 +1370,39 @@ contains end do end do end do - else - if (dir == 2) then - ! Gradient in y dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do + elseif (dir == 2) then + ! Gradient in y dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do - else - ! Gradient in z dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do + end do + elseif (dir == 3) then + ! Gradient in z dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do - end if + end do end if end subroutine s_gradient_dir - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. !! 
@param q_time Current time impure subroutine s_write_lag_particles(qtime) @@ -1465,7 +1530,7 @@ contains character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist - integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn + integer :: bub_id, tot_part integer :: i, k #ifdef MFC_MPI @@ -1476,6 +1541,9 @@ contains integer :: view integer, dimension(2) :: gsizes, lsizes, start_idx_part integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp bub_id = 0._wp if (nBubs /= 0) then @@ -1488,78 +1556,60 @@ contains if (.not. parallel_io) return + lsizes(1) = bub_id + lsizes(2) = lag_io_vars + ! Total number of particles call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & MPI_SUM, MPI_COMM_WORLD, ierr) - ! Total number of particles written so far - call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - lsizes(1) = max(1, bub_id) - lsizes(2) = 21 - - ! if the partcle number is zero, put 1 since MPI cannot deal with writing - ! zero particle - part_order(:) = 1 - part_order(proc_rank + 1) = max(1, bub_id) + call MPI_ALLGATHER(bub_id, 1, MPI_INTEGER, proc_bubble_counts, 1, MPI_INTEGER, & + MPI_COMM_WORLD, ierr) - call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & - MPI_MAX, MPI_COMM_WORLD, ierr) - - gsizes(1) = sum(part_ord_mpi(1:num_procs)) - gsizes(2) = 21 - - start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) + ! Calculate starting index for this processor's particles + call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (proc_rank == 0) start_idx_part(1) = 0 start_idx_part(2) = 0 - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + gsizes(1) = tot_part + gsizes(2) = lag_io_vars + + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - ! Writing down the total number of particles + ! Clean up existing file if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - write (9) gsizes(1), mytime, dt - close (9) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if end if - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) - - allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - ! Open the file to write all flow variables - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - disp = 0._wp + ! 
Write header using MPI I/O for consistency + call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, mytime, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, dt, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, num_procs, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, proc_bubble_counts, num_procs, MPI_INTEGER, status, ierr) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + end if - ! Cycle through list - i = 1 + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - if (bub_id == 0) then - MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp - else + if (bub_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(max(1, bub_id), 1:lag_io_vars)) + i = 1 do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) @@ -1575,21 +1625,47 @@ contains MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - i = i + 1 - end if - end do - end if + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars*bub_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) - call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & - mpi_p, status, ierr) + deallocate (MPI_IO_DATA_lag_bubbles) + + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - deallocate (MPI_IO_DATA_lag_bubbles) + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if #endif diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 5f5b79c481..90e750babe 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -157,24 +157,16 @@ contains write (3, '(A)') ''; write (3, '(A)') '' ! 
Generating table header for the stability criteria to be outputted - if (cfl_dt) then - if (viscous) then - write (3, '(A)') ' Time-steps dt = Time ICFL '// & - 'Max VCFL Max Rc Min =' - else - write (3, '(A)') ' Time-steps dt Time '// & - ' ICFL Max ' - end if - else - if (viscous) then - write (3, '(A)') ' Time-steps Time ICFL '// & - 'Max VCFL Max Rc Min ' - else - write (3, '(A)') ' Time-steps Time '// & - ' ICFL Max ' - end if + write (3, '(13X,A9,13X,A10,13X,A10,13X,A10)', advance="no") & + trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') + + if (viscous) then + write(3, '(13X,A10,13X,A16)', advance="no") & + trim('VCFL Max'), trim('Rc Min') end if + write(3, *) ! new line + end subroutine s_open_run_time_information_file !> This opens a formatted data file where the root processor @@ -358,16 +350,17 @@ contains ! Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then + write (3, '(13X,I9,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & + t_step, dt, mytime, icfl_max_glb + if (viscous) then - write (3, '(6X,I8,F10.6,6X,6X,F10.6,6X,F9.6,6X,F9.6,6X,F10.6)') & - t_step, dt, t_step*dt, icfl_max_glb, & + write (3, '(13X,F10.6,13X,ES16.6)', advance="no") & vcfl_max_glb, & Rc_min_glb - else - write (3, '(13X,I8,14X,F10.6,14X,F10.6,13X,F9.6)') & - t_step, dt, t_step*dt, icfl_max_glb end if + write(3, *) ! new line + if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. Exiting.') elseif (icfl_max_glb > 1._wp) then diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 8378d3044d..c72fd9b64c 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -456,6 +456,24 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'output_partial_domain': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, + 'lag_header': ParamType.LOG, + 'lag_txt_wrt': ParamType.LOG, + 'lag_db_wrt': ParamType.LOG, + 'lag_id_wrt': ParamType.LOG, + 'lag_pos_wrt': ParamType.LOG, + 'lag_pos_prev_wrt': ParamType.LOG, + 'lag_vel_wrt': ParamType.LOG, + 'lag_rad_wrt': ParamType.LOG, + 'lag_rvel_wrt': ParamType.LOG, + 'lag_r0_wrt': ParamType.LOG, + 'lag_rmax_wrt': ParamType.LOG, + 'lag_rmin_wrt': ParamType.LOG, + 'lag_dphidt_wrt': ParamType.LOG, + 'lag_pres_wrt': ParamType.LOG, + 'lag_mv_wrt': ParamType.LOG, + 'lag_mg_wrt': ParamType.LOG, + 'lag_betaT_wrt': ParamType.LOG, + 'lag_betaC_wrt': ParamType.LOG, }) for cmp in ["x", "y", "z"]: From e549426511b66f467174de25a7016d0ca1247cd5 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:58:27 -0400 Subject: [PATCH 56/62] format, spelling, and lint --- src/post_process/m_data_output.fpp | 17 ++++++++--------- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_data_output.fpp | 8 ++++---- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index d9b9fec3eb..db9d4f3e0d 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1257,8 +1257,7 @@ contains end subroutine s_write_lag_bubbles_results_to_text - - impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) + impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) integer, intent(in) :: t_step @@ -1360,9 +1359,9 @@ contains if (nBub > 0) then #:for VAR in ['bub_id', 'px', 'py', 'pz', 'ppx', 'ppy', 'ppz', 'vx', 'vy', 'vz', & - 'radius', 
'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & - 'pressure', 'mv', 'mg', 'betaT', 'betaC'] - allocate(${VAR}$(nBub)) + 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & + 'pressure', 'mv', 'mg', 'betaT', 'betaC'] + allocate (${VAR}$ (nBub)) #:endfor allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) @@ -1387,10 +1386,10 @@ contains ! Extract data from MPI_IO_DATA_lg_bubbles array ! Adjust these indices based on your actual data layout #:for VAR, IDX in [('bub_id', 1), ('px', 2), ('py',3), ('pz',4), ('ppx',5), ('ppy',6), ('ppz',7), & - ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & - ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & - ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] - ${VAR}$(:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) + ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & + ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & + ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] + ${VAR}$ (:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) #:endfor ! Next, the root processor proceeds to record all of the spatial diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 9c094c8998..3b12755aa7 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -59,7 +59,7 @@ contains #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (bf_${XYZ}$) then - accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) + accel_bf(${DIR}$) = g_${XYZ}$+k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if #:endfor diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index cebffbeb96..7cda250df3 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1402,7 +1402,7 @@ contains end subroutine s_gradient_dir - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. !! @param q_time Current time impure subroutine s_write_lag_particles(qtime) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 90e750babe..0bc804b183 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -161,11 +161,11 @@ contains trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then - write(3, '(13X,A10,13X,A16)', advance="no") & + write (3, '(13X,A10,13X,A16)', advance="no") & trim('VCFL Max'), trim('Rc Min') end if - write(3, *) ! new line + write (3, *) ! new line end subroutine s_open_run_time_information_file @@ -359,7 +359,7 @@ contains Rc_min_glb end if - write(3, *) ! new line + write (3, *) ! new line if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. 
Exiting.') From bf2138ba0807ee5dddc723d3ac8bea83e031fcfb Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:07:05 -0400 Subject: [PATCH 57/62] typos from github runner --- .typos.toml | 1 + docs/documentation/gpuParallelization.md | 2 +- examples/1D_shuosher_analytical/case.py | 2 +- examples/1D_shuosher_old/case.py | 2 +- examples/1D_shuosher_teno5/case.py | 2 +- examples/1D_shuosher_teno7/case.py | 2 +- examples/1D_shuosher_wenojs5/case.py | 2 +- examples/1D_shuosher_wenom5/case.py | 2 +- examples/1D_shuosher_wenoz5/case.py | 2 +- src/common/m_boundary_common.fpp | 2 +- src/common/m_phase_change.fpp | 4 ++-- src/simulation/m_body_forces.fpp | 2 +- 12 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.typos.toml b/.typos.toml index 492855221f..5a9775699a 100644 --- a/.typos.toml +++ b/.typos.toml @@ -19,6 +19,7 @@ strang = "strang" Strang = "Strang" TKE = "TKE" HSA = "HSA" +INFP = "INFP" [files] extend-exclude = ["docs/documentation/references*", "tests/", "toolchain/cce_simulation_workgroup_256.sh"] diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index c40d3c57d9..8c64996599 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -31,7 +31,7 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using - Data on the GPU has a reference counter - When data is referred to being allocated, it means that GPU memory is allocated if it is not already present in GPU memory. If a variable is already present, the reference counter is just incremented. -- When data is referred to being dellocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory +- When data is referred to being deallocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory - When data is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented - When data is referred to being detached, it means that the attachment counter is decremented. 
If attachment counter is zero, then actually detached diff --git a/examples/1D_shuosher_analytical/case.py b/examples/1D_shuosher_analytical/case.py index 30e50acff5..8126714ff4 100644 --- a/examples/1D_shuosher_analytical/case.py +++ b/examples/1D_shuosher_analytical/case.py @@ -57,7 +57,7 @@ "patch_icpp(1)%pres": 10.3333, "patch_icpp(1)%alpha_rho(1)": 3.957143, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of -4 < x < 5 + # One analytic patch to take care of -4 < x < 5 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 0.5, diff --git a/examples/1D_shuosher_old/case.py b/examples/1D_shuosher_old/case.py index 24dd240ddf..7ad71e733a 100644 --- a/examples/1D_shuosher_old/case.py +++ b/examples/1D_shuosher_old/case.py @@ -57,7 +57,7 @@ "patch_icpp(1)%pres": 10.3333, "patch_icpp(1)%alpha_rho(1)": 3.957143, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of -4 < x < 5 + # One analytic patch to take care of -4 < x < 5 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 0.5, diff --git a/examples/1D_shuosher_teno5/case.py b/examples/1D_shuosher_teno5/case.py index 73189b3cec..2477ba6a87 100644 --- a/examples/1D_shuosher_teno5/case.py +++ b/examples/1D_shuosher_teno5/case.py @@ -59,7 +59,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_teno7/case.py b/examples/1D_shuosher_teno7/case.py index 8a21aa1309..9bac5d82a0 100644 --- a/examples/1D_shuosher_teno7/case.py +++ b/examples/1D_shuosher_teno7/case.py @@ -59,7 +59,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenojs5/case.py b/examples/1D_shuosher_wenojs5/case.py index b99e5ef7e2..52763938fd 100644 --- a/examples/1D_shuosher_wenojs5/case.py +++ b/examples/1D_shuosher_wenojs5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenom5/case.py b/examples/1D_shuosher_wenom5/case.py index 8c96fea5b6..c3dde3a589 100644 --- a/examples/1D_shuosher_wenom5/case.py +++ b/examples/1D_shuosher_wenom5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenoz5/case.py b/examples/1D_shuosher_wenoz5/case.py index a7e8e3e3cd..f959f363ae 100644 --- a/examples/1D_shuosher_wenoz5/case.py +++ b/examples/1D_shuosher_wenoz5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, 
"patch_icpp(2)%x_centroid": 5.5, diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4958bad881..f89278a86d 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_boundary_conditions_common !> @brief The purpose of the module is to apply noncharacteristic and processor -!! boundary condiitons +!! boundary conditions #:include 'macros.fpp' diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index e04242a787..27cadf555a 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -165,7 +165,7 @@ contains ! depleting the mass of liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! tranferring the total mass to vapor + ! transferring the total mass to vapor q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! calling pT-equilibrium for overheated vapor, which is MFL = 0 @@ -175,7 +175,7 @@ contains call s_TSat(pSOV, TSatOV, TSOV) ! subcooled liquid case - ! tranferring the total mass to liquid + ! transferring the total mass to liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! depleting the mass of vapor diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 3b12755aa7..3b88efe060 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -26,7 +26,7 @@ module m_body_forces contains - !> This subroutine inializes the module global array of mixture + !> This subroutine initializes the module global array of mixture !! densities in each grid cell impure subroutine s_initialize_body_forces_module From 1debcdc8df944ac0f3abf4306d1fb976eb9c5092 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 9 Oct 2025 13:23:31 -0400 Subject: [PATCH 58/62] formatting and bug fixes --- src/post_process/m_data_output.fpp | 17 ++++++++--------- src/post_process/m_start_up.f90 | 2 +- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 4 +++- src/simulation/m_data_output.fpp | 8 ++++---- src/simulation/m_start_up.fpp | 6 ++++-- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index d9b9fec3eb..db9d4f3e0d 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1257,8 +1257,7 @@ contains end subroutine s_write_lag_bubbles_results_to_text - - impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) + impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) integer, intent(in) :: t_step @@ -1360,9 +1359,9 @@ contains if (nBub > 0) then #:for VAR in ['bub_id', 'px', 'py', 'pz', 'ppx', 'ppy', 'ppz', 'vx', 'vy', 'vz', & - 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & - 'pressure', 'mv', 'mg', 'betaT', 'betaC'] - allocate(${VAR}$(nBub)) + 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & + 'pressure', 'mv', 'mg', 'betaT', 'betaC'] + allocate (${VAR}$ (nBub)) #:endfor allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) @@ -1387,10 +1386,10 @@ contains ! Extract data from MPI_IO_DATA_lg_bubbles array ! 
Adjust these indices based on your actual data layout #:for VAR, IDX in [('bub_id', 1), ('px', 2), ('py',3), ('pz',4), ('ppx',5), ('ppy',6), ('ppz',7), & - ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & - ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & - ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] - ${VAR}$(:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) + ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & + ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & + ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] + ${VAR}$ (:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) #:endfor ! Next, the root processor proceeds to record all of the spatial diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index c9b681f8f9..7ef6de571d 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -184,7 +184,7 @@ impure subroutine s_perform_time_step(t_step) int(ceiling(100._wp*(real(t_step - n_start)/(n_save)))), & t_step, n_save, wall_time_avg, wall_time else - print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I0, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & + print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I8, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & int(ceiling(100._wp*(real(t_step - t_step_start)/(t_step_stop - t_step_start + 1)))), & (t_step - t_step_start)/t_step_save + 1, & (t_step_stop - t_step_start)/t_step_save + 1, & diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 9c094c8998..3b12755aa7 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -59,7 +59,7 @@ contains #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (bf_${XYZ}$) then - accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) + accel_bf(${DIR}$) = g_${XYZ}$+k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if #:endfor diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index cebffbeb96..fa01e93475 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -529,6 +529,8 @@ contains gas_mg(i) = MPI_IO_DATA_lag_bubbles(i, 19) gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20) gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21) + cell = -buff_size + call s_locate_cell(mtn_pos(i, 1:3, 1), cell, mtn_s(i, 1:3, 1)) end do deallocate (MPI_IO_DATA_lag_bubbles) @@ -1402,7 +1404,7 @@ contains end subroutine s_gradient_dir - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. !! @param q_time Current time impure subroutine s_write_lag_particles(qtime) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 90e750babe..0bc804b183 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -161,11 +161,11 @@ contains trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then - write(3, '(13X,A10,13X,A16)', advance="no") & + write (3, '(13X,A10,13X,A16)', advance="no") & trim('VCFL Max'), trim('Rc Min') end if - write(3, *) ! new line + write (3, *) ! new line end subroutine s_open_run_time_information_file @@ -359,7 +359,7 @@ contains Rc_min_glb end if - write(3, *) ! new line + write (3, *) ! 
new line if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. Exiting.') diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index c69364aa2f..21be331e8e 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1261,7 +1261,9 @@ contains end if if (bubbles_lagrange) then - $:GPU_UPDATE(host='[intfc_rad]') + $:GPU_UPDATE(host='[lag_id, mtn_pos, mtn_posPrev, mtn_vel, intfc_rad, & + & intfc_vel, bub_R0, Rmax_stats, Rmin_stats, bub_dphidt, gas_p, & + & gas_mv, gas_mg, gas_betaT, gas_betaC]') do i = 1, nBubs if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") @@ -1270,7 +1272,7 @@ contains $:GPU_UPDATE(host='[q_beta%vf(1)%sf]') call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, bc_type, q_beta%vf(1)) - $:GPU_UPDATE(host='[Rmax_stats,Rmin_stats,gas_p,gas_mv,intfc_vel]') + call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else From eea226fb3990de338bc36d59a419b320ae5aac8f Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 9 Oct 2025 13:40:12 -0400 Subject: [PATCH 59/62] autoreview suggestions --- src/post_process/m_data_output.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index db9d4f3e0d..5064490d34 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1230,7 +1230,7 @@ contains if (lag_id_wrt) write (29, '(I6, A)', advance='no') id, ', ' if (lag_pos_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' if (lag_pos_prev_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' - if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' + if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(9), ', ' if (lag_rad_wrt) write (29, '(E15.7, A)', advance='no') inputvals(10), ', ' if (lag_rvel_wrt) write (29, '(E15.7, A)', advance='no') inputvals(11), ', ' if (lag_r0_wrt) write (29, '(E15.7, A)', advance='no') inputvals(12), ', ' diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index fa01e93475..7d0dd32bd8 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -339,10 +339,6 @@ contains end if end if - ! 
If particle is in the ghost cells, find the closest non-ghost cell - !cell(1) = min(max(cell(1), 0), m) - !cell(2) = min(max(cell(2), 0), n) - !if (p > 0) cell(3) = min(max(cell(3), 0), p) call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) dynP = 0._wp From 135401168b824db564bbf4ac29686c5272a0ffe9 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 10 Oct 2025 08:17:32 -0400 Subject: [PATCH 60/62] support 1D output in paraview with 1D meshes --- src/common/m_mpi_common.fpp | 4 +- src/post_process/m_data_output.fpp | 339 ++++++++++------------- src/post_process/m_global_parameters.fpp | 22 +- src/post_process/m_mpi_proxy.fpp | 15 +- 4 files changed, 167 insertions(+), 213 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 07463137f2..bc63e1bdfc 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1570,14 +1570,14 @@ contains #ifdef MFC_POST_PROCESS ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then + if (proc_coords(1) > 0 .and. format == 1) then offset_x%beg = 2 else offset_x%beg = 0 end if ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then + if (proc_coords(1) < num_procs_x - 1 .and. format == 1) then offset_x%end = 2 else offset_x%end = 0 diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 5064490d34..371ad8e862 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -156,7 +156,7 @@ contains ! the offsets and the one bookkeeping the number of cell-boundaries ! in each active coordinate direction. Note that all these variables ! are only needed by the Silo-HDF5 format for multidimensional data. - if (format == 1 .and. n > 0) then + if (format == 1) then allocate (data_extents(1:2, 0:num_procs - 1)) @@ -165,11 +165,16 @@ contains allocate (lo_offset(1:3)) allocate (hi_offset(1:3)) allocate (dims(1:3)) - else + elseif (n > 0) then allocate (spatial_extents(1:4, 0:num_procs - 1)) allocate (lo_offset(1:2)) allocate (hi_offset(1:2)) allocate (dims(1:2)) + else + allocate (spatial_extents(1:2, 0:num_procs - 1)) + allocate (lo_offset(1:1)) + allocate (hi_offset(1:1)) + allocate (dims(1:1)) end if end if @@ -181,7 +186,7 @@ contains ! With the same, latter, requirements, the variables bookkeeping the ! number of cell-boundaries in each active coordinate direction are ! also set here. - if (format == 1 .and. n > 0) then + if (format == 1) then if (p > 0) then if (grid_geometry == 3) then lo_offset(:) = (/offset_y%beg, offset_z%beg, offset_x%beg/) @@ -200,12 +205,16 @@ contains n + offset_y%beg + offset_y%end + 2, & p + offset_z%beg + offset_z%end + 2/) end if - else + elseif (n > 0) then lo_offset(:) = (/offset_x%beg, offset_y%beg/) hi_offset(:) = (/offset_x%end, offset_y%end/) dims(:) = (/m + offset_x%beg + offset_x%end + 2, & n + offset_y%beg + offset_y%end + 2/) + else + lo_offset(:) = (/offset_x%beg/) + hi_offset(:) = (/offset_x%end/) + dims(:) = (/m + offset_x%beg + offset_x%end + 2/) end if end if @@ -659,7 +668,7 @@ contains ! Silo-HDF5 Database Format - if (format == 1 .and. n > 0) then + if (format == 1) then ! For multidimensional data sets, the spatial extents of all of ! 
the grid(s) handled by the local processor(s) are recorded so @@ -679,10 +688,13 @@ contains maxval(y_cb), maxval(z_cb)/) end if - else + elseif (n > 0) then spatial_extents(:, 0) = (/minval(x_cb), minval(y_cb), & maxval(x_cb), maxval(y_cb)/) + else + + spatial_extents(:, 0) = (/minval(x_cb), maxval(x_cb)/) end if ! Next, the root processor proceeds to record all of the spatial @@ -715,48 +727,45 @@ contains ! with its offsets that indicate the presence and size of ghost ! zone layer(s), are put in the formatted database slave file. - if (precision == 1) then - if (p > 0) then - z_cb_s(:) = real(z_cb(:), sp) + if (p > 0) then + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) + err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) + if (grid_geometry == 3) then + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + y_cb, z_cb, x_cb, dims, 3, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) + else + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + x_cb, y_cb, z_cb, dims, 3, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) end if - x_cb_s(:) = real(x_cb(:), sp) - y_cb_s(:) = real(y_cb(:), sp) + err = DBFREEOPTLIST(optlist) + elseif (n > 0) then + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) + err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + x_cb, y_cb, DB_F77NULL, dims, 2, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) + err = DBFREEOPTLIST(optlist) + else + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) + err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + x_cb, DB_F77NULL, DB_F77NULL, dims, 1, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) + err = DBFREEOPTLIST(optlist) end if - - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - if (p > 0) then - err = DBMKOPTLIST(2, optlist) - err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) - err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) - if (grid_geometry == 3) then - err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & - 'x', 1, 'y', 1, 'z', 1, & - y_cb${SFX}$, z_cb${SFX}$, x_cb${SFX}$, dims, 3, & - ${DBT}$, DB_COLLINEAR, & - optlist, ierr) - else - err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & - 'x', 1, 'y', 1, 'z', 1, & - x_cb${SFX}$, y_cb${SFX}$, z_cb${SFX}$, dims, 3, & - ${DBT}$, DB_COLLINEAR, & - optlist, ierr) - end if - err = DBFREEOPTLIST(optlist) - else - err = DBMKOPTLIST(2, optlist) - err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) - err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) - err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & - 'x', 1, 'y', 1, 'z', 1, & - x_cb${SFX}$, y_cb${SFX}$, DB_F77NULL, dims, 2, & - ${DBT}$, DB_COLLINEAR, & - optlist, ierr) - err = DBFREEOPTLIST(optlist) - end if - end if - #:endfor - ! END: Silo-HDF5 Database Format ! Binary Database Format @@ -873,144 +882,46 @@ contains if (format == 1) then - ! In 1D, a curve object, featuring the local processor grid and - ! flow variable data, is written to the formatted database slave - ! file. The root process, on the other hand, will also take care - ! of gathering the entire grid and associated flow variable data - ! and write it to the formatted database master file. - if (n == 0) then - - if (precision == 1 .and. 
wp == dp) then - x_cc_s(:) = real(x_cc(:), sp) - q_sf_s(:, :, :) = real(q_sf(:, :, :), sp) - elseif (precision == 1 .and. wp == sp) then - x_cc_s(:) = x_cc(:) - q_sf_s(:, :, :) = q_sf(:, :, :) - end if - - ! Writing the curve object associated with the local process - ! to the formatted database slave file - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - err = DBPUTCURVE(dbfile, trim(varname), len_trim(varname), & - x_cc${SFX}$ (0:m), q_sf${SFX}$, ${DBT}$, m + 1, & - DB_F77NULL, ierr) - end if - #:endfor - - ! Assembling the local grid and flow variable data for the - ! entire computational domain on to the root process - - if (num_procs > 1) then - call s_mpi_defragment_1d_grid_variable() - call s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) - - if (precision == 1) then - x_root_cc_s(:) = real(x_root_cc(:), sp) - q_root_sf_s(:, :, :) = real(q_root_sf(:, :, :), sp) - end if - else - if (precision == 1) then - x_root_cc_s(:) = real(x_cc(:), sp) - q_root_sf_s(:, :, :) = real(q_sf(:, :, :), sp) - else - x_root_cc(:) = x_cc(:) - q_root_sf(:, :, :) = q_sf(:, :, :) - end if - end if - - ! Writing the curve object associated with the root process - ! to the formatted database master file - if (proc_rank == 0) then - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - err = DBPUTCURVE(dbroot, trim(varname), & - len_trim(varname), & - x_root_cc${SFX}$, q_root_sf${SFX}$, & - ${DBT}$, m_root + 1, & - DB_F77NULL, ierr) - end if - #:endfor - end if - - return - - ! In multidimensions, the local process(es) take care of writing - ! the flow variable data they are in charge of to the formatted - ! database slave file. The root processor, additionally, is also - ! responsible in gathering the flow variable extents of each of - ! the local processor(s) and writing them to formatted database - ! master file. + ! Determining the extents of the flow variable on each local + ! process and gathering all this information on root process + if (num_procs > 1) then + call s_mpi_gather_data_extents(q_sf, data_extents) else + data_extents(:, 0) = (/minval(q_sf), maxval(q_sf)/) + end if - ! Determining the extents of the flow variable on each local - ! process and gathering all this information on root process - if (num_procs > 1) then - call s_mpi_gather_data_extents(q_sf, data_extents) - else - data_extents(:, 0) = (/minval(q_sf), maxval(q_sf)/) - end if - - ! Next, the root process proceeds to write the gathered flow - ! variable data extents to formatted database master file. - if (proc_rank == 0) then + ! Next, the root process proceeds to write the gathered flow + ! variable data extents to formatted database master file. 
+ if (proc_rank == 0) then - do i = 1, num_procs - write (varnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & - '/', t_step, '.silo:'//trim(varname) - end do + do i = 1, num_procs + write (varnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + end do - vartypes = DB_QUADVAR + vartypes = DB_QUADVAR - err = DBSET2DSTRLEN(len(varnames(1))) - err = DBMKOPTLIST(2, optlist) - err = DBADDIOPT(optlist, DBOPT_EXTENTS_SIZE, 2) - err = DBADDDOPT(optlist, DBOPT_EXTENTS, data_extents) - err = DBPUTMVAR(dbroot, trim(varname), & - len_trim(varname), num_procs, & - varnames, len_trim(varnames), & - vartypes, optlist, ierr) - err = DBFREEOPTLIST(optlist) + err = DBSET2DSTRLEN(len(varnames(1))) + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_EXTENTS_SIZE, 2) + err = DBADDDOPT(optlist, DBOPT_EXTENTS, data_extents) + err = DBPUTMVAR(dbroot, trim(varname), & + len_trim(varname), num_procs, & + varnames, len_trim(varnames), & + vartypes, optlist, ierr) + err = DBFREEOPTLIST(optlist) - end if + end if - ! Finally, each of the local processor(s) proceeds to write - ! the flow variable data that it is responsible for to the - ! formatted database slave file. - if (wp == dp) then - if (precision == 1) then - do i = -offset_x%beg, m + offset_x%end - do j = -offset_y%beg, n + offset_y%end - do k = -offset_z%beg, p + offset_z%end - q_sf_s(i, j, k) = real(q_sf(i, j, k), sp) - end do - end do - end do - if (grid_geometry == 3) then - do i = -offset_x%beg, m + offset_x%end - do j = -offset_y%beg, n + offset_y%end - do k = -offset_z%beg, p + offset_z%end - cyl_q_sf_s(j, k, i) = q_sf_s(i, j, k) - end do - end do - end do - end if - else - if (grid_geometry == 3) then - do i = -offset_x%beg, m + offset_x%end - do j = -offset_y%beg, n + offset_y%end - do k = -offset_z%beg, p + offset_z%end - cyl_q_sf(j, k, i) = q_sf(i, j, k) - end do - end do - end do - end if - end if - elseif (wp == sp) then + ! Finally, each of the local processor(s) proceeds to write + ! the flow variable data that it is responsible for to the + ! formatted database slave file. 
+ if (wp == dp) then + if (precision == 1) then do i = -offset_x%beg, m + offset_x%end do j = -offset_y%beg, n + offset_y%end do k = -offset_z%beg, p + offset_z%end - q_sf_s(i, j, k) = q_sf(i, j, k) + q_sf_s(i, j, k) = real(q_sf(i, j, k), sp) end do end do end do @@ -1023,38 +934,72 @@ contains end do end do end if + else + if (grid_geometry == 3) then + do i = -offset_x%beg, m + offset_x%end + do j = -offset_y%beg, n + offset_y%end + do k = -offset_z%beg, p + offset_z%end + cyl_q_sf(j, k, i) = q_sf(i, j, k) + end do + end do + end do + end if end if + elseif (wp == sp) then + do i = -offset_x%beg, m + offset_x%end + do j = -offset_y%beg, n + offset_y%end + do k = -offset_z%beg, p + offset_z%end + q_sf_s(i, j, k) = q_sf(i, j, k) + end do + end do + end do + if (grid_geometry == 3) then + do i = -offset_x%beg, m + offset_x%end + do j = -offset_y%beg, n + offset_y%end + do k = -offset_z%beg, p + offset_z%end + cyl_q_sf_s(j, k, i) = q_sf_s(i, j, k) + end do + end do + end do + end if + end if - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - if (p > 0) then - if (grid_geometry == 3) then - err = DBPUTQV1(dbfile, trim(varname), & - len_trim(varname), & - 'rectilinear_grid', 16, & - cyl_q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & - 0, ${DBT}$, DB_ZONECENT, & - DB_F77NULL, ierr) - else - err = DBPUTQV1(dbfile, trim(varname), & - len_trim(varname), & - 'rectilinear_grid', 16, & - q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & - 0, ${DBT}$, DB_ZONECENT, & - DB_F77NULL, ierr) - end if + #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] + if (precision == ${PRECISION}$) then + if (p > 0) then + if (grid_geometry == 3) then + err = DBPUTQV1(dbfile, trim(varname), & + len_trim(varname), & + 'rectilinear_grid', 16, & + cyl_q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & + 0, ${DBT}$, DB_ZONECENT, & + DB_F77NULL, ierr) else err = DBPUTQV1(dbfile, trim(varname), & len_trim(varname), & 'rectilinear_grid', 16, & - q_sf${SFX}$, dims - 1, 2, DB_F77NULL, & + q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & 0, ${DBT}$, DB_ZONECENT, & DB_F77NULL, ierr) end if - end if - #:endfor + elseif (n > 0) then + err = DBPUTQV1(dbfile, trim(varname), & + len_trim(varname), & + 'rectilinear_grid', 16, & + q_sf${SFX}$, dims - 1, 2, DB_F77NULL, & + 0, ${DBT}$, DB_ZONECENT, & + DB_F77NULL, ierr) + else + err = DBPUTQV1(dbfile, trim(varname), & + len_trim(varname), & + 'rectilinear_grid', 16, & + q_sf${SFX}$, dims - 1, 1, DB_F77NULL, & + 0, ${DBT}$, DB_ZONECENT, & + DB_F77NULL, ierr) - end if + end if + end if + #:endfor ! END: Silo-HDF5 Database Format @@ -1871,7 +1816,7 @@ contains ! the offsets and the one bookkeeping the number of cell-boundaries ! in each active coordinate direction. Note that all these variables ! were only needed by Silo-HDF5 format for multidimensional data. - if (format == 1 .and. 
n > 0) then + if (format == 1) then deallocate (spatial_extents) deallocate (data_extents) deallocate (lo_offset) diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 2400b13ad1..07046fc2bf 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -60,7 +60,6 @@ module m_global_parameters !> @name Cell-boundary locations in the x-, y- and z-coordinate directions !> @{ real(wp), allocatable, dimension(:) :: x_cb, x_root_cb, y_cb, z_cb - real(wp), allocatable, dimension(:) :: x_cb_s, y_cb_s, z_cb_s !> @} !> @name Cell-center locations in the x-, y- and z-coordinate directions @@ -850,7 +849,7 @@ contains ! in the Silo-HDF5 format. If this is the case, one must also verify ! whether the raw simulation data is 2D or 3D. In the 2D case, size ! of the z-coordinate direction ghost zone layer must be zeroed out. - if (num_procs == 1 .or. format /= 1 .or. n == 0) then + if (num_procs == 1 .or. format /= 1) then offset_x%beg = 0 offset_x%end = 0 @@ -859,6 +858,13 @@ contains offset_z%beg = 0 offset_z%end = 0 + elseif (n == 0) then + + offset_y%beg = 0 + offset_y%end = 0 + offset_z%beg = 0 + offset_z%end = 0 + elseif (p == 0) then offset_z%beg = 0 @@ -893,17 +899,7 @@ contains idwbuff(3)%end = idwint(3)%end - idwbuff(3)%beg ! Allocating single precision grid variables if needed - if (precision == 1) then - allocate (x_cb_s(-1 - offset_x%beg:m + offset_x%end)) - if (n > 0) then - allocate (y_cb_s(-1 - offset_y%beg:n + offset_y%end)) - if (p > 0) then - allocate (z_cb_s(-1 - offset_z%beg:p + offset_z%end)) - end if - end if - else - allocate (x_cc_s(-buff_size:m + buff_size)) - end if + allocate (x_cc_s(-buff_size:m + buff_size)) ! Allocating the grid variables in the x-coordinate direction allocate (x_cb(-1 - offset_x%beg:m + offset_x%end)) diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 7adb31a433..795936250e 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -235,7 +235,7 @@ contains ierr) end if ! Simulation is 2D - else + elseif (n > 0) then ! Minimum spatial extent in the x-direction call MPI_GATHERV(minval(x_cb), 1, mpi_p, & @@ -260,7 +260,20 @@ contains spatial_extents(4, 0), recvcounts, 4*displs, & mpi_p, 0, MPI_COMM_WORLD, & ierr) + ! Simulation is 1D + else + ! Minimum spatial extent in the x-direction + call MPI_GATHERV(minval(x_cb), 1, mpi_p, & + spatial_extents(1, 0), recvcounts, 4*displs, & + mpi_p, 0, MPI_COMM_WORLD, & + ierr) + + ! 
Maximum spatial extent in the x-direction + call MPI_GATHERV(maxval(x_cb), 1, mpi_p, & + spatial_extents(2, 0), recvcounts, 4*displs, & + mpi_p, 0, MPI_COMM_WORLD, & + ierr) end if #endif From 8311260032601cddf68fe05f3ff0ec90913e0f97 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 10 Oct 2025 08:20:51 -0400 Subject: [PATCH 61/62] CI fix for MacOS runners --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3031858eb1..503f691ec6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,11 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: '3.13' + - name: Setup MacOS if: matrix.os == 'macos' run: | From ca6e5929911d26e603ea06b8936a41866ee4d9d5 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 13 Oct 2025 15:35:46 -0400 Subject: [PATCH 62/62] correct merge errors --- src/common/m_mpi_common.fpp | 17 +---------------- src/post_process/m_mpi_proxy.fpp | 9 --------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 6e403a306e..fd9310abc3 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -404,21 +404,17 @@ contains impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & - ccfl_max_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb, & - ccfl_max_glb) + Rc_min_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc real(wp), intent(in) :: Rc_min_loc - real(wp), intent(in) :: ccfl_max_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb real(wp), intent(out) :: Rc_min_glb - real(wp), intent(out) :: ccfl_max_glb #ifdef MFC_SIMULATION #ifdef MFC_MPI @@ -438,24 +434,13 @@ contains mpi_p, MPI_MIN, 0, & MPI_COMM_WORLD, ierr) end if - - if (surface_tension) then - call MPI_REDUCE(ccfl_max_loc, ccfl_max_glb, 1, & - mpi_p, MPI_MAX, 0, & - MPI_COMM_WORLD, ierr) - end if - #else - icfl_max_glb = icfl_max_loc if (viscous) then vcfl_max_glb = vcfl_max_loc Rc_min_glb = Rc_min_loc end if - - if (surface_tension) ccfl_max_glb = ccfl_max_loc - #endif #endif diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index aa127af969..795936250e 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -104,17 +104,8 @@ contains & 'file_per_process', 'relax', 'cf_wrt', 'igr', 'liutex_wrt', & & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & -<<<<<<< HEAD - & 'output_partial_domain', 'relativity', 'cont_damage', & - & 'lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & - & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & - & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & - & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & - & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] -======= & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & & 'down_sample' ] ->>>>>>> ImprovedELBubblesIO call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor