From 24a9c8c9762787a5627cef3574f9b069946ce8ba Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 23 Apr 2025 18:05:42 -0400 Subject: [PATCH 01/62] add bubble advection --- src/simulation/m_bubbles_EL.fpp | 26 +++++++++++++-- src/simulation/m_sim_helpers.f90 | 55 +++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index b48f941a39..859bbbc3f0 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -258,6 +258,8 @@ contains call s_restart_bubbles(bub_id, save_count) end if + if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') + print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id !$acc update device(bubbles_lagrange, lag_params) @@ -552,6 +554,8 @@ contains if (bubble_model == 2) then !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) copyin(stage) do k = 1, nBubs + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) ! Keller-Miksis model ! Current bubble state @@ -600,25 +604,41 @@ contains gas_dpdt(k, stage) = fpbdt intfc_draddt(k, stage) = fV + !$acc loop seq + do l = 1, num_dims + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1), cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + end do end do else - if (proc_rank == 0) print *, 'WARNING: Lagrange bubbles work with Keller Miksis model!', & - ' Deactivating radial motion.' + !if (proc_rank == 0) print *, 'WARNING: Lagrange bubbles work with Keller Miksis model!', & + !' Deactivating radial motion.' !$acc parallel loop gang vector default(present) private(k) copyin(stage) do k = 1, nBubs + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) + intfc_dveldt(k, stage) = 0._wp intfc_draddt(k, stage) = 0._wp + + !$acc loop seq + do l = 1, num_dims + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1),cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + end do end do end if ! 
Bubbles remain in a fixed position +#if 0 !$acc parallel loop collapse(2) gang vector default(present) private(k) copyin(stage) do k = 1, nBubs do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_ps(k,:,1), l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp end do end do +#endif call nvtxEndRange diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index 88d37f9674..dc07ea811d 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,7 +10,8 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl + s_compute_dt_from_cfl, & + f_interpolate_velocity contains @@ -267,4 +268,56 @@ subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl + function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) +!$acc routine seq + real(wp), dimension(3) :: pos + integer, dimension(3) :: cell + integer :: i + type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp) :: v + real(wp) :: L1, L2, L3 + real(wp) :: x, x1, x2, x3 + real(wp) :: y1, y2, y3 + + if (i == 1) then + x1 = x_cc(cell(1) - 1) + y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + x2 = x_cc(cell(1)) + y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + x3 = x_cc(cell(1) + 1) + y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + x1 = y_cc(cell(2) - 1) + y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + x2 = y_cc(cell(2)) + y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + x3 = y_cc(cell(2) + 1) + y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + else + x1 = z_cc(cell(3) - 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + x2 = z_cc(cell(3)) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + x3 = z_cc(cell(3) + 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + endif + + x = pos(i) + + L1 = ((x - x2)*(x - x3)) / ((x1 - x2)*(x1 - x3)) + L2 = ((x - x1)*(x - x3)) / ((x2 - x1)*(x2 - x3)) + L3 = ((x - x1)*(x - x2)) / ((x3 - x1)*(x3 - x2)) + + v = L1*y1 + L2*y2 + L3*y3 + + !if (i == 1) then + !V = q_prim_vf(momxb)%sf(cell(1),cell(2),cell(3)) + !elseif (i == 2) then + !V = q_prim_vf(momxb+1)%sf(cell(1),cell(2),cell(3)) + !else + !V = q_prim_vf(momxe)%sf(cell(1),cell(2),cell(3)) + !end if + + end function f_interpolate_velocity + end module m_sim_helpers From 614c6c5257f77f8fc876c273e9799dbb2112120b Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 24 Apr 2025 13:12:03 -0400 Subject: [PATCH 02/62] small updates --- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_time_steppers.fpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 859bbbc3f0..96ac686ab0 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -613,7 +613,7 @@ contains else !if (proc_rank == 0) print *, 'WARNING: Lagrange bubbles work with Keller Miksis model!', & !' Deactivating radial motion.' 
- !$acc parallel loop gang vector default(present) private(k) copyin(stage) + !$acc parallel loop gang vector default(present) private(k, cell) copyin(stage) do k = 1, nBubs cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 045dfda597..4403f3a3ad 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1181,6 +1181,8 @@ contains end do + if (bodyForces) call s_apply_bodyforces(q_cons_ts(2)%vf, q_prim_vf, rhs_ts_rkck(6)%vf, dt) + !> Update values mytime = mytime + dt_did call s_update_rkck(q_cons_ts) From 5332d2ce2ea5b7d1aa53603301bfe4c98c2a7ac7 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 24 Apr 2025 22:49:11 -0400 Subject: [PATCH 03/62] working on GPUs --- src/simulation/m_bubbles_EL.fpp | 8 ++++---- src/simulation/m_sim_helpers.f90 | 16 ++++++++-------- src/simulation/m_start_up.fpp | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 96ac686ab0..fd4a96abf2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -605,8 +605,8 @@ contains intfc_draddt(k, stage) = fV !$acc loop seq - do l = 1, num_dims - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1), cell, l, q_prim_vf) + do l = 1, 3 + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp end do end do @@ -622,8 +622,8 @@ contains intfc_draddt(k, stage) = 0._wp !$acc loop seq - do l = 1, num_dims - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,1:3,1),cell, l, q_prim_vf) + do l = 1, 3 + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp end do end do diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index dc07ea811d..b840029d71 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -270,13 +270,12 @@ end subroutine s_compute_dt_from_cfl function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) !$acc routine seq - real(wp), dimension(3) :: pos integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf real(wp) :: v real(wp) :: L1, L2, L3 - real(wp) :: x, x1, x2, x3 + real(wp) :: pos, x1, x2, x3 real(wp) :: y1, y2, y3 if (i == 1) then @@ -293,20 +292,21 @@ function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) x3 = y_cc(cell(2) + 1) y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - else + elseif (i == 3 .and. 
p > 0) then x1 = z_cc(cell(3) - 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) x2 = z_cc(cell(3)) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) x3 = z_cc(cell(3) + 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + else + x1 = 0._wp; x2 = 0._wp; x3 = 0._wp + y1 = 0._wp; y2 = 0._wp; y3 = 0._wp endif - x = pos(i) - - L1 = ((x - x2)*(x - x3)) / ((x1 - x2)*(x1 - x3)) - L2 = ((x - x1)*(x - x3)) / ((x2 - x1)*(x2 - x3)) - L3 = ((x - x1)*(x - x2)) / ((x3 - x1)*(x3 - x2)) + L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) + L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) + L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) v = L1*y1 + L2*y2 + L3*y3 diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0606ad5ce1..4ae97c9e3f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1411,7 +1411,7 @@ contains if (bubbles_lagrange) then !$acc update host(q_beta%vf(1)%sf) call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, q_beta%vf(1)) - !$acc update host(Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_rad, intfc_vel) + !$acc update host(Rmax_stats, Rmin_stats, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_vel) call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else From 68214e3ce1ee1288ceb7a62cb70c8d6ceeed6df5 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 29 Apr 2025 01:19:03 -0400 Subject: [PATCH 04/62] more models --- src/common/m_derived_types.fpp | 2 + src/simulation/m_bubbles_EL.fpp | 35 ++++++----- src/simulation/m_bubbles_EL_kernels.fpp | 84 +++++++++++++++++++++++++ src/simulation/m_global_parameters.fpp | 4 ++ src/simulation/m_sim_helpers.f90 | 55 +--------------- src/simulation/m_start_up.fpp | 2 + toolchain/mfc/run/case_dicts.py | 3 +- 7 files changed, 115 insertions(+), 70 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index b9198335c0..4c99bd0bfa 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -422,6 +422,8 @@ module m_derived_types logical :: write_bubbles !< Write files to track the bubble evolution each time step logical :: write_bubbles_stats !< Write the maximum and minimum radius of each bubble integer :: nBubs_glb !< Global number of bubbles + integer :: vel_model !< Particle velocity model + integer :: drag_model !< Particle drag model real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) real(wp) :: valmaxvoid !< Maximum void fraction permitted diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index fd4a96abf2..39b4f7e8ed 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -606,8 +606,16 @@ contains !$acc loop seq do l = 1, 3 - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if end do end do else @@ -623,23 +631,20 @@ contains !$acc loop seq do l = 1, 3 
- mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if end do end do end if - ! Bubbles remain in a fixed position -#if 0 - !$acc parallel loop collapse(2) gang vector default(present) private(k) copyin(stage) - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_ps(k,:,1), l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - end do - end do -#endif - call nvtxEndRange !< EULER-LAGRANGE COUPLING diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 43246aad55..6aec813680 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -454,4 +454,88 @@ contains end subroutine s_get_cell + function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) +!$acc routine seq + integer, dimension(3) :: cell + integer :: i + type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp) :: v + real(wp) :: L1, L2, L3 + real(wp) :: pos, x1, x2, x3 + real(wp) :: y1, y2, y3 + + if (i == 1) then + x1 = x_cc(cell(1) - 1) + y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + x2 = x_cc(cell(1)) + y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + x3 = x_cc(cell(1) + 1) + y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + x1 = y_cc(cell(2) - 1) + y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + x2 = y_cc(cell(2)) + y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + x3 = y_cc(cell(2) + 1) + y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + elseif (i == 3 .and. p > 0) then + x1 = z_cc(cell(3) - 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + x2 = z_cc(cell(3)) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + x3 = z_cc(cell(3) + 1) + y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + else + x1 = 0._wp; x2 = 0._wp; x3 = 0._wp + y1 = 0._wp; y2 = 0._wp; y3 = 0._wp + endif + + L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) + L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) + L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) + + v = L1*y1 + L2*y2 + L3*y3 + + end function f_interpolate_velocity + + function f_get_acceleration(pos,rad,vel,cell,i,q_prim_vf) result(a) +!$acc routine seq + integer, dimension(3) :: cell + integer :: i + type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp) :: a, area, vol, mass, force, drag_force, vel + real(wp) :: pos, rad, dp, v_rel + + if (i == 1) then + dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + elseif (i == 2) then + dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & + (y_cc(cell(2) + 1) -y_cc(cell(2) - 1)) + elseif (i == 3 .and. 
p > 0) then + dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + else + dp = 0._wp + end if + + area = pi * rad**2._wp + vol = (4._wp/3._wp) * pi * rad**3._wp + + force = -1._wp * area * dp + + if (lag_params%drag_model == 1) then ! Stokes drag + v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + force = force - (6._wp * pi * rad * v_rel) / fluid_pp(1)%Re(1) + end if + + mass = 1e-3 * vol + + a = force / mass + + end function f_get_acceleration + end module m_bubbles_EL_kernels diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 329153bfdf..fce65b7d89 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -318,6 +318,8 @@ module m_global_parameters !! in the flow. These include the stiffened gas equation of state parameters, !! the Reynolds numbers and the Weber numbers. + !$acc declare create(fluid_pp) + !$acc declare create(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3) !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) @@ -739,6 +741,8 @@ contains lag_params%write_bubbles = .false. lag_params%write_bubbles_stats = .false. lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real lag_params%valmaxvoid = dflt_real diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index b840029d71..88d37f9674 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,8 +10,7 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl, & - f_interpolate_velocity + s_compute_dt_from_cfl contains @@ -268,56 +267,4 @@ subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl - function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) -!$acc routine seq - integer, dimension(3) :: cell - integer :: i - type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: v - real(wp) :: L1, L2, L3 - real(wp) :: pos, x1, x2, x3 - real(wp) :: y1, y2, y3 - - if (i == 1) then - x1 = x_cc(cell(1) - 1) - y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) - x2 = x_cc(cell(1)) - y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) - x3 = x_cc(cell(1) + 1) - y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) - elseif (i == 2) then - x1 = y_cc(cell(2) - 1) - y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) - x2 = y_cc(cell(2)) - y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) - x3 = y_cc(cell(2) + 1) - y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - elseif (i == 3 .and. 
p > 0) then - x1 = z_cc(cell(3) - 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) - x2 = z_cc(cell(3)) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) - x3 = z_cc(cell(3) + 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - else - x1 = 0._wp; x2 = 0._wp; x3 = 0._wp - y1 = 0._wp; y2 = 0._wp; y3 = 0._wp - endif - - L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) - L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) - L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) - - v = L1*y1 + L2*y2 + L3*y3 - - !if (i == 1) then - !V = q_prim_vf(momxb)%sf(cell(1),cell(2),cell(3)) - !elseif (i == 2) then - !V = q_prim_vf(momxb+1)%sf(cell(1),cell(2),cell(3)) - !else - !V = q_prim_vf(momxe)%sf(cell(1),cell(2),cell(3)) - !end if - - end function f_interpolate_velocity - end module m_sim_helpers diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 4ae97c9e3f..755446e91e 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1633,6 +1633,8 @@ contains !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out) !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out) + !$acc update device(fluid_pp) + !$acc update device(relax, relax_model) if (relax) then !$acc update device(palpha_eps, ptgalpha_eps) diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 0294ffc5e5..9ebb5fdbf6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -266,7 +266,8 @@ def analytic(self): 'write_bubbles', 'write_bubbles_stats' ]: SIMULATION[f'lag_params%{var}'] = ParamType.LOG -for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb']: +for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', + 'vel_model', 'drag_model']: SIMULATION[f'lag_params%{var}'] = ParamType.INT for var in [ 'epsilonb', 'valmaxvoid', 'charwidth', 'diffcoefvap', From 950752c52051fa38dd1d199d4ab45ca1ea638204 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:03:41 -0400 Subject: [PATCH 05/62] first changes --- src/common/m_mpi_common.fpp | 698 ++++++++++++++++++++--- src/post_process/m_mpi_proxy.fpp | 440 +------------- src/pre_process/m_mpi_proxy.fpp | 454 --------------- src/simulation/m_mpi_proxy.fpp | 951 ++++++------------------------- 4 files changed, 808 insertions(+), 1735 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 86199de3ce..da35420544 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -28,19 +28,23 @@ module m_mpi_common !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_send !< + real(wp), private, allocatable, dimension(:), target :: buff_send !< !! This variable is utilized to pack and send the buffer of the cell-average !! primitive variables, for a single computational domain boundary at the !! time, to the relevant neighboring processor. - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_recv !< - !! q_prims_buff_recv is utilized to receive and unpack the buffer of the cell- + real(wp), private, allocatable, dimension(:), target :: buff_recv !< + !! buff_recv is utilized to receive and unpack the buffer of the cell- !! average primitive variables, for a single computational domain boundary !! at the time, from the relevant neighboring processor. 
- !$acc declare create(q_prims_buff_send, q_prims_buff_recv) + !$acc declare create(buff_send, buff_recv) integer :: halo_size + !$acc declare create(halo_size) + + integer :: nVars !< nVars for surface tension communication + !$acc declare create(nVars) contains @@ -51,52 +55,34 @@ contains #ifdef MFC_MPI - ! Allocating q_prims_buff_send/recv and ib_buff_send/recv. Please note that + ! Allocating q_cons_buff_send/recv and ib_buff_send/recv. Please note that ! for the sake of simplicity, both variables are provided sufficient ! storage to hold the largest buffer in the computational domain. -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then - if (n > 0) then - if (p > 0) then - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) - else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & - & (max(m, n) + 2*buff_size + 1))) - end if - else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4))) - end if - - @:ALLOCATE(q_prims_buff_recv(0:ubound(q_prims_buff_send, 1))) - v_size = sys_size + 2*nb*4 + if (qbmm .and. .not. polytropic) then + v_size = vec_size + 2*nb*4 else -#endif - if (n > 0) then - if (p > 0) then - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*sys_size* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) - else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*sys_size* & - & (max(m, n) + 2*buff_size + 1))) - end if + v_size = sys_size + end if + + if (n > 0) then + if (p > 0) then + halo_size = NINT(-1._wp + 1._wp * buff_size*(v_size)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1)) else - @:ALLOCATE(q_prims_buff_send(0:-1 + buff_size*sys_size)) + halo_size = -1 + buff_size*(v_size)* & + & (max(m, n) + 2*buff_size + 1) end if + else + halo_size = -1 + buff_size*(v_size) + end if - @:ALLOCATE(q_prims_buff_recv(0:ubound(q_prims_buff_send, 1))) + !$acc update device(halo_size, v_size) - v_size = sys_size -#ifdef MFC_SIMULATION - end if -#endif + @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) #endif @@ -709,7 +695,7 @@ contains do j = 0, buff_size - 1 do i = 1, sys_size r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - q_prims_buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) + buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) end do end do end do @@ -725,7 +711,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - q_prims_buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) end do end do end do @@ -740,7 +726,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - q_prims_buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) end do end do end do @@ -757,7 +743,7 @@ contains r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - q_prims_buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) + buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) end do end do end do @@ -774,7 +760,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - q_prims_buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) + 
buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) end do end do end do @@ -790,7 +776,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - q_prims_buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) end do end do end do @@ -807,7 +793,7 @@ contains r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_prims_buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) + buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) end do end do end do @@ -824,7 +810,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_prims_buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) end do end do end do @@ -840,7 +826,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_prims_buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) end do end do end do @@ -853,8 +839,8 @@ contains #:endfor call nvtxEndRange ! Packbuf - p_send => q_prims_buff_send(0) - p_recv => q_prims_buff_recv(0) + p_send => buff_send(0) + p_recv => buff_recv(0) ! Send/Recv #ifdef MFC_SIMULATION @@ -866,7 +852,7 @@ contains call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(q_prims_buff_send) + !$acc update host(buff_send) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -884,7 +870,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(q_prims_buff_recv) + !$acc update device(buff_recv) call nvtxEndRange #:endif end if @@ -908,7 +894,7 @@ contains do i = 1, sys_size r = (i - 1) + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = q_prims_buff_recv(r) + q_cons_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then print *, "Error", j, k, l, i @@ -930,7 +916,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = q_prims_buff_recv(r) + pb(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -945,7 +931,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = q_prims_buff_recv(r) + mv(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -962,7 +948,7 @@ contains r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = q_prims_buff_recv(r) + q_cons_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then print *, "Error", j, k, l, i @@ -985,7 +971,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = q_prims_buff_recv(r) + pb(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1001,7 +987,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & 
((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = q_prims_buff_recv(r) + mv(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1020,7 +1006,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = q_prims_buff_recv(r) + q_cons_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then print *, "Error", j, k, l, i @@ -1044,7 +1030,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = q_prims_buff_recv(r) + pb(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1061,7 +1047,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = q_prims_buff_recv(r) + mv(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) end do end do end do @@ -1078,11 +1064,593 @@ contains end subroutine s_mpi_sendrecv_variables_buffers + subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) + + type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0 + + integer :: pack_offset, unpack_offset + real(wp), pointer :: p_send, p_recv + +#ifdef MFC_MPI + + nVars = num_dims + 1 + !$acc update device(nVars) + + buffer_counts = (/ & + buff_size*nVars*(n + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVars + r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) + end do + end do + end do + end do + + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) + end do + end do + end do + end do + + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) + end do + end do + end do + end do + #:endif + end if + #:endfor + + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + p_send => buff_send(0) + p_recv => buff_recv(0) + + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + #:else + !$acc update host(buff_send) + #:endif + + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + !$acc update device(buff_recv) + #:endif + end if + #:endfor + + ! Unpack Received Buffer + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVars + r = (i - 1) + nVars* & + (j + buff_size*((k + 1) + (n + 1)*l)) + c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + #:else + ! 
Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + #:endif + end if + #:endfor + +#endif + + end subroutine s_mpi_sendrecv_capilary_variables_buffers + + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain + +#ifdef MFC_MPI + + integer :: num_procs_x, num_procs_y, num_procs_z !< + !! Optimal number of processors in the x-, y- and z-directions + + real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions + + real(wp) :: fct_min !< + !! Processor factorization (fct) minimization parameter + + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator + + integer :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors + + integer :: i, j !< Generic loop iterators + + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if + + ! 3D Cartesian Processor Topology + if (n > 0) then + + if (p > 0) then + + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. + + ! Initial values of the processor factorization optimization + num_procs_x = 1 + num_procs_y = num_procs + num_procs_z = 1 + ierr = -1 + + ! Computing minimization variable for these initial values + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Searching for optimal computational domain distribution + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + + ! 
Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10._wp*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + num_procs_x = i + num_procs_y = j + num_procs_z = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + + end do + + end if + + end do + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & + num_procs_y, num_procs_z/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology + + ! Global Parameters for z-direction + + ! Number of remaining cells + rem_cells = mod(p + 1, num_procs_z) + + ! Optimal number of cells per processor + p = (p + 1)/num_procs_z - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%beg, ierr) + proc_coords(3) = proc_coords(3) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%end, ierr) + proc_coords(3) = proc_coords(3) - 1 + end if + + if (parallel_io) then + if (proc_coords(3) < rem_cells) then + start_idx(3) = (p + 1)*proc_coords(3) + else + start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + end if + end if + + ! 2D Cartesian Processor Topology + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. 
& + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & + num_procs_y/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + + end if + ! END: 2D Cartesian Processor Topology + + ! Global Parameters for y-direction + + ! Number of remaining cells + rem_cells = mod(n + 1, num_procs_y) + + ! Optimal number of cells per processor + n = (n + 1)/num_procs_y - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%beg, ierr) + proc_coords(2) = proc_coords(2) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%end, ierr) + proc_coords(2) = proc_coords(2) - 1 + end if + + if (parallel_io) then + if (proc_coords(2) < rem_cells) then + start_idx(2) = (n + 1)*proc_coords(2) + else + start_idx(2) = (n + 1)*proc_coords(2) + rem_cells + end if + end if + + ! 1D Cartesian Processor Topology + else + + ! Optimal processor topology + num_procs_x = num_procs + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + + ! Global Parameters for x-direction + + ! Number of remaining cells + rem_cells = mod(m + 1, num_procs_x) + + ! Optimal number of cells per processor + m = (m + 1)/num_procs_x - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) + proc_coords(1) = proc_coords(1) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. 
num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) + proc_coords(1) = proc_coords(1) - 1 + end if + + if (parallel_io) then + if (proc_coords(1) < rem_cells) then + start_idx(1) = (m + 1)*proc_coords(1) + else + start_idx(1) = (m + 1)*proc_coords(1) + rem_cells + end if + end if + +#endif + + end subroutine s_mpi_decompose_computational_domain + !> Module deallocation and/or disassociation procedures subroutine s_finalize_mpi_common_module #ifdef MFC_MPI - deallocate (q_prims_buff_send, q_prims_buff_recv) + deallocate (buff_send, buff_recv) #endif end subroutine s_finalize_mpi_common_module diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 5154d19102..db7c50d648 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -202,445 +202,7 @@ contains end subroutine s_mpi_bcast_user_inputs - !> This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that - !! is overseen by the local processor. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. 
& - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. bc_z%beg == -1) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(3) > 0 .and. format == 1) then - offset_z%beg = 2 - else - offset_z%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. bc_z%end == -1) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(3) < num_procs_z - 1 .and. 
format == 1) then - offset_z%end = 2 - else - offset_z%end = 0 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. bc_y%beg == -1) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(2) > 0 .and. format == 1) then - offset_y%beg = 2 - else - offset_y%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == -1) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then - offset_y%end = 2 - else - offset_y%end = 0 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Number of cells in undecomposed computational domain needed - ! 
for sub-domain reassembly during formatted data output - m_root = m - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. bc_x%beg == -1) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then - offset_x%beg = 2 - else - offset_x%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == -1) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then - offset_x%end = 2 - else - offset_x%end = 0 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> Communicates the buffer regions associated with the grid +!> Communicates the buffer regions associated with the grid !! variables with processors in charge of the neighboring !! sub-domains. Note that only cell-width spacings feature !! buffer regions so that no information relating to the diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index c9888e79ca..1428600fff 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -131,458 +131,4 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Description: This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that is - !! overseen by the local processor. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(wp) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. 
parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! 
Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology - - ! Sub-domain Global Parameters in z-direction - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dz = (z_domain%end - z_domain%beg)/real(p + 1, wp) - end if - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(3) < rem_cells) then - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3)) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1) & - - (num_procs_z - rem_cells)) - else - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3) + rem_cells) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1)) - end if - end if - else - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! Generating 2D Cartesian Processor Topology - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - ! Finding corresponding Cartesian coordinates of the local - ! 
processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology - - ! Sub-domain Global Parameters in y-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dy = (y_domain%end - y_domain%beg)/real(n + 1, wp) - end if - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(2) < rem_cells) then - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2)) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1) & - - (num_procs_y - rem_cells)) - else - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2) + rem_cells) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1)) - end if - end if - else - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! Generating 1D Cartesian Processor Topology - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Sub-domain Global Parameters in x-direction - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dx = (x_domain%end - x_domain%beg)/real(m + 1, wp) - end if - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. 
num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(1) < rem_cells) then - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1)) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1) & - - (num_procs_x - rem_cells)) - else - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1) + rem_cells) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1)) - end if - end if - else - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - end module m_mpi_proxy diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 9dd922f36b..145bbbd23b 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -61,42 +61,32 @@ module m_mpi_proxy !> @} !$acc declare create(v_size) - integer :: nVars !< nVars for surface tension communication - !$acc declare create(nVars) - + contains - !> The computation of parameters, the allocation of memory, - !! the association of pointers and/or the execution of any - !! other procedures that are necessary to setup the module. - subroutine s_initialize_mpi_proxy_module + subroutine s_initialize_mpi_proxy() -#ifdef MFC_MPI - if (surface_tension) then - nVars = num_dims + 1 + if (ib) then if (n > 0) then if (p > 0) then - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1))) else - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & - & (max(m, n) + 2*buff_size + 1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1))) end if else - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) end if - - @:ALLOCATE(c_divs_buff_recv(0:ubound(c_divs_buff_send, 1))) + @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) end if - !$acc update device(v_size, nVars) -#endif - end subroutine s_initialize_mpi_proxy_module + end subroutine s_initialize_mpi_proxy - !> Since only the processor with rank 0 reads and verifies + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of !! this subroutine is to distribute the user inputs to the @@ -252,561 +242,199 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - subroutine s_mpi_decompose_computational_domain + subroutine s_mpi_sendrecv_ib_buffers(c_divs_vf, gp_layers) -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! 
Optimal number of processors in the x-, y- and z-directions - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(wp) :: fct_min !< - !! Processor factorization (fct) minimization parameter + type(integer_field), intent(inout) :: ib_markers + integer, intent(in) :: gp_layers - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors + integer :: i, j, k, l, r, q !< Generic loop iterators - integer :: i, j !< Generic loop iterators + integer :: buffer_counts(1:3), buffer_count - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag - ! 3D Cartesian Processor Topology - if (n > 0) then + logical :: beg_end_geq_0 - if (p > 0) then + integer :: pack_offset, unpack_offset + real(wp), pointer :: p_send, p_recv - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. +#ifdef MFC_MPI - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 + nVars = num_dims + 1 + !$acc update device(nVars) - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) + buffer_counts = (/ & + buff_size*nVars*(n + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) - ! Searching for optimal computational domain distribution - do i = 1, num_procs + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. 
& - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 + grid_dims = (/m, n, p/) - end if + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if - end if + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVars + r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) + end do + end do + end do end do - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) + end do end do - - end if - + end do end do - end if - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology - - ! Global Parameters for z-direction - - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! 
Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! 2D Cartesian Processor Topology - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology - - ! Global Parameters for y-direction - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! 1D Cartesian Processor Topology - else - - ! 
Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Global Parameters for x-direction - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - - integer :: dst_proc(1:3) - -#ifdef MFC_MPI - - ! MPI Communication in x-direction - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! 
Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) + end do + end do + end do + end do + #:endif end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + #:endfor - else ! PBC at the end only + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + p_send => buff_send(0) + p_recv => buff_recv(0) - ! Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + #:else + !$acc update host(buff_send) + #:endif - end if + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + !$acc update device(buff_recv) + #:endif end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! 
Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only + #:endfor - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Unpack Received Buffer + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVars + r = (i - 1) + nVars* & + (j + buff_size*((k + 1) + (n + 1)*l)) + c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + end do + end do + end do + end do - end if + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + end do + end do + end do + end do + #:else + ! Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVars + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + nVars* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + end do + end do + end do + end do + #:endif end if - - end if - ! END: MPI Communication in z-direction + #:endfor #endif - end subroutine s_mpi_sendrecv_grid_variables_buffers + end subroutine s_mpi_sendrecv_ib_buffers !> The goal of this procedure is to populate the buffers of !! the cell-average conservative variables by communicating @@ -820,23 +448,6 @@ contains integer, pointer, dimension(:) :: p_i_send, p_i_recv #ifdef MFC_MPI - - if (n > 0) then - if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) - end if - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) - end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) - !nCalls_time = nCalls_time + 1 ! 
MPI Communication in x-direction @@ -1657,217 +1268,6 @@ contains end subroutine s_mpi_sendrecv_ib_buffers - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) - - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then - p_send => c_divs_buff_send(0) - p_recv => c_divs_buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(c_divs_buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(c_divs_buff_recv) - #:endif - end if - #:endfor - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:endif - end if - #:endfor - -#endif - - end subroutine s_mpi_sendrecv_capilary_variables_buffers - subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn @@ -1876,15 +1276,12 @@ contains #endif end subroutine s_mpi_send_random_number - !> Module deallocation and/or disassociation procedures - subroutine s_finalize_mpi_proxy_module + subroutine s_finalize_mpi_proxy() -#ifdef MFC_MPI - if (surface_tension) then - @:DEALLOCATE(c_divs_buff_send, c_divs_buff_recv) + if (ib) then + @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if -#endif - end subroutine s_finalize_mpi_proxy_module + end subroutine s_initialize_mpi_proxy end module m_mpi_proxy From 2667687b7c0e42b2222b19c73d555986fc0a9160 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:37:21 -0400 Subject: [PATCH 06/62] bug fixes --- src/common/m_mpi_common.fpp | 207 ++++++++++++++++++++++++++++++--- src/simulation/m_mpi_proxy.fpp | 204 +------------------------------- 2 files changed, 198 insertions(+), 213 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp 
index da35420544..ebd66a4252 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -60,7 +60,7 @@ contains ! storage to hold the largest buffer in the computational domain. if (qbmm .and. .not. polytropic) then - v_size = vec_size + 2*nb*4 + v_size = sys_size + 2*nb*4 else v_size = sys_size end if @@ -701,7 +701,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p @@ -733,7 +732,6 @@ contains end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size @@ -749,7 +747,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -783,7 +780,6 @@ contains end do end do end if -#endif #:else !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size @@ -799,7 +795,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -833,7 +828,6 @@ contains end do end do end if -#endif #:endif end if #:endfor @@ -906,7 +900,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p @@ -938,7 +931,6 @@ contains end do end do end if -#endif #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) do i = 1, sys_size @@ -960,7 +952,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -994,7 +985,6 @@ contains end do end do end if -#endif #:else ! Unpacking buffer from bc_z%beg !$acc parallel loop collapse(4) gang vector default(present) private(r) @@ -1018,7 +1008,6 @@ contains end do end do -#ifdef MFC_SIMULATION if (qbmm .and. .not. polytropic) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do i = sys_size + 1, sys_size + 4 @@ -1054,12 +1043,10 @@ contains end do end do end if -#endif #:endif end if #:endfor call nvtxEndRange - #endif end subroutine s_mpi_sendrecv_variables_buffers @@ -1172,6 +1159,7 @@ contains end if #:endfor +#ifdef MFC_SIMULATION ! Send/Recv #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then @@ -1199,6 +1187,12 @@ contains #:endif end if #:endfor +#else + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif ! Unpack Received Buffer #:for mpi_dir in [1, 2, 3] @@ -1646,6 +1640,191 @@ contains end subroutine s_mpi_decompose_computational_domain + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! 
@param pbc_loc Processor boundary condition (PBC) location +#ifndef MFC_PRE_PROCESS + subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) + + integer, intent(in) :: mpi_dir + integer, intent(in) :: pbc_loc + +#ifdef MFC_MPI + + ! MPI Communication in x-direction + if (mpi_dir == 1) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_x%end/bc_x%beg + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_x%beg/bc_x%beg + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_x%end/bc_x%end + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction + elseif (mpi_dir == 2) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_y%beg/bc_y%beg + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_y%end/bc_y%end + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction + else + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_z%beg/bc_z%beg + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! 
PBC at the end + + if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_z%end/bc_z%end + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + + end if + ! END: MPI Communication in z-direction + +#endif + + end subroutine s_mpi_sendrecv_grid_variables_buffers +#endif + !> Module deallocation and/or disassociation procedures subroutine s_finalize_mpi_common_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 145bbbd23b..f37dea4e4a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -64,7 +64,7 @@ module m_mpi_proxy contains - subroutine s_initialize_mpi_proxy() + subroutine s_initialize_mpi_proxy_module() if (ib) then if (n > 0) then @@ -84,7 +84,7 @@ contains @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) end if - end subroutine s_initialize_mpi_proxy + end subroutine s_initialize_mpi_proxy_module !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not @@ -242,201 +242,7 @@ contains end subroutine s_mpi_bcast_user_inputs - subroutine s_mpi_sendrecv_ib_buffers(c_divs_vf, gp_layers) - - - type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! 
Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - p_send => buff_send(0) - p_recv => buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(buff_recv) - #:endif - end if - #:endfor - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) - end do - end do - end do - end do - - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) - end do - end do - end do - end do - #:endif - end if - #:endfor - -#endif - - end subroutine s_mpi_sendrecv_ib_buffers - - !> The goal of this procedure is to populate the buffers of + !> The goal of this procedure is to populate the buffers of !! the cell-average conservative variables by communicating !! with the neighboring processors. 
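! --- Editorial aside: illustrative sketch, not part of this patch ---
! The pack/unpack loops in the hunk above flatten a 4D halo region
! (variable index i plus cell indices j, k, l) into the 1D send buffer via
!     r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l))
! with i varying fastest. The following self-contained program (names and
! extents are arbitrary placeholders, not MFC parameters) checks that this
! mapping visits 0, 1, 2, ... contiguously and without gaps, which is what
! lets the flat buffer be handed to MPI_SENDRECV as a single message.

program halo_index_demo
    implicit none
    integer, parameter :: nVars = 4, buff_size = 2, n = 7, p = 5
    integer :: i, j, k, l, r, expected

    expected = 0
    do l = 0, p
        do k = 0, n
            do j = 0, buff_size - 1
                do i = 1, nVars
                    ! Same linearization formula as the packing loop above
                    r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l))
                    if (r /= expected) error stop 'non-contiguous halo index'
                    expected = expected + 1
                end do
            end do
        end do
    end do
    print *, 'packed ', expected, ' entries contiguously'
end program halo_index_demo
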
subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) @@ -1276,12 +1082,12 @@ contains #endif end subroutine s_mpi_send_random_number - subroutine s_finalize_mpi_proxy() + subroutine s_finalize_mpi_proxy_module() if (ib) then @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if - end subroutine s_initialize_mpi_proxy + end subroutine s_finalize_mpi_proxy_module end module m_mpi_proxy From 9814a7f803dcb19d3a7997df64cea99df3886a7e Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 30 Apr 2025 22:38:15 -0400 Subject: [PATCH 07/62] update mass calculation --- src/simulation/m_bubbles_EL.fpp | 16 ++++++++++++---- src/simulation/m_bubbles_EL_kernels.fpp | 10 +++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 39b4f7e8ed..a2ac3a7ae9 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -607,11 +607,15 @@ contains !$acc loop seq do l = 1, 3 if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& + intfc_rad(k,1), mtn_vel(k,l,1), & + gas_mg(k,1), gas_mv(k,1), & + cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -632,11 +636,15 @@ contains !$acc loop seq do l = 1, 3 if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1),cell, l, q_prim_vf) + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),intfc_rad(k,1),mtn_vel(k,l,1),cell,l,q_prim_vf) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& + intfc_rad(k,1), mtn_vel(k,l,1), & + gas_mg(k,1), gas_mv(k,1), & + cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 6aec813680..ccd5cbe0bc 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -498,13 +498,13 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,cell,i,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf real(wp) :: a, area, vol, mass, force, drag_force, vel - real(wp) :: pos, rad, dp, v_rel + real(wp) :: pos, rad, dp, v_rel, mg, mv if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -523,8 +523,6 @@ contains end if area = pi * rad**2._wp - vol = (4._wp/3._wp) * pi * rad**3._wp - force = -1._wp * area * dp if (lag_params%drag_model == 1) then ! 
Stokes drag @@ -532,9 +530,7 @@ contains force = force - (6._wp * pi * rad * v_rel) / fluid_pp(1)%Re(1) end if - mass = 1e-3 * vol - - a = force / mass + a = force / (mg + mv) end function f_get_acceleration From 2c5d7631b7fa0686ae79cfd8cfa801ffd7282841 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 1 May 2025 16:11:57 -0400 Subject: [PATCH 08/62] more refactoring --- examples/3D_TaylorGreenVortex/case.py | 4 +- src/common/m_boundary_conditions.fpp | 24 +- src/common/m_mpi_common.fpp | 348 +++++------------------ src/post_process/m_data_input.f90 | 14 +- src/post_process/m_global_parameters.fpp | 2 +- src/post_process/m_mpi_proxy.fpp | 200 ------------- src/post_process/m_start_up.f90 | 4 +- 7 files changed, 92 insertions(+), 504 deletions(-) diff --git a/examples/3D_TaylorGreenVortex/case.py b/examples/3D_TaylorGreenVortex/case.py index de440980cb..33889c7958 100644 --- a/examples/3D_TaylorGreenVortex/case.py +++ b/examples/3D_TaylorGreenVortex/case.py @@ -2,7 +2,7 @@ import math import json -N = 256 +N = 49 Re = 1600 L = 1 @@ -41,7 +41,7 @@ "p": N, "cyl_coord": "F", "dt": dt, - "t_step_start": 13529, + "t_step_start": 0, "t_step_stop": Nt, "t_step_save": int(Nt / 100), # Simulation Algorithm Parameters diff --git a/src/common/m_boundary_conditions.fpp b/src/common/m_boundary_conditions.fpp index 762ff6a44f..575eecad34 100644 --- a/src/common/m_boundary_conditions.fpp +++ b/src/common/m_boundary_conditions.fpp @@ -50,7 +50,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1) case default ! Processor BC at beginning call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 1, -1) + q_prim_vf, 1, -1, sys_size, pb, mv) end select select case (bc_x%end) @@ -66,7 +66,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1) case default ! Processor BC at end call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 1, 1) + q_prim_vf, 1, 1, sys_size, pb, mv) end select #ifdef MFC_SIMULATION @@ -110,7 +110,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1) case default ! Processor BC at beginning call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 2, -1) + q_prim_vf, 2, -1, sys_size, pb, mv) end select select case (bc_y%end) @@ -126,7 +126,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1) case default ! Processor BC at end call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 2, 1) + q_prim_vf, 2, 1, sys_size, pb, mv) end select #ifdef MFC_SIMULATION @@ -168,7 +168,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1) case default ! Processor BC at beginning call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 3, -1) + q_prim_vf, 3, -1, sys_size, pb, mv) end select select case (bc_z%end) @@ -184,7 +184,7 @@ contains call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1) case default ! 
Processor BC at end call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 3, 1) + q_prim_vf, 3, 1, sys_size, pb, mv) end select #ifdef MFC_SIMULATION @@ -1470,7 +1470,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) end if if (bc_x%end <= -3) then !< ghost-cell extrapolation @@ -1515,7 +1515,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) end if if (n == 0) then @@ -1562,7 +1562,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) end if if (bc_y%end <= -3) then !< ghost-cell extrapolation @@ -1607,7 +1607,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) end if if (p == 0) then @@ -1654,7 +1654,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) end if if (bc_z%end <= -3) then !< ghost-cell extrapolation @@ -1699,7 +1699,7 @@ contains end do end do else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) end if end subroutine s_populate_capillary_buffers diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index ebd66a4252..e5ff84f2ec 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -70,7 +70,7 @@ contains halo_size = NINT(-1._wp + 1._wp * buff_size*(v_size)* & & (m + 2*buff_size + 1)* & & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & + & (p + 2*buff_size + 1)/ & & (min(m, n, p) + 2*buff_size + 1)) else halo_size = -1 + buff_size*(v_size)* & @@ -608,13 +608,14 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param mpi_dir MPI communication coordinate direction !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, & - pb, mv, & + subroutine s_mpi_sendrecv_variables_buffers(q_comm, & mpi_dir, & - pbc_loc) + pbc_loc, & + nVar, & + pb, mv) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + type(scalar_field), dimension(1:), intent(inout) :: q_comm + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: mpi_dir, pbc_loc @@ -626,34 +627,36 @@ contains integer :: beg_end(1:2), grid_dims(1:3) integer :: dst_proc, src_proc, recv_tag, send_tag - logical :: beg_end_geq_0 + logical :: beg_end_geq_0, qbmm_comm - integer :: pack_offset, unpack_offset + integer :: pack_offset, unpack_offset, nVar real(wp), pointer :: p_send, p_recv #ifdef MFC_MPI call nvtxStartRange("RHS-COMM-PACKBUF") -!$acc update device(v_size) -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then + qbmm_comm = .false. + + if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then + qbmm_comm = .true. 
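        ! Sizing note (assumes nVar == sys_size whenever pb and mv are passed):
        ! each of the nb bubble bins carries four pb entries and four mv entries
        ! per cell, so a packed cell record grows from nVar to nVar + 2*nb*4
        ! values, which is what the buffer_counts below account for.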
+ v_size = sys_size + 2*nb*4 buffer_counts = (/ & - buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*(nVar + 2*nb*4)*(n + 1)*(p + 1), & + buff_size*(nVar + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) else -#endif + v_size = nVar buffer_counts = (/ & - buff_size*sys_size*(n + 1)*(p + 1), & - buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVar*(n + 1)*(p + 1), & + buff_size*nVar*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) -#ifdef MFC_SIMULATION end if -#endif + + !$acc update device(v_size, nVar) buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) @@ -693,24 +696,24 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) end do end do end do end do - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(4) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) end do end do end do @@ -721,11 +724,11 @@ contains do l = 0, p do k = 0, n do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) + buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) end do end do end do @@ -734,22 +737,22 @@ contains end if #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) end do end do end do end do - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -757,7 +760,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) end do end do end do @@ -765,7 +768,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -773,7 +776,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) - buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) + buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) end do end do end do @@ -782,22 +785,22 @@ contains end if #:else !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) end do end do end do end do - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -805,7 +808,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) end do end do end do @@ -813,7 +816,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar+ 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -821,7 +824,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) + buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) end do end do end do @@ -885,12 +888,12 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = 1, sys_size + do i = 1, nVar r = (i - 1) + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -900,16 +903,16 @@ contains end do end do - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -920,11 +923,11 @@ contains do l = 0, p do k = 0, n do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = buff_recv(r) + mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -933,16 +936,16 @@ contains end if #:elif mpi_dir == 2 !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size r = (i - 1) + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -952,9 +955,9 @@ contains end do end do - if (qbmm .and. .not. polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -962,7 +965,7 @@ contains r = (i - 1) + (q - 1)*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -970,7 +973,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -978,7 +981,7 @@ contains r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = buff_recv(r) + mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do @@ -988,7 +991,7 @@ contains #:else ! Unpacking buffer from bc_z%beg !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size + do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -996,9 +999,9 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then print *, "Error", j, k, l, i error stop "NaN(s) in recv" end if @@ -1008,9 +1011,9 @@ contains end do end do - if (qbmm .and. .not. 
polytropic) then + if (qbmm_comm) then !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1019,7 +1022,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do @@ -1027,7 +1030,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 + do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1036,7 +1039,7 @@ contains ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = buff_recv(r) + mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do @@ -1051,224 +1054,6 @@ contains end subroutine s_mpi_sendrecv_variables_buffers - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) - - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! 
Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - -#ifdef MFC_SIMULATION - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - p_send => buff_send(0) - p_recv => buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(buff_recv) - #:endif - end if - #:endfor -#else - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) -#endif - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:else - ! 
Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:endif - end if - #:endfor - -#endif - - end subroutine s_mpi_sendrecv_capilary_variables_buffers - !> The purpose of this procedure is to optimally decompose !! the computational domain among the available processors. !! This is performed by attempting to award each processor, @@ -1819,7 +1604,6 @@ contains end if ! END: MPI Communication in z-direction - #endif end subroutine s_mpi_sendrecv_grid_variables_buffers diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index e4ddd8c18f..1ca507a2dc 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -17,6 +17,8 @@ module m_data_input use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common + use m_compile_specific use m_helper @@ -586,7 +588,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the beginning else - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'x') + call s_mpi_sendrecv_grid_variables_buffers(1, -1) end if @@ -622,7 +624,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the end else - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'x') + call s_mpi_sendrecv_grid_variables_buffers(1, 1) end if @@ -664,7 +666,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the beginning else - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'y') + call s_mpi_sendrecv_grid_variables_buffers(2, -1) end if @@ -700,7 +702,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the end else - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'y') + call s_mpi_sendrecv_grid_variables_buffers(2, 1) end if @@ -742,7 +744,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the beginning else - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'z') + call s_mpi_sendrecv_grid_variables_buffers(3, -1) end if @@ -778,7 +780,7 @@ subroutine s_populate_grid_variables_buffer_regions ! Processor BC at the end else - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'z') + call s_mpi_sendrecv_grid_variables_buffers(3, 1) end if diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 00c5f0ec27..0a5089f247 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -469,7 +469,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs == 1 .and. n == 0) m_root = m + if (num_procs >1 .and. n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index db7c50d648..29ffbc134f 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -202,206 +202,6 @@ contains end subroutine s_mpi_bcast_user_inputs -!> Communicates the buffer regions associated with the grid - !! 
variables with processors in charge of the neighboring - !! sub-domains. Note that only cell-width spacings feature - !! buffer regions so that no information relating to the - !! cell-boundary locations is communicated. - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - subroutine s_mpi_sendrecv_grid_vars_buffer_regions(pbc_loc, sweep_coord) - - character(LEN=3), intent(in) :: pbc_loc - character, intent(in) :: sweep_coord - -#ifdef MFC_MPI - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Sending/receiving the data to/from bc_x%beg/bc_x%end - call MPI_SENDRECV(dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%end/bc_x%end - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! 
Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_grid_vars_buffer_regions - !> Communicates buffer regions associated with conservative !! variables with processors in charge of the neighboring !! sub-domains diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 8fece2820b..a9484255c0 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -33,6 +33,8 @@ module m_start_up use m_checker_common + use m_boundary_conditions + use m_checker use m_thermochem, only: num_species, species_names @@ -177,7 +179,7 @@ subroutine s_perform_time_step(t_step) ! 
Populating the buffer regions of the conservative variables if (buff_size > 0) then - call s_populate_conservative_variables_buffer_regions() + call s_populate_variables_buffers(q_cons_vf) if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if From 97e4f0334f59c0bd6e2a90f6a3f3e51c34bf5519 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 5 May 2025 12:42:07 -0400 Subject: [PATCH 09/62] bug fix --- src/simulation/m_bubbles_EL.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index a2ac3a7ae9..7a5bb6e421 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -614,7 +614,7 @@ contains mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k,1), gas_mv(k,1), & + gas_mg(k), gas_mv(k), & cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp @@ -643,7 +643,7 @@ contains mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k,1), gas_mv(k,1), & + gas_mg(k), gas_mv(k), & cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp From 5644605ba16e7be744be513fe0b2229c0c205bc1 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 5 May 2025 15:59:19 -0400 Subject: [PATCH 10/62] more refactoring --- src/common/m_boundary_common.fpp | 314 ++++++++++++++++++++-- src/post_process/m_data_input.f90 | 40 +++ src/post_process/m_global_parameters.fpp | 4 + src/post_process/m_start_up.f90 | 18 +- src/pre_process/m_boundary_conditions.fpp | 144 +--------- src/pre_process/m_data_output.fpp | 2 +- src/pre_process/m_perturbation.fpp | 2 +- src/simulation/m_boundary_conditions.fpp | 129 --------- src/simulation/m_global_parameters.fpp | 4 - src/simulation/m_mpi_proxy.fpp | 2 - src/simulation/m_rhs.fpp | 2 +- src/simulation/m_sim_helpers.f90 | 25 +- src/simulation/m_time_steppers.fpp | 2 - toolchain/mfc/run/case_dicts.py | 3 +- 14 files changed, 359 insertions(+), 332 deletions(-) delete mode 100644 src/simulation/m_boundary_conditions.fpp diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 55ac2fec82..857eae09e9 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -17,6 +17,11 @@ module m_boundary_common use m_constants + use m_delay_file_access + + use m_compile_specific + + implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers @@ -32,6 +37,11 @@ module m_boundary_common s_populate_variables_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & + s_write_serial_boundary_condition_files, & + s_write_parallel_boundary_condition_files, & + s_read_serial_boundary_condition_files, & + s_read_parallel_boundary_condition_files, & + s_assign_default_bc_type, & s_finalize_boundary_common_module public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze @@ -48,7 +58,6 @@ contains @:ALLOCATE(bc_buffers(1:num_dims, -1:1)) -#ifndef MFC_POST_PROCESS if (bc_io) then @:ALLOCATE(bc_buffers(1, -1)%sf(1:sys_size, 0:n, 0:p)) @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) @@ -64,17 +73,16 @@ contains end if end if end if -#endif end subroutine s_initialize_boundary_common_module !> The purpose of this procedure is to populate the buffers !! of the primitive variables, depending on the selected !! boundary conditions. 
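    ! Intended call shapes after this reordering (both appear verbatim
    ! elsewhere in this patch; the second relies on pb and mv being optional):
    !
    !   call s_populate_variables_buffers(bc_type, q_prim_vf, pb%sf, mv%sf)
    !   call s_populate_variables_buffers(bc_type, q_cons_vf)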
- subroutine s_populate_variables_buffers(q_prim_vf, pb, mv, bc_type) + subroutine s_populate_variables_buffers(bc_type, q_prim_vf, pb, mv) type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type integer :: bc_loc, bc_dir @@ -82,7 +90,7 @@ contains ! Population of Buffers in x-direction if (bcxb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, -1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -106,7 +114,7 @@ contains end if if (bcxe >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 1, 1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -134,7 +142,7 @@ contains if (n == 0) return if (bcyb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, -1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -160,7 +168,7 @@ contains end if if (bcye >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 2, 1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -188,7 +196,7 @@ contains if (p == 0) return if (bczb >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, -1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -212,7 +220,7 @@ contains end if if (bcze >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, pb, mv, 3, 1) + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1166,7 +1174,7 @@ contains !< x-direction if (bcxb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1184,7 +1192,7 @@ contains end if if (bcxe >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1205,7 +1213,7 @@ contains !< y-direction if (bcyb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1223,7 +1231,7 @@ contains end if if (bcye >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = 0, p @@ -1244,7 +1252,7 @@ contains !< z-direction if (bczb >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, 
num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1262,7 +1270,7 @@ contains end if if (bcze >= 0) then - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) + call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) do l = -buff_size, n + buff_size @@ -1511,9 +1519,279 @@ contains #endif end subroutine s_create_mpi_types + subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + logical :: old_grid + + character(LEN=*), intent(in) :: step_dirpath + + integer :: dir, loc, i + character(len=path_len) :: file_path + + character(len=10) :: status + + if (old_grid) then + status = 'old' + else + status = 'new' + end if + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_path = trim(step_dirpath)//'/bc_type.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_type(dir, loc)%sf + end do + end do + close (1) + + file_path = trim(step_dirpath)//'/bc_buffers.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do dir = 1, num_dims + do loc = -1, 1, 2 + write (1) bc_buffers(dir, loc)%sf + end do + end do + close (1) + + end subroutine s_write_serial_boundary_condition_files + + subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + call s_pack_boundary_condition_buffers(q_prim_vf) + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(file_loc)) + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Write bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + end do + end do + + ! 
Write bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_write_parallel_boundary_condition_files + + subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) + + character(LEN=*), intent(in) :: step_dirpath + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + logical :: file_exist + character(len=path_len) :: file_path + + character(len=10) :: status + + ! Read bc_types + file_path = trim(step_dirpath)//'/bc_type.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_type(dir, loc)%sf + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + close (1) + + ! Read bc_buffers + file_path = trim(step_dirpath)//'/bc_buffers.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if + + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do dir = 1, num_dims + do loc = -1, 1, 2 + read (1) bc_buffers(dir, loc)%sf + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + close (1) + + end subroutine s_read_serial_boundary_condition_files + + subroutine s_read_parallel_boundary_condition_files(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type + + integer :: dir, loc + character(len=path_len) :: file_loc, file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str + logical :: dir_check + + file_loc = trim(case_dir)//'/restart_data/boundary_conditions' + + if (proc_rank == 0) then + call my_inquire(file_loc, dir_check) + if (dir_check .neqv. .true.) then + call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') + end if + end if + + call s_create_mpi_types(bc_type) + + call s_mpi_barrier() + + call DelayFileAccess(proc_rank) + + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + + ! Read bc_types + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_type(dir, loc)%sf) + !$acc update device(bc_type(dir, loc)%sf) + end do + end do + + ! 
Read bc_buffers + do dir = 1, num_dims + do loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_buffers(dir, loc)%sf) + !$acc update device(bc_buffers(dir, loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + + end subroutine s_read_parallel_boundary_condition_files + + subroutine s_pack_boundary_condition_buffers(q_prim_vf) + + type(scalar_field), dimension(sys_size) :: q_prim_vf + integer :: i, j, k + + do k = 0, p + do j = 0, n + do i = 1, sys_size + bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) + bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) + end do + end do + end do + + if (n > 0) then + do k = 0, p + do j = 1, sys_size + do i = 0, m + bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) + bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) + end do + end do + end do + + if (p > 0) then + do k = 1, sys_size + do j = 0, n + do i = 0, m + bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) + bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) + end do + end do + end do + end if + end if + + end subroutine s_pack_boundary_condition_buffers + + subroutine s_assign_default_bc_type(bc_type) + + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + bc_type(1, -1)%sf(:, :, :) = bc_x%beg + bc_type(1, 1)%sf(:, :, :) = bc_x%end + !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) + + if (n > 0) then + bc_type(2, -1)%sf(:, :, :) = bc_y%beg + bc_type(2, 1)%sf(:, :, :) = bc_y%end + !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) + + if (p > 0) then + bc_type(3, -1)%sf(:, :, :) = bc_z%beg + bc_type(3, 1)%sf(:, :, :) = bc_z%end + !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) + end if + end if + + end subroutine s_assign_default_bc_type + subroutine s_finalize_boundary_common_module() -#ifndef MFC_POST_PROCESS if (bc_io) then deallocate (bc_buffers(1, -1)%sf) deallocate (bc_buffers(1, 1)%sf) @@ -1526,7 +1804,7 @@ contains end if end if end if -#endif + deallocate (bc_buffers) end subroutine s_finalize_boundary_common_module diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 826ae7f572..6986d76ee2 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -21,6 +21,8 @@ module m_data_input use m_compile_specific + use m_boundary_common + use m_helper implicit none @@ -56,6 +58,9 @@ end subroutine s_read_abstract_data_files type(scalar_field), allocatable, dimension(:), public :: q_particle !< !! Lagrangian solver (particle void fraction) + type(integer_field), allocatable, dimension(:, :), public :: bc_type !< + !! Boundary condition identifiers + type(scalar_field), public :: q_T_sf !< !! Temperature field @@ -127,6 +132,11 @@ subroutine s_read_serial_data_files(t_step) ' is missing. Exiting.') end if + if (bc_io) then + call s_read_serial_boundary_condition_files(t_step_dir, bc_type) + else + call s_assign_default_bc_type(bc_type) + end if ! Reading the Grid Data File for the x-direction ! 
Checking whether x_cb.dat exists @@ -546,6 +556,12 @@ subroutine s_read_parallel_data_files(t_step) deallocate (x_cb_glb, y_cb_glb, z_cb_glb) + if (bc_io) then + call s_read_parallel_boundary_condition_files(bc_type) + else + call s_assign_default_bc_type(bc_type) + end if + #endif end subroutine s_read_parallel_data_files @@ -1411,6 +1427,20 @@ subroutine s_initialize_data_input_module end if + ! Allocating arrays to store the bc types + allocate(bc_type(1:num_dims,-1:1)) + + allocate(bc_type(1,-1)%sf(0:0,0:n,0:p)) + allocate(bc_type(1,1)%sf(0:0,0:n,0:p)) + if (n > 0) then + allocate(bc_type(2,-1)%sf(-buff_size:m+buff_size,0:0,0:p)) + allocate(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) + if (p > 0) then + allocate(bc_type(3,-1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + allocate(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + end if + end if + if (parallel_io .neqv. .true.) then s_read_data_files => s_read_serial_data_files else @@ -1446,6 +1476,16 @@ subroutine s_finalize_data_input_module deallocate (q_T_sf%sf) end if + deallocate(bc_type(1,-1)%sf, bc_type(1,1)%sf) + if (n > 0) then + deallocate(bc_type(2,-1)%sf, bc_type(2, 1)%sf) + if (p > 0) then + deallocate(bc_type(3,-1)%sf, bc_type(3,1)%sf) + end if + end if + + deallocate(bc_type) + s_read_data_files => null() end subroutine s_finalize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 0a5089f247..3847ce0f73 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -147,6 +147,8 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: num_bc_patches + logical :: bc_io !> @name Boundary conditions in the x-, y- and z-coordinate directions !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -370,6 +372,8 @@ contains bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int bc_z%beg = dflt_int; bc_z%end = dflt_int + bc_io = .false. + num_bc_patches = dflt_int #:for DIM in ['x', 'y', 'z'] #:for DIR in [1, 2, 3] diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index a9484255c0..3acf713d84 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -16,6 +16,10 @@ module m_start_up use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common !< Common MPI subroutines + + use m_boundary_common !< Common boundary conditions subroutines + use m_variables_conversion !< Subroutines to change the state variables from !! one form to another @@ -33,8 +37,6 @@ module m_start_up use m_checker_common - use m_boundary_conditions - use m_checker use m_thermochem, only: num_species, species_names @@ -86,7 +88,8 @@ subroutine s_read_input_file relax_model, cf_wrt, sigma, adv_n, ib, num_ibs, & cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, rkck_adap_dt, & - sim_data, hyperelasticity, Bx0, relativity, cont_damage + sim_data, hyperelasticity, Bx0, relativity, cont_damage, & + num_bc_patches ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -117,6 +120,11 @@ subroutine s_read_input_file if (cfl_adap_dt .or. cfl_const_dt .or. rkck_adap_dt) cfl_dt = .true. + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + num_bc_patches > 0) then + bc_io = .true. + endif + else call s_mpi_abort('File post_process.inp is missing. 
Exiting.') end if @@ -179,8 +187,8 @@ subroutine s_perform_time_step(t_step) ! Populating the buffer regions of the conservative variables if (buff_size > 0) then - call s_populate_variables_buffers(q_cons_vf) - if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) + call s_populate_variables_buffers(bc_type, q_cons_vf) + !if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if ! Initialize the Temperature cache. diff --git a/src/pre_process/m_boundary_conditions.fpp b/src/pre_process/m_boundary_conditions.fpp index 5bc18ad68c..75452459bb 100644 --- a/src/pre_process/m_boundary_conditions.fpp +++ b/src/pre_process/m_boundary_conditions.fpp @@ -24,9 +24,7 @@ module m_boundary_conditions real(wp) :: radius type(bounds_info) :: x_boundary, y_boundary, z_boundary !< - private; public :: s_apply_boundary_patches, & - s_write_serial_boundary_condition_files, & - s_write_parallel_boundary_condition_files + private; public :: s_apply_boundary_patches contains subroutine s_line_segment_bc(patch_id, q_prim_vf, bc_type) @@ -267,144 +265,4 @@ contains end subroutine s_apply_boundary_patches - subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - character(LEN=*), intent(in) :: step_dirpath - - integer :: dir, loc, i - character(len=path_len) :: file_path - - character(len=10) :: status - - if (old_grid) then - status = 'old' - else - status = 'new' - end if - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_path = trim(step_dirpath)//'/bc_type.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_type(dir, loc)%sf - end do - end do - close (1) - - file_path = trim(step_dirpath)//'/bc_buffers.dat' - open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) - do dir = 1, num_dims - do loc = -1, 1, 2 - write (1) bc_buffers(dir, loc)%sf - end do - end do - close (1) - - end subroutine s_write_serial_boundary_condition_files - - subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - type(integer_field), dimension(1:num_dims, -1:1) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - - character(len=10) :: status - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - call s_pack_boundary_condition_buffers(q_prim_vf) - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(file_loc)) - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! 
Write bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - end do - end do - - ! Write bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_write_parallel_boundary_condition_files - - subroutine s_pack_boundary_condition_buffers(q_prim_vf) - - type(scalar_field), dimension(sys_size) :: q_prim_vf - integer :: i, j, k - - do k = 0, p - do j = 0, n - do i = 1, sys_size - bc_buffers(1, -1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k) - bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k) - end do - end do - end do - - if (n > 0) then - do k = 0, p - do j = 1, sys_size - do i = 0, m - bc_buffers(2, -1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) - bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) - end do - end do - end do - - if (p > 0) then - do k = 1, sys_size - do j = 0, n - do i = 0, m - bc_buffers(3, -1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) - bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) - end do - end do - end do - end if - end if - - end subroutine s_pack_boundary_condition_buffers - end module m_boundary_conditions diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 7ae637f034..95363db1e3 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -162,7 +162,7 @@ contains end if if (bc_io) then - call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir) + call s_write_serial_boundary_condition_files(q_prim_vf, bc_type, t_step_dir, old_grid) end if ! x-coordinate direction diff --git a/src/pre_process/m_perturbation.fpp b/src/pre_process/m_perturbation.fpp index d1b7536fdb..e1d9ed7efc 100644 --- a/src/pre_process/m_perturbation.fpp +++ b/src/pre_process/m_perturbation.fpp @@ -624,7 +624,7 @@ contains do q = 1, elliptic_smoothing_iters ! Communication of buffer regions and apply boundary conditions - call s_populate_variables_buffers(q_prim_vf, pb%sf, mv%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb%sf, mv%sf) ! Perform smoothing and store in temp array if (n == 0) then diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp deleted file mode 100644 index 0d6d22f8af..0000000000 --- a/src/simulation/m_boundary_conditions.fpp +++ /dev/null @@ -1,129 +0,0 @@ -! @file m_boundary_conditions.fpp -! 
@brief Contains module m_boundary_conditions - -!> @brief This module contains -module m_boundary_conditions - - use m_derived_types - - use m_global_parameters -#ifdef MFC_MPI - use mpi -#endif - use m_delay_file_access - - use m_compile_specific - - use m_boundary_common - -contains - - subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type) - - character(LEN=*), intent(in) :: step_dirpath - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - logical :: file_exist - character(len=path_len) :: file_path - - character(len=10) :: status - - ! Read bc_types - file_path = trim(step_dirpath)//'/bc_type.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_type(dir, loc)%sf - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - close (1) - - ! Read bc_buffers - file_path = trim(step_dirpath)//'/bc_buffers.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (.not. file_exist) then - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if - - open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') - do dir = 1, num_dims - do loc = -1, 1, 2 - read (1) bc_buffers(dir, loc)%sf - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - close (1) - - end subroutine s_read_serial_boundary_condition_files - - subroutine s_read_parallel_boundary_condition_files(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(inout) :: bc_type - - integer :: dir, loc - character(len=path_len) :: file_loc, file_path - - character(len=10) :: status - -#ifdef MFC_MPI - integer :: ierr - integer :: file_id - integer :: offset - character(len=7) :: proc_rank_str - logical :: dir_check - - file_loc = trim(case_dir)//'/restart_data/boundary_conditions' - - if (proc_rank == 0) then - call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_mpi_abort(trim(file_loc)//' is missing. Exiting.') - end if - end if - - call s_create_mpi_types(bc_type) - - call s_mpi_barrier() - - call DelayFileAccess(proc_rank) - - write (proc_rank_str, '(I7.7)') proc_rank - file_path = trim(file_loc)//'/bc_'//trim(proc_rank_str)//'.dat' - call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) - - offset = 0 - - ! Read bc_types - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, 1, MPI_BC_TYPE_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_type(dir, loc)%sf) - !$acc update device(bc_type(dir, loc)%sf) - end do - end do - - ! 
Read bc_buffers - do dir = 1, num_dims - do loc = -1, 1, 2 - call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_p, MPI_BC_BUFFER_TYPE(dir, loc), 'native', MPI_INFO_NULL, ierr) - call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, 1, MPI_BC_BUFFER_TYPE(dir, loc), MPI_STATUS_IGNORE, ierr) - offset = offset + sizeof(bc_buffers(dir, loc)%sf) - !$acc update device(bc_buffers(dir, loc)%sf) - end do - end do - - call MPI_File_close(file_id, ierr) -#endif - - end subroutine s_read_parallel_boundary_condition_files - -end module m_boundary_conditions diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 1bdc9f1d97..217d14937a 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -190,8 +190,6 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io - integer :: BC_RIEMANN_EXTRAPOLATION - integer :: BC_GHOST_EXTRAPOLATION !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -586,8 +584,6 @@ contains num_bc_patches = 0 bc_io = .false. - BC_RIEMANN_EXTRAPOLATION = -4 - BC_GHOST_EXTRAPOLATION = -3 bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index ffabb9e216..9edd2f3121 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -72,8 +72,6 @@ contains end subroutine s_initialize_mpi_proxy_module -contains - !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 6930f3caa7..0c8470dff4 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -671,7 +671,7 @@ contains call nvtxEndRange call nvtxStartRange("RHS-COMMUNICATION") - call s_populate_variables_buffers(q_prim_qp%vf, pb, mv, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_qp%vf, pb, mv) call nvtxEndRange call nvtxStartRange("RHS-ELASTIC") diff --git a/src/simulation/m_sim_helpers.f90 b/src/simulation/m_sim_helpers.f90 index 4d525cf5d9..88d37f9674 100644 --- a/src/simulation/m_sim_helpers.f90 +++ b/src/simulation/m_sim_helpers.f90 @@ -10,8 +10,7 @@ module m_sim_helpers private; public :: s_compute_enthalpy, & s_compute_stability_from_dt, & - s_compute_dt_from_cfl, & - s_assign_default_bc_type + s_compute_dt_from_cfl contains @@ -268,26 +267,4 @@ subroutine s_compute_dt_from_cfl(vel, c, max_dt, rho, Re_l, j, k, l) end subroutine s_compute_dt_from_cfl - subroutine s_assign_default_bc_type(bc_type) - - type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type - - bc_type(1, -1)%sf(:, :, :) = bc_x%beg - bc_type(1, 1)%sf(:, :, :) = bc_x%end - !$acc update device(bc_type(1,-1)%sf, bc_type(1,1)%sf) - - if (n > 0) then - bc_type(2, -1)%sf(:, :, :) = bc_y%beg - bc_type(2, 1)%sf(:, :, :) = bc_y%end - !$acc update device(bc_type(2,-1)%sf, bc_type(2,1)%sf) - - if (p > 0) then - bc_type(3, -1)%sf(:, :, :) = bc_z%beg - bc_type(3, 1)%sf(:, :, :) = bc_z%end - !$acc update device(bc_type(3,-1)%sf, bc_type(3,1)%sf) - end if - end if - - end subroutine s_assign_default_bc_type - end module m_sim_helpers diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index f8cdb7a7ac..385b846058 100644 --- a/src/simulation/m_time_steppers.fpp +++ 
b/src/simulation/m_time_steppers.fpp @@ -32,8 +32,6 @@ module m_time_steppers use m_boundary_common - use m_boundary_conditions - use m_helper use m_sim_helpers diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index a2d2ca559b..0b82fbb6ea 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -61,6 +61,7 @@ def analytic(self): 'Bx0': ParamType.REAL, 'relativity': ParamType.LOG, 'cont_damage': ParamType.LOG, + 'num_bc_patches': ParamType.INT, } PRE_PROCESS = COMMON.copy() @@ -98,7 +99,6 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'elliptic_smoothing': ParamType.LOG, 'elliptic_smoothing_iters': ParamType.INT, - 'num_bc_patches': ParamType.INT, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, }) @@ -292,7 +292,6 @@ def analytic(self): 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, 'rkck_tolerance': ParamType.REAL, - 'num_bc_patches': ParamType.INT, 'powell': ParamType.LOG, 'tau_star': ParamType.REAL, 'cont_damage_s': ParamType.REAL, From 78043ab5645ca1b765cf18d7075a75e3fd12c7a0 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 5 May 2025 17:25:16 -0400 Subject: [PATCH 11/62] more refactoring --- src/common/m_mpi_common.fpp | 10 +- src/common/m_variables_conversion.fpp | 4 + src/post_process/m_data_input.f90 | 603 +-------------- src/post_process/m_global_parameters.fpp | 6 + src/post_process/m_mpi_proxy.fpp | 651 ---------------- src/post_process/m_start_up.f90 | 9 +- src/simulation/m_ibm.fpp | 17 +- src/simulation/m_mpi_proxy.fpp | 946 ++++------------------- 8 files changed, 191 insertions(+), 2055 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b6dd6ad36b..6d670b1237 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -636,17 +636,17 @@ contains if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then qbmm_comm = .true. 
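            ! Editorial note, a sketch added for clarity and not part of the original
            ! patch: when QBMM is active with a non-polytropic gas, each exchanged cell
            ! carries the nVar conserved fields plus the additional pb and mv fields
            ! (2*nb*4 of them), hence v_size = nVar + 2*nb*4 below. For illustration
            ! only, assuming nVar = 12 and nb = 3 gives v_size = 12 + 2*3*4 = 36 values
            ! per halo cell; each directional buffer count is then v_size times the
            ! number of cells in the buff_size-thick slab being exchanged.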
- v_size = sys_size + 2*nb*4 + v_size = nVar + 2*nb*4 buffer_counts = (/ & - buff_size*(nVar + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(nVar + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) else v_size = nVar buffer_counts = (/ & - buff_size*nVar*(n + 1)*(p + 1), & - buff_size*nVar*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) end if diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index dbbf4457e1..a71bc4875b 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1152,6 +1152,10 @@ contains if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) +#ifdef MFC_POST_PROCESS + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) +#endif + end do end do end do diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 6986d76ee2..2db034b4b5 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -32,7 +32,6 @@ module m_data_input s_read_serial_data_files, & s_read_parallel_data_files, & s_populate_grid_variables_buffer_regions, & - s_populate_conservative_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -55,9 +54,6 @@ end subroutine s_read_abstract_data_files type(scalar_field), allocatable, dimension(:), public :: q_prim_vf !< !! Primitive variables - type(scalar_field), allocatable, dimension(:), public :: q_particle !< - !! Lagrangian solver (particle void fraction) - type(integer_field), allocatable, dimension(:, :), public :: bc_type !< !! Boundary condition identifiers @@ -250,29 +246,6 @@ subroutine s_read_serial_data_files(t_step) end if end if - if (bubbles_lagrange) then !Lagrangian solver - - ! Checking whether the data file associated with the variable - ! position of currently manipulated conservative variable exists - write (file_num, '(I0)') sys_size + 1 - file_loc = trim(t_step_dir)//'/q_cons_vf'// & - trim(file_num)//'.dat' - inquire (FILE=trim(file_loc), EXIST=file_check) - - ! Reading the data file if it exists, exiting otherwise - if (file_check) then - open (1, FILE=trim(file_loc), FORM='unformatted', & - STATUS='old', ACTION='read') - read (1) q_particle(1)%sf(0:m, 0:n, 0:p) - close (1) - else - print '(A)', 'File q_cons_vf'//trim(file_num)// & - '.dat is missing in '//trim(t_step_dir)// & - '. Exiting.' - call s_mpi_abort() - end if - end if - end subroutine s_read_serial_data_files !> This subroutine is called at each time-step that has to @@ -304,14 +277,6 @@ subroutine s_read_parallel_data_files(t_step) integer :: i - integer :: alt_sys !Altered sys_size for lagrangian solver - - if (bubbles_lagrange) then - alt_sys = sys_size + 1 - else - alt_sys = sys_size - end if - allocate (x_cb_glb(-1:m_glb)) allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) @@ -467,8 +432,6 @@ subroutine s_read_parallel_data_files(t_step) ! 
Initialize MPI data I/O if (ib) then call s_initialize_mpi_data(q_cons_vf, ib_markers) - elseif (bubbles_lagrange) then - call s_initialize_mpi_data(q_cons_vf, beta=q_particle(1)) else call s_initialize_mpi_data(q_cons_vf) end if @@ -483,46 +446,20 @@ subroutine s_read_parallel_data_files(t_step) WP_MOK = int(8._wp, MPI_OFFSET_KIND) MOK = int(1._wp, MPI_OFFSET_KIND) str_MOK = int(name_len, MPI_OFFSET_KIND) - NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) + NVARS_MOK = int(sys_size, MPI_OFFSET_KIND) ! Read the data for each variable - if (bubbles_euler .or. elasticity) then - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - else - do i = 1, sys_size - var_MOK = int(i, MPI_OFFSET_KIND) - - ! Initial displacement to skip at beginning of file - disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) - end do - end if - - if (bubbles_lagrange) then !Lagrangian solver - var_MOK = int(sys_size + 1, MPI_OFFSET_KIND) + do i = 1, sys_size + var_MOK = int(i, MPI_OFFSET_KIND) ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(sys_size + 1), & + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_DATA%var(sys_size + 1)%sf, data_size, & - mpi_p, status, ierr) - end if + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do call s_mpi_barrier() @@ -816,510 +753,6 @@ subroutine s_populate_grid_variables_buffer_regions end subroutine s_populate_grid_variables_buffer_regions - !> The purpose of this procedure is to populate the buffers - !! of the cell-average conservative variables, depending on - !! the boundary conditions. - subroutine s_populate_conservative_variables_buffer_regions(q_particle) - - type(scalar_field), intent(inout), optional :: q_particle - - integer :: i, j, k !< Generic loop iterators - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(0, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = q_cons_vf(i)%sf(0, 0:n, 0:p) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(-j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf(j - 1, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! 
energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(m, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(m, 0:n, 0:p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do j = 1, buff_size - - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(m + j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf((m + 1) - j, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x') - end if - - end if - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = q_particle%sf(:, 0, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = q_cons_vf(i)%sf(:, 0, 0:p) - end do - end if - end do - - ! 
Axis BC at the beginning - elseif (bc_y%beg == BC_AXIS) then - - do j = 1, buff_size - do k = 0, p - if (z_cc(k) < pi) then - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k + ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k + ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k + ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - end if - else - if (present(q_particle)) then - q_particle%sf(:, -j, k) = & - q_particle%sf(:, j - 1, k - ((p + 1)/2)) - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k - ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k - ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - end if - end if - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, n, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, n, 0:p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, n + j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, (n + 1) - j, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! 
energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - - end do - - ! Perodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y') - end if - - end if - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = q_particle%sf(:, :, 0) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = q_cons_vf(i)%sf(:, :, 0) - end do - end if - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, j - 1) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, -j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, j - 1) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, (p + 1) - j) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - end do - - ! Processor BC at the beginning - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z') - end if - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, p) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, p) - end do - end if - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, (p + 1) - j) - else - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, p + j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, (p + 1) - j) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end if - - end do - - ! 
Perodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, j - 1) - else - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z') - end if - - end if - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_conservative_variables_buffer_regions - !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module subroutine s_initialize_data_input_module @@ -1331,7 +764,6 @@ subroutine s_initialize_data_input_module ! the simulation allocate (q_cons_vf(1:sys_size)) allocate (q_prim_vf(1:sys_size)) - if (bubbles_lagrange) allocate (q_particle(1)) ! Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of the @@ -1358,12 +790,6 @@ subroutine s_initialize_data_input_module -buff_size:p + buff_size)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1388,12 +814,6 @@ subroutine s_initialize_data_input_module 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & @@ -1417,10 +837,6 @@ subroutine s_initialize_data_input_module allocate (ib_markers%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if - if (bubbles_lagrange) then - allocate (q_particle(1)%sf(-buff_size:m + buff_size, 0:0, 0:0)) - end if - if (chemistry) then allocate (q_T_sf%sf(-buff_size:m + buff_size, 0:0, 0:0)) end if @@ -1467,11 +883,6 @@ subroutine s_finalize_data_input_module deallocate (ib_markers%sf) end if - if (bubbles_lagrange) then - deallocate (q_particle(1)%sf) - deallocate (q_particle) - end if - if (chemistry) then deallocate (q_T_sf%sf) end if diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 3847ce0f73..c1cb89d614 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -124,6 +124,7 @@ module m_global_parameters type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns. integer :: E_idx !< Index of energy equation integer :: n_idx !< Index of number density + integer :: beta_idx !< Index of lagrange bubbles beta type(int_bounds_info) :: adv_idx !< Indexes of first & last advection eqns. type(int_bounds_info) :: internalEnergies_idx !< Indexes of first & last internal energy eqns. type(bub_bounds_info) :: bub_idx !< Indexes of first & last bubble variable eqns. 
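    ! Editorial sketch, not part of the original patch: with bubbles_lagrange the
    ! post-process state vector now carries the Eulerian projection of the particle
    ! void fraction as one extra field appended after the indices defined so far,
    ! e.g. (layout illustrative only)
    !   q_cons_vf(1:sys_size-1) -> usual conservative fields
    !   q_cons_vf(beta_idx)     -> lagrange beta, with void fraction = 1 - beta
    ! This replaces the separate q_particle array and the alt_sys = sys_size + 1
    ! bookkeeping removed from m_data_input.f90 above; the hunk below grows
    ! sys_size accordingly.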
@@ -591,6 +592,11 @@ contains end if + if (bubbles_lagrange) then + beta_idx = sys_size + 1 + sys_size = beta_idx + end if + if (mhd) then B_idx%beg = sys_size + 1 if (n == 0) then diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 29ffbc134f..2df1025c21 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -202,657 +202,6 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Communicates buffer regions associated with conservative - !! variables with processors in charge of the neighboring - !! sub-domains - !! @param q_cons_vf Conservative variables - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - !! @param q_particle Projection of the lagrangian particles in the Eulerian framework - subroutine s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, pbc_loc, & - sweep_coord, q_particle) - - type(scalar_field), & - dimension(sys_size), & - intent(inout) :: q_cons_vf - - character(LEN=3), intent(in) :: pbc_loc - - character, intent(in) :: sweep_coord - - type(scalar_field), & - intent(inout), optional :: q_particle - -#ifdef MFC_MPI - - integer :: i, j, k, l, r !< Generic loop iterators - - ! Communications in the x-direction - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%beg - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! 
Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%end - do l = 0, p - do k = 0, n - do j = m + 1, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j - m - 1) & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the x-direction - - ! Communications in the y-direction - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! 
Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_y%beg - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), mpi_p, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! 
Unpacking the data received form bc_y%end - do l = 0, p - do k = n + 1, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n - 1) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the y-direction - - ! Communications in the z-direction - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data from bc_z%beg - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l + buff_size) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! 
Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - if (present(q_particle)) then - q_cons_buffer_out(r) = & - q_particle%sf(j, k, l) - else - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end if - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_z%end - do l = p + 1, p + buff_size - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l - p - 1) - if (present(q_particle)) then - q_particle%sf(j, k, l) = q_cons_buffer_in(r) - else - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) - end if -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - end if - - ! END: Communications in the z-direction - -#endif - - end subroutine s_mpi_sendrecv_cons_vars_buffer_regions - !> This subroutine gathers the Silo database metadata for !! the spatial extents in order to boost the performance of !! the multidimensional visualization. diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 3acf713d84..fe78f01da9 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -180,15 +180,10 @@ subroutine s_perform_time_step(t_step) ! Populating the grid and conservative variables call s_read_data_files(t_step) - ! Populating the buffer regions of the grid variables + ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then call s_populate_grid_variables_buffer_regions() - end if - - ! 
Populating the buffer regions of the conservative variables - if (buff_size > 0) then call s_populate_variables_buffers(bc_type, q_cons_vf) - !if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if ! Initialize the Temperature cache. @@ -674,7 +669,7 @@ subroutine s_save_data(t_step, varname, pres, c, H) ! Adding the lagrangian subgrid variables to the formatted database file if (bubbles_lagrange) then !! Void fraction field - q_sf = 1._wp - q_particle(1)%sf( & + q_sf = 1._wp - q_cons_vf(beta_idx)%sf( & -offset_x%beg:m + offset_x%end, & -offset_y%beg:n + offset_y%end, & -offset_z%beg:p + offset_z%end) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 0c18d17327..f43fcb00ba 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! Get neighboring IB variables from other processors - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + call s_populate_ib_buffers(ib_markers, gp_layers) !$acc update host(ib_markers%sf) @@ -108,6 +108,21 @@ contains end subroutine s_ibm_setup + subroutine s_populate_ib_buffers(ib_markers, gp_layers) + + type(integer_field), intent(inout) :: ib_markers + integer, intent(in) :: gp_layers + + #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] + #:for LOCC, LOCI in [('beg', -1), ('end', 1)] + if (bc_${DIRC}$%${LOCC}$ > 0) then + call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, ${DIRI}$, ${LOCI}$) + end if + #:endfor + #:endfor + + end subroutine s_populate_ib_buffers + !> Subroutine that updates the conservative variables at the ghost points !! @param q_cons_vf Conservative Variables !! @param q_prim_vf Primitive variables diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 9edd2f3121..aa59159c59 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,31 +44,37 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, v_size + integer, private :: err_code, ierr, i_halo_size !> @} - !$acc declare create(v_size) + !$acc declare create(i_halo_size) contains subroutine s_initialize_mpi_proxy_module() +#ifdef MFC_MPI if (ib) then if (n > 0) then if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & + i_halo_size = -1 + gp_layers * & & (m + 2*gp_layers + 1)* & & (n + 2*gp_layers + 1)* & & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) + & (min(m, n, p) + 2*gp_layers + 1) else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) + i_halo_size = -1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1) end if else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) + i_halo_size = -1 + gp_layers end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) + + !$acc declare create(i_halo_size) + + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) + end if +#endif end subroutine s_initialize_mpi_proxy_module @@ -229,834 +235,184 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. 
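    !! (Editorial sketch, not part of the original patch: the refactored routine below
    !! exchanges the immersed-boundary markers one coordinate direction and one domain
    !! side per call, mirroring the s_populate_ib_buffers dispatch added in m_ibm.fpp.)
    !! @param mpi_dir Coordinate direction of the exchange (1: x, 2: y, 3: z)
    !! @param pbc_loc Domain side of the exchange (-1: beginning, 1: end)
    !! An illustrative call filling the x-direction beginning halo:
    !!   call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, 1, -1)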
- subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, mpi_dir, pbc_loc) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - integer :: i, j, k, l, r !< Generic loop iterators - integer, pointer, dimension(:) :: p_i_send, p_i_recv + integer, intent(in) :: gp_layers, mpi_dir, pbc_loc -#ifdef MFC_MPI - !nCalls_time = nCalls_time + 1 - - ! MPI Communication in x-direction - if (bc_x%beg >= 0) then ! PBC at the beginning + integer :: i, j, k, l, r, q !< Generic loop iterators - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do + integer :: buffer_counts(1:3), buffer_count - !call MPI_Barrier(MPI_COMM_WORLD, ierr) + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv + logical :: beg_end_geq_0, qbmm_comm - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) + integer :: pack_offset, unpack_offset - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + integer, pointer :: p_i_send, p_i_recv - !$acc end host_data - !$acc end data - !$acc wait - else -#endif +#ifdef MFC_MPI - !$acc update host(ib_buff_send, ib_buff_send) + call nvtxStartRange("RHS-COMM-PACKBUF") - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & + buffer_counts = (/ & gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + gp_layers*(m + 2*buff_size + 1)*(p + 1), & + gp_layers*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) -#if defined(MFC_OpenACC) - end if -#endif + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - else ! PBC at the beginning only + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do + send_tag = f_logical_to_int(.not. 
f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) - !call MPI_Barrier(MPI_COMM_WORLD, ierr) + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do + grid_dims = (/m, n, p/) + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - gp_layers + 1 end if - if (bc_x%end >= 0) then ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - !$acc parallel loop collapse(3) gang vector default(present) private(r) - ! Packing buffer to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! 
Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if - - ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m + 1, m + gp_layers - r = ((j - m - 1) + gp_layers*(k + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - - if (bc_y%beg >= 0) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers * (k + (n + 1) * l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_y%end >= 0) then ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, gp_layers - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + #:endif end if -#endif - - ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n + 1, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n - 1) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - if (bc_z%beg >= 0) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! 
Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - + #:endfor + call nvtxEndRange ! Packbuf + + p_i_send => ib_buff_send(0) + p_i_recv => ib_buff_recv(0) + + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(buff_send) + call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_i_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + p_i_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi !$acc end host_data !$acc end data !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! 
Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(buff_recv) + call nvtxEndRange + #:endif end if -#endif + #:endfor - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) + ! Unpack Received Buffer + call nvtxStartRange("RHS-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers * ((k + 1) + (n + 1) * l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do + end do end do - end do - end do - - end if - - if (bc_z%end >= 0) then ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! 
Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) + #:else + ! Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = -gp_layers, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do end do end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) + #:endif end if -#endif - - ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p + 1, p + gp_layers - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p - 1))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - ! END: MPI Communication in z-direction - + #:endfor + call nvtxEndRange #endif end subroutine s_mpi_sendrecv_ib_buffers From f2e5692c4558a766a4402964b4c40a77dfe69cde Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 5 May 2025 22:57:08 -0400 Subject: [PATCH 12/62] bug fixes and IBM halo exchange fix --- src/simulation/m_ibm.fpp | 9 ++++---- src/simulation/m_mpi_proxy.fpp | 40 ++++++++++++++++++---------------- src/simulation/m_start_up.fpp | 2 ++ src/simulation/m_viscous.fpp | 12 +++++----- src/simulation/m_weno.fpp | 8 +++---- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index f43fcb00ba..08d1893c1e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! 
Get neighboring IB variables from other processors - call s_populate_ib_buffers(ib_markers, gp_layers) + call s_populate_ib_buffers(ib_markers) !$acc update host(ib_markers%sf) @@ -108,15 +108,14 @@ contains end subroutine s_ibm_setup - subroutine s_populate_ib_buffers(ib_markers, gp_layers) + subroutine s_populate_ib_buffers(ib_markers) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in [('beg', -1), ('end', 1)] - if (bc_${DIRC}$%${LOCC}$ > 0) then - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, ${DIRI}$, ${LOCI}$) + if (bc_${DIRC}$%${LOCC}$ >= 0) then + call s_mpi_sendrecv_ib_buffers(ib_markers, ${DIRI}$, ${LOCI}$) end if #:endfor #:endfor diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index aa59159c59..1ff0252fb4 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,8 +44,10 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, i_halo_size + integer, private :: err_code, ierr !> @} + + integer :: i_halo_size !$acc declare create(i_halo_size) contains @@ -70,9 +72,7 @@ contains end if !$acc declare create(i_halo_size) - @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) - end if #endif @@ -235,11 +235,11 @@ contains end subroutine s_mpi_bcast_user_inputs - subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers, mpi_dir, pbc_loc) + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers, mpi_dir, pbc_loc + integer, intent(in) :: mpi_dir, pbc_loc integer :: i, j, k, l, r, q !< Generic loop iterators @@ -261,8 +261,8 @@ contains buffer_counts = (/ & gp_layers*(n + 1)*(p + 1), & - gp_layers*(m + 2*buff_size + 1)*(p + 1), & - gp_layers*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & /) buffer_count = buffer_counts(mpi_dir) @@ -312,8 +312,8 @@ contains !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, gp_layers - 1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers+ 1)* & (k + gp_layers*l)) ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) end do @@ -322,10 +322,10 @@ contains #:else !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, gp_layers - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) end do end do @@ -389,8 +389,8 @@ contains !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = -gp_layers, -1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & ((k + gp_layers) + gp_layers*l)) ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) end do @@ -400,10 +400,10 @@ contains ! 
Unpacking buffer from bc_z%beg !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = -gp_layers, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & (l + gp_layers))) ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) end do @@ -420,9 +420,11 @@ contains subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn + #ifdef MFC_MPI call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) #endif + end subroutine s_mpi_send_random_number subroutine s_finalize_mpi_proxy_module() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index a45346673c..8997b3f51a 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1480,6 +1480,7 @@ contains call s_initialize_mpi_common_module() + call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() if (grid_geometry == 3) call s_initialize_fftw_module() call s_initialize_riemann_solvers_module() @@ -1686,6 +1687,7 @@ contains if (viscous) then call s_finalize_viscous_module() end if + call s_finalize_mpi_proxy_module() if (surface_tension) call s_finalize_surface_tension_module() if (bodyForces) call s_finalize_body_forces_module() diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index a56143a1aa..8f8f7b450b 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1392,7 +1392,7 @@ contains end if end if - if (bc_x%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1401,7 +1401,7 @@ contains end do end do end if - if (bc_x%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_x%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1411,7 +1411,7 @@ contains end do end if if (n > 0) then - if (bc_y%beg <= BC_GHOST_EXTRAPOLATION .and. bc_y%beg /= BC_NULL) then + if (bc_y%beg <= BC_GHOST_EXTRAP .and. 
bc_y%beg /= BC_NULL) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1420,7 +1420,7 @@ contains end do end do end if - if (bc_y%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_y%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1430,7 +1430,7 @@ contains end do end if if (p > 0) then - if (bc_z%beg <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%beg <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -1440,7 +1440,7 @@ contains end do end do end if - if (bc_z%end <= BC_GHOST_EXTRAPOLATION) then + if (bc_z%end <= BC_GHOST_EXTRAP) then !$acc parallel loop collapse(2) gang vector default(present) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 2071b3a0f0..6747af2e1a 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -272,12 +272,12 @@ contains ! any contributions from outside of the physical domain during ! the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s) = 0._wp; d_cbR_${XYZ}$ (1, s) = 1._wp d_cbL_${XYZ}$ (0, s) = 0._wp; d_cbL_${XYZ}$ (1, s) = 1._wp end if @@ -418,14 +418,14 @@ contains ! any contributions from outside of the physical domain during ! the WENO reconstruction if (null_weights) then - if (bc_s%beg == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%beg == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (1:2, 0) = 0._wp; d_cbR_${XYZ}$ (0, 0) = 1._wp d_cbL_${XYZ}$ (1:2, 0) = 0._wp; d_cbL_${XYZ}$ (0, 0) = 1._wp d_cbR_${XYZ}$ (2, 1) = 0._wp; d_cbR_${XYZ}$ (:, 1) = d_cbR_${XYZ}$ (:, 1)/sum(d_cbR_${XYZ}$ (:, 1)) d_cbL_${XYZ}$ (2, 1) = 0._wp; d_cbL_${XYZ}$ (:, 1) = d_cbL_${XYZ}$ (:, 1)/sum(d_cbL_${XYZ}$ (:, 1)) end if - if (bc_s%end == BC_RIEMANN_EXTRAPOLATION) then + if (bc_s%end == BC_RIEMANN_EXTRAP) then d_cbR_${XYZ}$ (0, s - 1) = 0._wp; d_cbR_${XYZ}$ (:, s - 1) = d_cbR_${XYZ}$ (:, s - 1)/sum(d_cbR_${XYZ}$ (:, s - 1)) d_cbL_${XYZ}$ (0, s - 1) = 0._wp; d_cbL_${XYZ}$ (:, s - 1) = d_cbL_${XYZ}$ (:, s - 1)/sum(d_cbL_${XYZ}$ (:, s - 1)) d_cbR_${XYZ}$ (0:1, s) = 0._wp; d_cbR_${XYZ}$ (2, s) = 1._wp From 5e53d533730825f6edcf549e7485d4fe88dfd278 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 6 May 2025 15:59:06 -0400 Subject: [PATCH 13/62] gpu fixes --- src/simulation/m_mpi_proxy.fpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 1ff0252fb4..82dede13ab 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -71,7 +71,7 @@ contains i_halo_size = -1 + gp_layers end if - !$acc declare create(i_halo_size) + !$acc update device(i_halo_size) @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) end if #endif @@ -342,12 +342,12 @@ contains #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) + !$acc data attach(p_i_send, p_i_recv) + !$acc host_data use_device(p_i_send, p_i_recv) call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) + !$acc update host(ib_buff_send) call nvtxEndRange call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif @@ -365,7 +365,7 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) + !$acc update device(ib_buff_recv) call nvtxEndRange #:endif end if From 697a0ef20deca1ad8a99da331d2ab8529778ab84 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 8 May 2025 13:41:28 -0400 Subject: [PATCH 14/62] additional code shortening --- src/common/m_mpi_common.fpp | 25 +++------- src/post_process/m_mpi_proxy.fpp | 85 ++------------------------------ src/pre_process/m_mpi_proxy.fpp | 16 +----- src/simulation/m_mpi_proxy.fpp | 2 +- 4 files changed, 13 insertions(+), 115 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 6d670b1237..bf2260ec5b 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -24,7 +24,7 @@ module m_mpi_common implicit none - integer, private :: err_code, ierr, v_size !< + integer, private :: ierr, v_size !< !$acc declare create(v_size) !! Generic flags used to identify and report MPI errors @@ -123,24 +123,11 @@ contains !! @param beta Eulerian void fraction from lagrangian bubbles subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) - type(scalar_field), & - dimension(sys_size), & - intent(in) :: q_cons_vf - - type(integer_field), & - optional, & - intent(in) :: ib_markers - - type(levelset_field), & - optional, & - intent(IN) :: levelset - - type(levelset_norm_field), & - optional, & - intent(IN) :: levelset_norm - - type(scalar_field), & - intent(in), optional :: beta + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(integer_field), optional, intent(in) :: ib_markers + type(levelset_field), optional, intent(IN) :: levelset + type(levelset_norm_field), optional, intent(IN) :: levelset_norm + type(scalar_field), intent(in), optional :: beta integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 2df1025c21..cbc6ba361c 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -23,14 +23,6 @@ module m_mpi_proxy implicit none - !> @name Buffers of the conservative variables received/sent from/to neighboring - !! processors. Note that these variables are structured as vectors rather - !! than arrays. - !> @{ - real(wp), allocatable, dimension(:) :: q_cons_buffer_in - real(wp), allocatable, dimension(:) :: q_cons_buffer_out - !> @} - !> @name Receive counts and displacement vector variables, respectively, used in !! enabling MPI to gather varying amounts of data from all processes to the !! root process @@ -41,7 +33,7 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr + integer, private :: ierr !> @} contains @@ -54,61 +46,6 @@ contains integer :: i !< Generic loop iterator - ! Allocating vectorized buffer regions of conservative variables. - ! 
The length of buffer vectors are set according to the size of the - ! largest buffer region in the sub-domain. - if (buff_size > 0) then - - ! Simulation is at least 2D - if (n > 0) then - - ! Simulation is 3D - if (p > 0) then - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (p + 2*buff_size + 1)/ & - (min(m, n, p) & - + 2*buff_size + 1) - 1)) - - ! Simulation is 2D - else - - allocate (q_cons_buffer_in(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - allocate (q_cons_buffer_out(0:buff_size* & - sys_size* & - (max(m, n) & - + 2*buff_size + 1) - 1)) - - end if - - ! Simulation is 1D - else - - allocate (q_cons_buffer_in(0:buff_size*sys_size - 1)) - allocate (q_cons_buffer_out(0:buff_size*sys_size - 1)) - - end if - - ! Initially zeroing out the vectorized buffer region variables - ! to avoid possible underflow from any unused allocated memory - q_cons_buffer_in = 0._wp - q_cons_buffer_out = 0._wp - - end if - ! Allocating and configuring the receive counts and the displacement ! vector variables used in variable-gather communication procedures. ! Note that these are only needed for either multidimensional runs @@ -366,10 +303,7 @@ contains subroutine s_mpi_gather_data_extents(q_sf, data_extents) real(wp), dimension(:, :, :), intent(in) :: q_sf - - real(wp), & - dimension(1:2, 0:num_procs - 1), & - intent(inout) :: data_extents + real(wp), dimension(1:2, 0:num_procs - 1), intent(inout) :: data_extents #ifdef MFC_MPI @@ -395,13 +329,8 @@ contains !! @param q_root_sf Flow variable defined on the entire computational domain subroutine s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) - real(wp), & - dimension(0:m), & - intent(in) :: q_sf - - real(wp), & - dimension(0:m), & - intent(inout) :: q_root_sf + real(wp), dimension(0:m), intent(in) :: q_sf + real(wp), dimension(0:m), intent(inout) :: q_root_sf #ifdef MFC_MPI @@ -421,12 +350,6 @@ contains #ifdef MFC_MPI - ! Deallocating the conservative variables buffer vectors - if (buff_size > 0) then - deallocate (q_cons_buffer_in) - deallocate (q_cons_buffer_out) - end if - ! Deallocating the receive counts and the displacement vector ! variables used in variable-gather communication procedures if ((format == 1 .and. n > 0) .or. n == 0) then diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 6d16839eeb..abc47cdb65 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -23,20 +23,8 @@ module m_mpi_proxy implicit none - integer, private :: err_code, ierr, v_size !< - !! Generic flags used to identify and report MPI errors - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! primitive variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - real(wp), private, allocatable, dimension(:), target :: q_prims_buff_recv !< - !! q_prims_buff_recv is utilized to receive and unpack the buffer of the cell- - !! average primitive variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - ! integer :: halo_size + integer, private :: ierr !< + !! 
Generic flag used to identify and report MPI errors contains !> Since only processor with rank 0 is in charge of reading diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 82dede13ab..de42e53f5c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,7 +44,7 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr + integer, private :: ierr !> @} integer :: i_halo_size From 1ff32f0450ce5f1271a3f9fd198f65829c361027 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 8 May 2025 17:00:06 -0400 Subject: [PATCH 15/62] remove even more code --- src/common/m_boundary_common.fpp | 220 ++++++++++++++++++-- src/post_process/m_data_input.f90 | 251 ----------------------- src/post_process/m_global_parameters.fpp | 40 ++-- src/post_process/m_start_up.f90 | 17 +- src/pre_process/m_perturbation.fpp | 2 - src/simulation/m_start_up.fpp | 215 +------------------ 6 files changed, 228 insertions(+), 517 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 857eae09e9..2fa160c5f5 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -27,8 +27,6 @@ module m_boundary_common type(scalar_field), dimension(:, :), allocatable :: bc_buffers !$acc declare create(bc_buffers) - real(wp) :: bcxb, bcxe, bcyb, bcye, bczb, bcze - #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE #endif @@ -42,9 +40,10 @@ module m_boundary_common s_read_serial_boundary_condition_files, & s_read_parallel_boundary_condition_files, & s_assign_default_bc_type, & + s_populate_grid_variables_buffers, & s_finalize_boundary_common_module - public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze + public :: bc_buffers #ifdef MFC_MPI public :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -54,8 +53,6 @@ contains subroutine s_initialize_boundary_common_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - @:ALLOCATE(bc_buffers(1:num_dims, -1:1)) if (bc_io) then @@ -89,7 +86,7 @@ contains integer :: k, l ! 
Population of Buffers in x-direction - if (bcxb >= 0) then + if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -113,7 +110,7 @@ contains end do end if - if (bcxe >= 0) then + if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -141,7 +138,7 @@ contains if (n == 0) return - if (bcyb >= 0) then + if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -167,7 +164,7 @@ contains end do end if - if (bcye >= 0) then + if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -195,7 +192,7 @@ contains if (p == 0) return - if (bczb >= 0) then + if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -219,7 +216,7 @@ contains end do end if - if (bcze >= 0) then + if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb, mv) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1173,7 +1170,7 @@ contains integer :: i, j, k, l !< x-direction - if (bcxb >= 0) then + if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1191,7 +1188,7 @@ contains end do end if - if (bcxe >= 0) then + if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1212,7 +1209,7 @@ contains if (n == 0) return !< y-direction - if (bcyb >= 0) then + if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1230,7 +1227,7 @@ contains end do end if - if (bcye >= 0) then + if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1251,7 +1248,7 @@ contains if (p == 0) return !< z-direction - if (bczb >= 0) then + if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1269,7 +1266,7 @@ contains end do end if - if (bcze >= 0) then + if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else !$acc parallel loop collapse(2) gang vector default(present) @@ -1790,6 +1787,195 @@ contains end subroutine s_assign_default_bc_type +#ifndef MFC_PRE_PROCESS + !> The purpose of this subroutine is to populate the buffers + !! of the grid variables, which are constituted of the cell- + !! boundary locations and cell-width distributions, based on + !! the boundary conditions. + subroutine s_populate_grid_variables_buffers + + integer :: i !< Generic loop iterator + +#ifndef MFC_POST_PROCESS + ! Required for compatibiliy between codes + type(int_bounds_info) :: offset_x, offset_y, offset_z + offset_x%beg = buff_size; offset_x%end = buff_size + offset_y%beg = buff_size; offset_y%end = buff_size + offset_z%beg = buff_size; offset_z%end = buff_size +#endif + + ! Population of Buffers in x-direction + + ! 
Populating cell-width distribution buffer at bc_x%beg + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, -1) + elseif (bc_x%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(-i) = dx(0) + end do + elseif (bc_x%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dx(-i) = dx(i - 1) + end do + elseif (bc_x%beg == BC_PERIODIC) then + do i = 1, buff_size + dx(-i) = dx(m - (i - 1)) + end do + endif + + ! Computing the cell-boundary and center locations buffer at bc_x%beg + do i = 1, offset_x%beg + x_cb(-1 - i) = x_cb(-i) - dx(-i) + enddo + + do i = 1, buff_size + x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp + end do + + ! Populating the cell-width distribution buffer at bc_x%end + if (bc_x%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(1, 1) + elseif (bc_x%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dx(m + i) = dx(m) + end do + elseif (bc_x%end == BC_REFLECTIVE) then + do i = 1, buff_size + dx(m + i) = dx(m - (i - 1)) + end do + elseif (bc_x%end == BC_PERIODIC) then + do i = 1, buff_size + dx(m + i) = dx(i - 1) + end do + endif + + ! Populating the cell-boundary and center locations buffer at bc_x%end + do i = 1, offset_x%end + x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) + end do + + do i = 1, buff_size + x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp + enddo + ! END: Population of Buffers in x-direction + + ! Population of Buffers in y-direction + + ! Populating cell-width distribution buffer at bc_y%beg + if (n == 0) then + return + elseif (bc_y%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, -1) + elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(0) + end do + elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then + do i = 1, buff_size + dy(-i) = dy(i - 1) + end do + elseif (bc_y%beg == BC_PERIODIC) then + do i = 1, buff_size + dy(-i) = dy(n - (i - 1)) + end do + endif + + ! Computing the cell-boundary and center locations buffer at bc_y%beg + do i = 1, offset_y%beg + y_cb(-1 - i) = y_cb(-i) - dy(-i) + enddo + + do i = 1, buff_size + y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp + enddo + + ! Populating the cell-width distribution buffer at bc_y%end + if (bc_y%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(2, 1) + elseif (bc_y%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dy(n + i) = dy(n) + end do + elseif (bc_y%end == BC_REFLECTIVE) then + do i = 1, buff_size + dy(n + i) = dy(n - (i - 1)) + end do + elseif (bc_y%end == BC_PERIODIC) then + do i = 1, buff_size + dy(n + i) = dy(i - 1) + end do + endif + + ! Populating the cell-boundary and center locations buffer at bc_y%end + do i = 1, offset_y%end + y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) + enddo + + do i = 1, buff_size + y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp + end do + ! END: Population of Buffers in y-direction + + ! Population of Buffers in z-direction + + ! Populating cell-width distribution buffer at bc_z%beg + if (p == 0) then + return + elseif (Bc_z%beg >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, -1) + elseif (bc_z%beg <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(-i) = dz(0) + end do + elseif (bc_z%beg == BC_REFLECTIVE) then + do i = 1, buff_size + dz(-i) = dz(i - 1) + end do + elseif (bc_z%beg == BC_PERIODIC) then + do i = 1, buff_size + dz(-i) = dz(p - (i - 1)) + end do + endif + + ! 
Computing the cell-boundary and center locations buffer at bc_z%beg + do i = 1, offset_z%beg + z_cb(-1 - i) = z_cb(-i) - dz(-i) + enddo + + do i = 1, buff_size + z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp + enddo + + ! Populating the cell-width distribution buffer at bc_z%end + if (bc_z%end >= 0) then + call s_mpi_sendrecv_grid_variables_buffers(3, 1) + elseif (bc_z%end <= BC_GHOST_EXTRAP) then + do i = 1, buff_size + dz(p + i) = dz(p) + end do + elseif (bc_z%end == BC_REFLECTIVE) then + do i = 1, buff_size + dz(p + i) = dz(p - (i - 1)) + end do + elseif (bc_z%end == BC_PERIODIC) then + do i = 1, buff_size + dz(p + i) = dz(i - 1) + end do + endif + + ! Populating the cell-boundary and center locations buffer at bc_z%end + do i = 1, buff_size + z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) + enddo + + do i = 1, buff_size + z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp + enddo + ! END: Population of Buffers in z-direction + + end subroutine s_populate_grid_variables_buffers +#endif + subroutine s_finalize_boundary_common_module() if (bc_io) then diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 2db034b4b5..1b8d91e260 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -31,7 +31,6 @@ module m_data_input s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -503,256 +502,6 @@ subroutine s_read_parallel_data_files(t_step) end subroutine s_read_parallel_data_files - !> The following subroutine populates the buffer regions of - !! the cell-width spacings, the cell-boundary locations and - !! the cell-center locations. Note that the buffer regions - !! of the last two variables should be interpreted slightly - !! differently than usual. They are really ghost zones that - !! are used in aiding the multidimensional visualization of - !! Silo database files, in VisIt, when processor boundary - !! conditions are present. - subroutine s_populate_grid_variables_buffer_regions - - integer :: i !< Generic loop iterator - - ! Populating Buffer Regions in the x-direction - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(-i) = dx(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == BC_PERIODIC) then - - do i = 1, buff_size - dx(-i) = dx((m + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - - end if - - do i = 1, offset_x%beg - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dx(m + i) = dx(m) - end do - - ! Symmetry BC at the end - elseif (bc_x%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dx(m + i) = dx((m + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_x%end == BC_PERIODIC) then - - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - - ! 
Processor BC at the end - else - - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - - end if - - do i = 1, offset_x%end - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == BC_PERIODIC) then - - do i = 1, buff_size - dy(-i) = dy((n + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - - end if - - do i = 1, offset_y%beg - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dy(n + i) = dy(n) - end do - - ! Symmetry BC at the end - elseif (bc_y%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dy(n + i) = dy((n + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_y%end == BC_PERIODIC) then - - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - - end if - - do i = 1, offset_y%end - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Populating Buffer Regions in the y-direction - - ! Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(-i) = dz(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == BC_PERIODIC) then - - do i = 1, buff_size - dz(-i) = dz((p + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - - end if - - do i = 1, offset_z%beg - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= BC_GHOST_EXTRAP) then - - do i = 1, buff_size - dz(p + i) = dz(p) - end do - - ! Symmetry BC at the end - elseif (bc_z%end == BC_REFLECTIVE) then - - do i = 1, buff_size - dz(p + i) = dz((p + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_z%end == BC_PERIODIC) then - - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - - end if - - do i = 1, offset_z%end - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_grid_variables_buffer_regions - !> Computation of parameters, allocation procedures, and/or !! 
any other tasks needed to properly setup the module subroutine s_initialize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index c1cb89d614..ba36a8be19 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -474,7 +474,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs >1 .and. n == 0) m_root = m_glb + if (num_procs > 1 .and. n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then @@ -773,21 +773,12 @@ contains chemxe = species_idx%end #ifdef MFC_MPI - if (bubbles_lagrange) then - allocate (MPI_IO_DATA%view(1:sys_size + 1)) - allocate (MPI_IO_DATA%var(1:sys_size + 1)) - do i = 1, sys_size + 1 - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - else - allocate (MPI_IO_DATA%view(1:sys_size)) - allocate (MPI_IO_DATA%var(1:sys_size)) - do i = 1, sys_size - allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - MPI_IO_DATA%var(i)%sf => null() - end do - end if + allocate (MPI_IO_DATA%view(1:sys_size)) + allocate (MPI_IO_DATA%var(1:sys_size)) + do i = 1, sys_size + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) #endif @@ -935,21 +926,18 @@ contains integer :: i ! Deallocating the grid variables for the x-coordinate direction - deallocate (x_cb, x_cc, dx) + deallocate (x_cc, x_cb, dx) ! Deallocating grid variables for the y- and z-coordinate directions if (n > 0) then - - deallocate (y_cb, y_cc, dy) - - if (p > 0) deallocate (z_cb, z_cc, dz) - + deallocate (y_cc, y_cb, dy) + if (p > 0) then + deallocate (z_cc, z_cb, dz) + end if + else ! Deallocating the grid variables, only used for the 1D simulations, ! and containing the defragmented computational domain grid data - else - deallocate (x_root_cb, x_root_cc) - end if deallocate (proc_coords) @@ -964,8 +952,6 @@ contains MPI_IO_DATA%var(i)%sf => null() end do - if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() - deallocate (MPI_IO_DATA%var) deallocate (MPI_IO_DATA%view) end if diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index fe78f01da9..e457c42722 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -182,7 +182,7 @@ subroutine s_perform_time_step(t_step) ! Populating the buffer regions of the grid and conservative variables if (buff_size > 0) then - call s_populate_grid_variables_buffer_regions() + call s_populate_grid_variables_buffers() call s_populate_variables_buffers(bc_type, q_cons_vf) end if @@ -694,13 +694,15 @@ subroutine s_initialize_modules ! Computation of parameters, allocation procedures, and/or any other tasks ! needed to properly setup the modules call s_initialize_global_parameters_module() - if (bubbles_euler .and. nb > 1) then - call s_simpson - end if + if (bubbles_euler .and. nb > 1) call s_simpson if (bubbles_euler .and. .not. 
polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) call s_initialize_mpi_proxy_module() + if (num_procs > 1) then + call s_initialize_mpi_proxy_module() + call s_initialize_mpi_common_module() + endif + call s_initialize_boundary_common_module() call s_initialize_variables_conversion_module() call s_initialize_data_input_module() call s_initialize_derived_variables_module() @@ -753,7 +755,10 @@ subroutine s_finalize_modules call s_finalize_derived_variables_module() call s_finalize_data_input_module() call s_finalize_variables_conversion_module() - if (num_procs > 1) call s_finalize_mpi_proxy_module() + if (num_procs > 1) then + call s_finalize_mpi_proxy_module() + call s_finalize_mpi_common_module() + endif call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/pre_process/m_perturbation.fpp b/src/pre_process/m_perturbation.fpp index e1d9ed7efc..7afb4f9c4b 100644 --- a/src/pre_process/m_perturbation.fpp +++ b/src/pre_process/m_perturbation.fpp @@ -32,8 +32,6 @@ contains subroutine s_initialize_perturbation_module() - bcxb = bc_x%beg; bcxe = bc_x%end; bcyb = bc_y%beg; bcye = bc_y%end; bczb = bc_z%beg; bcze = bc_z%end - if (mixlayer_perturb) then mixlayer_bc_fd = 2 nbp = n + 2 diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 8997b3f51a..fdceaecc43 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -96,7 +96,6 @@ module m_start_up s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffers, & s_initialize_internal_energy_equations, & s_initialize_modules, s_initialize_gpu_vars, & s_initialize_mpi_domain, s_finalize_modules, & @@ -959,219 +958,7 @@ contains end subroutine s_read_parallel_data_files - !> The purpose of this subroutine is to populate the buffers - !! of the grid variables, which are constituted of the cell- - !! boundary locations and cell-width distributions, based on - !! the boundary conditions. - subroutine s_populate_grid_variables_buffers - - integer :: i !< Generic loop iterator - - ! Population of Buffers in x-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_x%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(-i) = dx(0) - end do - elseif (bc_x%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - elseif (bc_x%beg == BC_PERIODIC) then - do i = 1, buff_size - dx(-i) = dx(m - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
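        ! For illustration: a minimal, self-contained sketch of the fill-then-accumulate
        ! pattern described in the comment above, assuming a uniform grid of width h,
        ! m = 4 and buff_size = 2 (all toy values), and using only the ghost-cell
        ! extrapolation rule at the end of the x-direction. It just shows that the
        ! ghost boundaries and centers continue the interior spacing; the symmetry,
        ! periodic, and processor cases differ only in how dx(m + i) is filled.
        program grid_buffer_sketch
            implicit none
            integer, parameter :: wp = kind(1.d0), m = 4, buff_size = 2
            real(wp) :: dx(-buff_size:m + buff_size), x_cb(-1 - buff_size:m + buff_size), &
                        x_cc(-buff_size:m + buff_size), h
            integer :: i
            h = 0.1_wp
            dx(0:m) = h
            x_cb(-1) = 0._wp
            do i = 0, m
                x_cb(i) = x_cb(i - 1) + dx(i)       ! interior cell boundaries
                x_cc(i) = x_cb(i) - dx(i)/2._wp     ! interior cell centers
            end do
            do i = 1, buff_size
                dx(m + i) = dx(m)                                 ! ghost-cell extrapolation fill
                x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i)       ! boundary-location buffer
                x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp
            end do
            print *, x_cb(m:m + buff_size)   ! 0.5  0.6  0.7  -> spacing h continues
            print *, x_cc(m:m + buff_size)   ! 0.45 0.55 0.65
        end program grid_buffer_sketch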
- if (bc_x%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dx(m + i) = dx(m) - end do - elseif (bc_x%end == BC_REFLECTIVE) then - do i = 1, buff_size - dx(m + i) = dx(m - (i - 1)) - end do - elseif (bc_x%end == BC_PERIODIC) then - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - end do - - ! END: Population of Buffers in x-direction - - ! Population of Buffers in y-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (n == 0) then - return - elseif (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(0) - end do - elseif (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - elseif (bc_y%beg == BC_PERIODIC) then - do i = 1, buff_size - dy(-i) = dy(n - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_y%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dy(n + i) = dy(n) - end do - elseif (bc_y%end == BC_REFLECTIVE) then - do i = 1, buff_size - dy(n + i) = dy(n - (i - 1)) - end do - elseif (bc_y%end == BC_PERIODIC) then - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp - end do - - ! END: Population of Buffers in y-direction - - ! Population of Buffers in z-direction - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. 
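        ! A quick worked check of the index bookkeeping in the duplicated fills being
        ! removed: the post-process copy (m_data_input.f90) writes the wrap/mirror
        ! offset as (m + 1) - i, while the consolidated routine in m_boundary_common.fpp
        ! writes m - (i - 1). These are the same map, since (m + 1) - i = m - (i - 1).
        ! For the periodic fill at a "beg" face, for example,
        !     dx(-i) = dx((m + 1) - i)      ! removed form
        !     dx(-i) = dx(m - (i - 1))      ! consolidated form
        ! both take i = 1 -> dx(m), i = 2 -> dx(m - 1), ..., i = buff_size ->
        ! dx(m - buff_size + 1): the last buff_size interior widths, wrapped around,
        ! as expected for a periodic boundary.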
- if (p == 0) then - return - elseif (bc_z%beg <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(-i) = dz(0) - end do - elseif (bc_z%beg == BC_REFLECTIVE) then - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - elseif (bc_z%beg == BC_PERIODIC) then - do i = 1, buff_size - dz(-i) = dz(p - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_z%end <= BC_GHOST_EXTRAP) then - do i = 1, buff_size - dz(p + i) = dz(p) - end do - elseif (bc_z%end == BC_REFLECTIVE) then - do i = 1, buff_size - dz(p + i) = dz(p - (i - 1)) - end do - elseif (bc_z%end == BC_PERIODIC) then - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - end do - - ! END: Population of Buffers in z-direction - - end subroutine s_populate_grid_variables_buffers - - !> The purpose of this procedure is to initialize the + !> The purpose of this procedure is to initialize the !! values of the internal-energy equations of each phase !! from the mass of each phase, the mixture momentum and !! mixture-total-energy equations. From 85273cd9d136280dee7fb35668a885854a2e57d7 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 9 May 2025 12:17:33 -0400 Subject: [PATCH 16/62] format and bug fixes --- "\\" | 1603 +++++++++++++++++++++++++++++ src/common/m_boundary_common.fpp | 42 +- src/common/m_mpi_common.fpp | 18 +- src/post_process/m_data_input.f90 | 22 +- src/post_process/m_start_up.f90 | 8 +- src/simulation/m_ibm.fpp | 6 +- src/simulation/m_mpi_proxy.fpp | 12 +- 7 files changed, 1655 insertions(+), 56 deletions(-) create mode 100644 "\\" diff --git "a/\\" "b/\\" new file mode 100644 index 0000000000..6262662a46 --- /dev/null +++ "b/\\" @@ -0,0 +1,1603 @@ + +#:include 'macros.fpp' + +!> @brief The module serves as a proxy to the parameters and subroutines +!! available in the MPI implementation's MPI module. Specifically, +!! the purpose of the proxy is to harness basic MPI commands into +!! more complicated procedures as to accomplish the communication +!! goals for the simulation. 
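! For illustration, a worked sizing example for the send/receive halo buffers
! allocated in s_initialize_mpi_common_module below; the sample values
! m = n = p = 99, buff_size = 4, v_size = 10 are hypothetical and only show how
! the 3D branch of the formula behaves:
!     halo_size = -1 + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1)
!                      *(p + 2*buff_size + 1)/(min(m, n, p) + 2*buff_size + 1)
!               = -1 + 4*10*(108*108*108)/108 = -1 + 40*108*108 = 466559
! so buff_send(0:halo_size) holds 4*10*108*108 = 466560 values: one slab of depth
! buff_size spanning the ghost-extended transverse plane, for every packed
! variable (v_size of them). Dividing by the smallest extended dimension is what
! reduces the volume product to the largest face of the halo.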
+module m_mpi_common + +#ifdef MFC_MPI + use mpi !< Message passing interface (MPI) module +#endif + + use m_derived_types !< Definitions of the derived types + + use m_global_parameters !< Definitions of the global parameters + + use m_helper + + use ieee_arithmetic + + use m_nvtx + + implicit none + + integer, private :: ierr, v_size !< + !$acc declare create(v_size) + !! Generic flags used to identify and report MPI errors + + real(wp), private, allocatable, dimension(:), target :: buff_send !< + !! This variable is utilized to pack and send the buffer of the cell-average + !! primitive variables, for a single computational domain boundary at the + !! time, to the relevant neighboring processor. + + real(wp), private, allocatable, dimension(:), target :: buff_recv !< + !! buff_recv is utilized to receive and unpack the buffer of the cell- + !! average primitive variables, for a single computational domain boundary + !! at the time, from the relevant neighboring processor. + + !$acc declare create(buff_send, buff_recv) + + integer :: halo_size, nVars + !$acc declare create(halo_size, nVars) + + +contains + + !> The computation of parameters, the allocation of memory, + !! the association of pointers and/or the execution of any + !! other procedures that are necessary to setup the module. + subroutine s_initialize_mpi_common_module + +#ifdef MFC_MPI + ! Allocating buff_send/recv and. Please note that for the sake of + ! simplicity, both variables are provided sufficient storage to hold + ! the largest buffer in the computational domain. + + if (qbmm .and. .not. polytropic) then + v_size = sys_size + 2*nb*4 + else + v_size = sys_size + end if + + if (n > 0) then + if (p > 0) then + halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1)) + else + halo_size = -1 + buff_size*(v_size)* & + & (max(m, n) + 2*buff_size + 1) + end if + else + halo_size = -1 + buff_size*(v_size) + end if + + !$acc update device(halo_size, v_size) + + @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) +#endif + + end subroutine s_initialize_mpi_common_module + + !> The subroutine initializes the MPI execution environment + !! and queries both the number of processors which will be + !! available for the job and the local processor rank. + subroutine s_mpi_initialize + +#ifndef MFC_MPI + + ! Serial run only has 1 processor + num_procs = 1 + ! Local processor rank is 0 + proc_rank = 0 + +#else + + ! Initializing the MPI environment + call MPI_INIT(ierr) + + ! Checking whether the MPI environment has been properly initialized + if (ierr /= MPI_SUCCESS) then + print '(A)', 'Unable to initialize MPI environment. Exiting.' + call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) + end if + + ! Querying the number of processors available for the job + call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr) + + ! Querying the rank of the local processor + call MPI_COMM_RANK(MPI_COMM_WORLD, proc_rank, ierr) + +#endif + + end subroutine s_mpi_initialize + + !! @param q_cons_vf Conservative variables + !! @param ib_markers track if a cell is within the immersed boundary + !! @param levelset closest distance from every cell to the IB + !! @param levelset_norm normalized vector from every cell to the closest point to the IB + !! 
@param beta Eulerian void fraction from lagrangian bubbles + subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) + + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(integer_field), optional, intent(in) :: ib_markers + type(levelset_field), optional, intent(IN) :: levelset + type(levelset_norm_field), optional, intent(IN) :: levelset_norm + type(scalar_field), intent(in), optional :: beta + + integer, dimension(num_dims) :: sizes_glb, sizes_loc + integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start + +#ifdef MFC_MPI + + ! Generic loop iterator + integer :: i, j, q, k, l + + !Altered system size for the lagrangian subgrid bubble model + integer :: alt_sys + + if (present(beta)) then + alt_sys = sys_size + 1 + else + alt_sys = sys_size + end if + + do i = 1, sys_size + MPI_IO_DATA%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p) + end do + + if (present(beta)) then + MPI_IO_DATA%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p) + end if + + !Additional variables pb and mv for non-polytropic qbmm +#ifdef MFC_PRE_PROCESS + if (qbmm .and. .not. polytropic) then + do i = 1, nb + do j = 1, nnode + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m, 0:n, 0:p, j, i) + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m, 0:n, 0:p, j, i) + end do + end do + end if +#endif + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. polytropic) then + do i = 1, nb + do j = 1, nnode + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m, 0:n, 0:p, j, i) + MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m, 0:n, 0:p, j, i) + end do + end do + end if +#endif + ! Define global(g) and local(l) sizes for flow variables + sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1 + if (n > 0) then + sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1 + if (p > 0) then + sizes_glb(3) = p_glb + 1; sizes_loc(3) = p + 1 + end if + end if + + ! Define the view for each variable + do i = 1, alt_sys + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) + call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) + end do + +#ifndef MFC_POST_PROCESS + if (qbmm .and. .not. 
polytropic) then + do i = sys_size + 1, sys_size + 2*nb*4 + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) + call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) + + end do + end if +#endif + + if (present(ib_markers)) then + +#ifdef MFC_PRE_PROCESS + MPI_IO_IB_DATA%var%sf => ib_markers%sf + MPI_IO_levelset_DATA%var%sf => levelset%sf + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf +#else + MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p) + +#ifndef MFC_POST_PROCESS + MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) +#endif + +#endif + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, MPI_INTEGER, MPI_IO_IB_DATA%view, ierr) + call MPI_TYPE_COMMIT(MPI_IO_IB_DATA%view, ierr) + +#ifndef MFC_POST_PROCESS + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelset_DATA%view, ierr) + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelsetnorm_DATA%view, ierr) + + call MPI_TYPE_COMMIT(MPI_IO_levelset_DATA%view, ierr) + call MPI_TYPE_COMMIT(MPI_IO_levelsetnorm_DATA%view, ierr) +#endif + end if + +#ifndef MFC_POST_PROCESS + if (present(ib_markers)) then + do j = 1, num_ibs + if (patch_ib(j)%c > 0) then + +#ifdef MFC_PRE_PROCESS + allocate (MPI_IO_airfoil_IB_DATA%var(1:2*Np)) +#endif + + airfoil_glb(1) = 3*Np*num_procs + airfoil_loc(1) = 3*Np + airfoil_start(1) = 3*proc_rank*Np + +#ifdef MFC_PRE_PROCESS + do i = 1, Np + MPI_IO_airfoil_IB_DATA%var(i)%x = airfoil_grid_l(i)%x + MPI_IO_airfoil_IB_DATA%var(i)%y = airfoil_grid_l(i)%y + end do +#endif + + call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(1), ierr) + call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(1), ierr) + +#ifdef MFC_PRE_PROCESS + do i = 1, Np + MPI_IO_airfoil_IB_DATA%var(Np + i)%x = airfoil_grid_u(i)%x + MPI_IO_airfoil_IB_DATA%var(Np + i)%y = airfoil_grid_u(i)%y + end do +#endif + call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(2), ierr) + call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(2), ierr) + + end if + end do + + end if +#endif + +#endif + + end subroutine s_initialize_mpi_data + + subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root) + + integer, intent(in) :: counts ! Array of vector lengths for each process + real(wp), intent(in), dimension(counts) :: my_vector ! Input vector on each process + integer, intent(in) :: root ! Rank of the root process + real(wp), allocatable, intent(out) :: gathered_vector(:) ! 
Gathered vector on the root process + + integer :: i, offset, ierr + integer, allocatable :: recounts(:), displs(:) + +#ifdef MFC_MPI + + allocate (recounts(num_procs)) + + call MPI_GATHER(counts, 1, MPI_INTEGER, recounts, 1, MPI_INTEGER, root, & + MPI_COMM_WORLD, ierr) + + allocate (displs(size(recounts))) + + displs(1) = 0 + + do i = 2, size(recounts) + displs(i) = displs(i - 1) + recounts(i - 1) + end do + + allocate (gathered_vector(sum(recounts))) + call MPI_GATHERV(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, & + root, MPI_COMM_WORLD, ierr) +#endif + end subroutine s_mpi_gather_data + + subroutine mpi_bcast_time_step_values(proc_time, time_avg) + + real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time + real(wp), intent(inout) :: time_avg + +#ifdef MFC_MPI + + call MPI_GATHER(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine mpi_bcast_time_step_values + + subroutine s_prohibit_abort(condition, message) + character(len=*), intent(in) :: condition, message + + print *, "" + print *, "CASE FILE ERROR" + print *, " - Prohibited condition: ", trim(condition) + if (len_trim(message) > 0) then + print *, " - Note: ", trim(message) + end if + print *, "" + call s_mpi_abort(code=CASE_FILE_ERROR_CODE) + end subroutine s_prohibit_abort + + !> The goal of this subroutine is to determine the global + !! extrema of the stability criteria in the computational + !! domain. This is performed by sifting through the local + !! extrema of each stability criterion. Note that each of + !! the local extrema is from a single process, within its + !! assigned section of the computational domain. Finally, + !! note that the global extrema values are only bookkeept + !! on the rank 0 processor. + !! @param icfl_max_loc Local maximum ICFL stability criterion + !! @param vcfl_max_loc Local maximum VCFL stability criterion + !! @param Rc_min_loc Local minimum Rc stability criterion + !! @param icfl_max_glb Global maximum ICFL stability criterion + !! @param vcfl_max_glb Global maximum VCFL stability criterion + !! @param Rc_min_glb Global minimum Rc stability criterion + subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & + vcfl_max_loc, & + ccfl_max_loc, & + Rc_min_loc, & + icfl_max_glb, & + vcfl_max_glb, & + ccfl_max_glb, & + Rc_min_glb) + + real(wp), intent(in) :: icfl_max_loc + real(wp), intent(in) :: vcfl_max_loc + real(wp), intent(in) :: ccfl_max_loc + real(wp), intent(in) :: Rc_min_loc + + real(wp), intent(out) :: icfl_max_glb + real(wp), intent(out) :: vcfl_max_glb + real(wp), intent(out) :: ccfl_max_glb + real(wp), intent(out) :: Rc_min_glb + +#ifdef MFC_SIMULATION +#ifdef MFC_MPI + + ! Reducing local extrema of ICFL, VCFL, CCFL and Rc numbers to their + ! global extrema and bookkeeping the results on the rank 0 processor + call MPI_REDUCE(icfl_max_loc, icfl_max_glb, 1, & + mpi_p, MPI_MAX, 0, & + MPI_COMM_WORLD, ierr) + + if (viscous) then + call MPI_REDUCE(vcfl_max_loc, vcfl_max_glb, 1, & + mpi_p, MPI_MAX, 0, & + MPI_COMM_WORLD, ierr) + call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & + mpi_p, MPI_MIN, 0, & + MPI_COMM_WORLD, ierr) + end if + +#else + + icfl_max_glb = icfl_max_loc + + if (viscous) then + vcfl_max_glb = vcfl_max_loc + Rc_min_glb = Rc_min_loc + end if + +#endif +#endif + + end subroutine s_mpi_reduce_stability_criteria_extrema + + !> The following subroutine takes the input local variable + !! from all processors and reduces to the sum of all + !! values. 
The reduced variable is recorded back onto the + !! original local variable on each processor. + !! @param var_loc Some variable containing the local value which should be + !! reduced amongst all the processors in the communicator. + !! @param var_glb The globally reduced value + subroutine s_mpi_allreduce_sum(var_loc, var_glb) + + real(wp), intent(in) :: var_loc + real(wp), intent(out) :: var_glb + +#ifdef MFC_MPI + + ! Performing the reduction procedure + call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_SUM, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_allreduce_sum + + !> The following subroutine takes the input local variable + !! from all processors and reduces to the minimum of all + !! values. The reduced variable is recorded back onto the + !! original local variable on each processor. + !! @param var_loc Some variable containing the local value which should be + !! reduced amongst all the processors in the communicator. + !! @param var_glb The globally reduced value + subroutine s_mpi_allreduce_min(var_loc, var_glb) + + real(wp), intent(in) :: var_loc + real(wp), intent(out) :: var_glb + +#ifdef MFC_MPI + + ! Performing the reduction procedure + call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_MIN, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_allreduce_min + + !> The following subroutine takes the input local variable + !! from all processors and reduces to the maximum of all + !! values. The reduced variable is recorded back onto the + !! original local variable on each processor. + !! @param var_loc Some variable containing the local value which should be + !! reduced amongst all the processors in the communicator. + !! @param var_glb The globally reduced value + subroutine s_mpi_allreduce_max(var_loc, var_glb) + + real(wp), intent(in) :: var_loc + real(wp), intent(out) :: var_glb + +#ifdef MFC_MPI + + ! Performing the reduction procedure + call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_MAX, MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_allreduce_max + + !> The following subroutine takes the inputted variable and + !! determines its minimum value on the entire computational + !! domain. The result is stored back into inputted variable. + !! @param var_loc holds the local value to be reduced among + !! all the processors in communicator. On output, the variable holds + !! the minimum value, reduced amongst all of the local values. + subroutine s_mpi_reduce_min(var_loc) + + real(wp), intent(inout) :: var_loc + +#ifdef MFC_MPI + + ! Temporary storage variable that holds the reduced minimum value + real(wp) :: var_glb + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, var_glb, 1, mpi_p, & + MPI_MIN, 0, MPI_COMM_WORLD, ierr) + + call MPI_BCAST(var_glb, 1, mpi_p, & + 0, MPI_COMM_WORLD, ierr) + + var_loc = var_glb + +#endif + + end subroutine s_mpi_reduce_min + + !> The following subroutine takes the first element of the + !! 2-element inputted variable and determines its maximum + !! value on the entire computational domain. The result is + !! stored back into the first element of the variable while + !! the rank of the processor that is in charge of the sub- + !! domain containing the maximum is stored into the second + !! element of the variable. + !! @param var_loc On input, this variable holds the local value and processor rank, + !! which are to be reduced among all the processors in communicator. + !! 
On output, this variable holds the maximum value, reduced amongst + !! all of the local values, and the process rank to which the value + !! belongs. + subroutine s_mpi_reduce_maxloc(var_loc) + + real(wp), dimension(2), intent(inout) :: var_loc + +#ifdef MFC_MPI + + real(wp), dimension(2) :: var_glb !< + !! Temporary storage variable that holds the reduced maximum value + !! and the rank of the processor with which the value is associated + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, var_glb, 1, mpi_2p, & + MPI_MAXLOC, 0, MPI_COMM_WORLD, ierr) + + call MPI_BCAST(var_glb, 1, mpi_2p, & + 0, MPI_COMM_WORLD, ierr) + + var_loc = var_glb + +#endif + + end subroutine s_mpi_reduce_maxloc + + !> The subroutine terminates the MPI execution environment. + !! @param prnt error message to be printed + subroutine s_mpi_abort(prnt, code) + + character(len=*), intent(in), optional :: prnt + integer, intent(in), optional :: code + + if (present(prnt)) then + print *, prnt + call flush (6) + + end if + +#ifndef MFC_MPI + if (present(code)) then + stop code + else + stop 1 + end if +#else + ! Terminating the MPI environment + if (present(code)) then + call MPI_ABORT(MPI_COMM_WORLD, code, ierr) + else + call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) + end if +#endif + + end subroutine s_mpi_abort + + !>Halts all processes until all have reached barrier. + subroutine s_mpi_barrier + +#ifdef MFC_MPI + + ! Calling MPI_BARRIER + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + +#endif + + end subroutine s_mpi_barrier + + !> The subroutine finalizes the MPI execution environment. + subroutine s_mpi_finalize + +#ifdef MFC_MPI + + ! Finalizing the MPI environment + call MPI_FINALIZE(ierr) + +#endif + + end subroutine s_mpi_finalize + + !> The goal of this procedure is to populate the buffers of + !! the cell-average conservative variables by communicating + !! with the neighboring processors. + !! @param q_cons_vf Cell-average conservative variables + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location + subroutine s_mpi_sendrecv_variables_buffers(q_comm, & + mpi_dir, & + pbc_loc, & + nVar, & + pb, mv) + + type(scalar_field), dimension(1:), intent(inout) :: q_comm + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + integer, intent(in) :: mpi_dir, pbc_loc, nVar + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0, qbmm_comm + + integer :: pack_offset, unpack_offset + + real(wp), pointer :: p_send, p_recv + +#ifdef MFC_MPI + + call nvtxStartRange("RHS-COMM-PACKBUF") + + qbmm_comm = .false. + + if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then + qbmm_comm = .true. 
+ v_size = nVar + 2*nb*4 + buffer_counts = (/ & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + else + v_size = nVar + buffer_counts = (/ & + buff_size*v_size*(n + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + end if + + !$acc update device(v_size, nVar) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + ! Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) + end do + end do + end do + end do + end do + end if + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 
1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) + end do + end do + end do + end do + end do + end if + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) + end do + end do + end do + end do + end do + end if + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + p_send => buff_send(0) + p_recv => buff_recv(0) + + ! Send/Recv +#ifdef MFC_SIMULATION + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(buff_send) + call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(buff_recv) + call nvtxEndRange + #:endif + end if + #:endfor +#else + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif + + ! 
Unpack Received Buffer + call nvtxStartRange("RHS-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + end if + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + end if + #:else + ! 
Unpacking buffer from bc_z%beg + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + + if (qbmm_comm) then + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(5) gang vector default(present) private(r) + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) + end do + end do + end do + end do + end do + end if + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_sendrecv_variables_buffers + + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain + +#ifdef MFC_MPI + + integer :: num_procs_x, num_procs_y, num_procs_z !< + !! Optimal number of processors in the x-, y- and z-directions + + real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions + + real(wp) :: fct_min !< + !! Processor factorization (fct) minimization parameter + + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator + + integer :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors + + integer :: i, j !< Generic loop iterators + + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if + + ! 3D Cartesian Processor Topology + if (n > 0) then + + if (p > 0) then + + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. + + ! Initial values of the processor factorization optimization + num_procs_x = 1 + num_procs_y = num_procs + num_procs_z = 1 + ierr = -1 + + ! Computing minimization variable for these initial values + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! 
Searching for optimal computational domain distribution + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + tmp_num_procs_z = num_procs_z + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10._wp*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + num_procs_x = i + num_procs_y = j + num_procs_z = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + + end do + + end if + + end do + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & + num_procs_y, num_procs_z/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology + + ! Global Parameters for z-direction + + ! Number of remaining cells + rem_cells = mod(p + 1, num_procs_z) + + ! Optimal number of cells per processor + p = (p + 1)/num_procs_z - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%beg, ierr) + proc_coords(3) = proc_coords(3) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. 
num_procs_z > 1)) then + proc_coords(3) = proc_coords(3) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_z%end, ierr) + proc_coords(3) = proc_coords(3) - 1 + end if + + if (parallel_io) then + if (proc_coords(3) < rem_cells) then + start_idx(3) = (p + 1)*proc_coords(3) + else + start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + end if + end if + + ! 2D Cartesian Processor Topology + else + + ! Initial estimate of optimal processor topology + num_procs_x = 1 + num_procs_y = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = num_procs_x + tmp_num_procs_y = num_procs_y + fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + num_procs_x = i + num_procs_y = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting.') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & + num_procs_y/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + + end if + ! END: 2D Cartesian Processor Topology + + ! Global Parameters for y-direction + + ! Number of remaining cells + rem_cells = mod(n + 1, num_procs_y) + + ! Optimal number of cells per processor + n = (n + 1)/num_procs_y - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%beg, ierr) + proc_coords(2) = proc_coords(2) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then + proc_coords(2) = proc_coords(2) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & + bc_y%end, ierr) + proc_coords(2) = proc_coords(2) - 1 + end if + + if (parallel_io) then + if (proc_coords(2) < rem_cells) then + start_idx(2) = (n + 1)*proc_coords(2) + else + start_idx(2) = (n + 1)*proc_coords(2) + rem_cells + end if + end if + + ! 1D Cartesian Processor Topology + else + + ! Optimal processor topology + num_procs_x = num_procs + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + + ! Global Parameters for x-direction + + ! Number of remaining cells + rem_cells = mod(m + 1, num_procs_x) + + ! 
Optimal number of cells per processor + m = (m + 1)/num_procs_x - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + + ! Boundary condition at the beginning + if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) + proc_coords(1) = proc_coords(1) + 1 + end if + + ! Boundary condition at the end + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then + proc_coords(1) = proc_coords(1) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) + proc_coords(1) = proc_coords(1) - 1 + end if + + if (parallel_io) then + if (proc_coords(1) < rem_cells) then + start_idx(1) = (m + 1)*proc_coords(1) + else + start_idx(1) = (m + 1)*proc_coords(1) + rem_cells + end if + end if + +#endif + + end subroutine s_mpi_decompose_computational_domain + + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location +#ifndef MFC_PRE_PROCESS + subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) + + integer, intent(in) :: mpi_dir + integer, intent(in) :: pbc_loc + +#ifdef MFC_MPI + + ! MPI Communication in x-direction + if (mpi_dir == 1) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_x%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_x%end/bc_x%beg + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_x%beg/bc_x%beg + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(-buff_size), buff_size, & + mpi_p, bc_x%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_x%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_x%beg/bc_x%end + call MPI_SENDRECV( & + dx(0), buff_size, & + mpi_p, bc_x%beg, 1, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_x%end/bc_x%end + call MPI_SENDRECV( & + dx(m - buff_size + 1), buff_size, & + mpi_p, bc_x%end, 0, & + dx(m + 1), buff_size, & + mpi_p, bc_x%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in x-direction + + ! MPI Communication in y-direction + elseif (mpi_dir == 2) then + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_y%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_y%end/bc_y%beg + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! 
Send/receive buffer to/from bc_y%beg/bc_y%beg + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(-buff_size), buff_size, & + mpi_p, bc_y%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_y%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_y%beg/bc_y%end + call MPI_SENDRECV( & + dy(0), buff_size, & + mpi_p, bc_y%beg, 1, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_y%end/bc_y%end + call MPI_SENDRECV( & + dy(n - buff_size + 1), buff_size, & + mpi_p, bc_y%end, 0, & + dy(n + 1), buff_size, & + mpi_p, bc_y%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + ! END: MPI Communication in y-direction + + ! MPI Communication in z-direction + else + + if (pbc_loc == -1) then ! PBC at the beginning + + if (bc_z%end >= 0) then ! PBC at the beginning and end + + ! Send/receive buffer to/from bc_z%end/bc_z%beg + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the beginning only + + ! Send/receive buffer to/from bc_z%beg/bc_z%beg + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(-buff_size), buff_size, & + mpi_p, bc_z%beg, 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + else ! PBC at the end + + if (bc_z%beg >= 0) then ! PBC at the end and beginning + + ! Send/receive buffer to/from bc_z%beg/bc_z%end + call MPI_SENDRECV( & + dz(0), buff_size, & + mpi_p, bc_z%beg, 1, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + else ! PBC at the end only + + ! Send/receive buffer to/from bc_z%end/bc_z%end + call MPI_SENDRECV( & + dz(p - buff_size + 1), buff_size, & + mpi_p, bc_z%end, 0, & + dz(p + 1), buff_size, & + mpi_p, bc_z%end, 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + end if + + end if + + end if + ! END: MPI Communication in z-direction +#endif + + end subroutine s_mpi_sendrecv_grid_variables_buffers +#endif + + !> Module deallocation and/or disassociation procedures + subroutine s_finalize_mpi_common_module + +#ifdef MFC_MPI + deallocate (buff_send, buff_recv) +#endif + + end subroutine s_finalize_mpi_common_module + +end module m_mpi_common diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 2fa160c5f5..bdfb163908 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -21,11 +21,10 @@ module m_boundary_common use m_compile_specific - implicit none type(scalar_field), dimension(:, :), allocatable :: bc_buffers - !$acc declare create(bc_buffers) +!$acc declare create(bc_buffers) #ifdef MFC_MPI integer, dimension(1:3, -1:1) :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -1787,7 +1786,6 @@ contains end subroutine s_assign_default_bc_type -#ifndef MFC_PRE_PROCESS !> The purpose of this subroutine is to populate the buffers !! of the grid variables, which are constituted of the cell- !! boundary locations and cell-width distributions, based on @@ -1796,14 +1794,15 @@ contains integer :: i !< Generic loop iterator -#ifndef MFC_POST_PROCESS - ! Required for compatibiliy between codes +#ifdef MFC_SIMULATION + ! 
Required for compatibility between codes type(int_bounds_info) :: offset_x, offset_y, offset_z offset_x%beg = buff_size; offset_x%end = buff_size offset_y%beg = buff_size; offset_y%end = buff_size offset_z%beg = buff_size; offset_z%end = buff_size #endif +#ifndef MFC_PRE_PROCESS ! Population of Buffers in x-direction ! Populating cell-width distribution buffer at bc_x%beg @@ -1821,12 +1820,12 @@ contains do i = 1, buff_size dx(-i) = dx(m - (i - 1)) end do - endif + end if ! Computing the cell-boundary and center locations buffer at bc_x%beg do i = 1, offset_x%beg x_cb(-1 - i) = x_cb(-i) - dx(-i) - enddo + end do do i = 1, buff_size x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp @@ -1847,7 +1846,7 @@ contains do i = 1, buff_size dx(m + i) = dx(i - 1) end do - endif + end if ! Populating the cell-boundary and center locations buffer at bc_x%end do i = 1, offset_x%end @@ -1856,7 +1855,7 @@ contains do i = 1, buff_size x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp - enddo + end do ! END: Population of Buffers in x-direction ! Population of Buffers in y-direction @@ -1878,16 +1877,16 @@ contains do i = 1, buff_size dy(-i) = dy(n - (i - 1)) end do - endif + end if ! Computing the cell-boundary and center locations buffer at bc_y%beg do i = 1, offset_y%beg y_cb(-1 - i) = y_cb(-i) - dy(-i) - enddo + end do do i = 1, buff_size y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp - enddo + end do ! Populating the cell-width distribution buffer at bc_y%end if (bc_y%end >= 0) then @@ -1904,12 +1903,12 @@ contains do i = 1, buff_size dy(n + i) = dy(i - 1) end do - endif + end if ! Populating the cell-boundary and center locations buffer at bc_y%end do i = 1, offset_y%end y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - enddo + end do do i = 1, buff_size y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp @@ -1935,16 +1934,16 @@ contains do i = 1, buff_size dz(-i) = dz(p - (i - 1)) end do - endif + end if ! Computing the cell-boundary and center locations buffer at bc_z%beg do i = 1, offset_z%beg z_cb(-1 - i) = z_cb(-i) - dz(-i) - enddo + end do do i = 1, buff_size z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp - enddo + end do ! Populating the cell-width distribution buffer at bc_z%end if (bc_z%end >= 0) then @@ -1961,21 +1960,22 @@ contains do i = 1, buff_size dz(p + i) = dz(i - 1) end do - endif + end if ! Populating the cell-boundary and center locations buffer at bc_z%end do i = 1, buff_size z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - enddo + end do do i = 1, buff_size z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp - enddo + end do ! END: Population of Buffers in z-direction - end subroutine s_populate_grid_variables_buffers #endif + end subroutine s_populate_grid_variables_buffers + subroutine s_finalize_boundary_common_module() if (bc_io) then diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index bf2260ec5b..760f1ae5ac 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -40,8 +40,8 @@ module m_mpi_common !$acc declare create(buff_send, buff_recv) - integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) + integer :: halo_size + !$acc declare create(halo_size) contains @@ -51,8 +51,8 @@ contains subroutine s_initialize_mpi_common_module #ifdef MFC_MPI - ! Allocating buff_send/recv and. Please note that for the sake of - ! simplicity, both variables are provided sufficient storage to hold + ! Allocating buff_send/recv and. Please note that for the sake of + ! 
simplicity, both variables are provided sufficient storage to hold ! the largest buffer in the computational domain. if (qbmm .and. .not. polytropic) then @@ -63,7 +63,7 @@ contains if (n > 0) then if (p > 0) then - halo_size = NINT(-1._wp + 1._wp * buff_size*(v_size)* & + halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & & (m + 2*buff_size + 1)* & & (n + 2*buff_size + 1)* & & (p + 2*buff_size + 1)/ & @@ -598,8 +598,7 @@ contains type(scalar_field), dimension(1:), intent(inout) :: q_comm real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv - - integer, intent(in) :: mpi_dir, pbc_loc + integer, intent(in) :: mpi_dir, pbc_loc, nVar integer :: i, j, k, l, r, q !< Generic loop iterators @@ -611,7 +610,7 @@ contains logical :: beg_end_geq_0, qbmm_comm - integer :: pack_offset, unpack_offset, nVar + integer :: pack_offset, unpack_offset real(wp), pointer :: p_send, p_recv @@ -798,7 +797,7 @@ contains end do !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar+ 1, nVar + 4 + do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -1591,7 +1590,6 @@ contains end subroutine s_mpi_sendrecv_grid_variables_buffers #endif - !> Module deallocation and/or disassociation procedures subroutine s_finalize_mpi_common_module diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 1b8d91e260..73f6b2367f 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -593,16 +593,16 @@ subroutine s_initialize_data_input_module end if ! Allocating arrays to store the bc types - allocate(bc_type(1:num_dims,-1:1)) + allocate (bc_type(1:num_dims, -1:1)) - allocate(bc_type(1,-1)%sf(0:0,0:n,0:p)) - allocate(bc_type(1,1)%sf(0:0,0:n,0:p)) + allocate (bc_type(1, -1)%sf(0:0, 0:n, 0:p)) + allocate (bc_type(1, 1)%sf(0:0, 0:n, 0:p)) if (n > 0) then - allocate(bc_type(2,-1)%sf(-buff_size:m+buff_size,0:0,0:p)) - allocate(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) + allocate (bc_type(2, -1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) + allocate (bc_type(2, 1)%sf(-buff_size:m + buff_size, 0:0, 0:p)) if (p > 0) then - allocate(bc_type(3,-1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) - allocate(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + allocate (bc_type(3, -1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) + allocate (bc_type(3, 1)%sf(-buff_size:m + buff_size, -buff_size:n + buff_size, 0:0)) end if end if @@ -636,15 +636,15 @@ subroutine s_finalize_data_input_module deallocate (q_T_sf%sf) end if - deallocate(bc_type(1,-1)%sf, bc_type(1,1)%sf) + deallocate (bc_type(1, -1)%sf, bc_type(1, 1)%sf) if (n > 0) then - deallocate(bc_type(2,-1)%sf, bc_type(2, 1)%sf) + deallocate (bc_type(2, -1)%sf, bc_type(2, 1)%sf) if (p > 0) then - deallocate(bc_type(3,-1)%sf, bc_type(3,1)%sf) + deallocate (bc_type(3, -1)%sf, bc_type(3, 1)%sf) end if end if - deallocate(bc_type) + deallocate (bc_type) s_read_data_files => null() diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index e457c42722..218da370ae 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -123,7 +123,7 @@ subroutine s_read_input_file if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & num_bc_patches > 0) then bc_io = .true. - endif + end if else call s_mpi_abort('File post_process.inp is missing. 
Exiting.') @@ -698,10 +698,10 @@ subroutine s_initialize_modules if (bubbles_euler .and. .not. polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) then + if (num_procs > 1) then call s_initialize_mpi_proxy_module() call s_initialize_mpi_common_module() - endif + end if call s_initialize_boundary_common_module() call s_initialize_variables_conversion_module() call s_initialize_data_input_module() @@ -758,7 +758,7 @@ subroutine s_finalize_modules if (num_procs > 1) then call s_finalize_mpi_proxy_module() call s_finalize_mpi_common_module() - endif + end if call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 08d1893c1e..746e67573e 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -85,7 +85,7 @@ contains !$acc update device(levelset_norm%sf) ! Get neighboring IB variables from other processors - call s_populate_ib_buffers(ib_markers) + call s_populate_ib_buffers() !$acc update host(ib_markers%sf) @@ -108,9 +108,7 @@ contains end subroutine s_ibm_setup - subroutine s_populate_ib_buffers(ib_markers) - - type(integer_field), intent(inout) :: ib_markers + subroutine s_populate_ib_buffers() #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in [('beg', -1), ('end', 1)] diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index de42e53f5c..c1aaa8f4b3 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -58,7 +58,7 @@ contains if (ib) then if (n > 0) then if (p > 0) then - i_halo_size = -1 + gp_layers * & + i_halo_size = -1 + gp_layers* & & (m + 2*gp_layers + 1)* & & (n + 2*gp_layers + 1)* & & (p + 2*gp_layers + 1)/ & @@ -303,7 +303,7 @@ contains do l = 0, p do k = 0, n do j = 0, gp_layers - 1 - r = (j + gp_layers * (k + (n + 1) * l)) + r = (j + gp_layers*(k + (n + 1)*l)) ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) end do end do @@ -312,8 +312,8 @@ contains !$acc parallel loop collapse(3) gang vector default(present) private(r) do l = 0, p do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers+ 1)* & + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & (k + gp_layers*l)) ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) end do @@ -380,7 +380,7 @@ contains do l = 0, p do k = 0, n do j = -gp_layers, -1 - r = (j + gp_layers * ((k + 1) + (n + 1) * l)) + r = (j + gp_layers*((k + 1) + (n + 1)*l)) ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) end do end do @@ -404,7 +404,7 @@ contains do j = -gp_layers, m + gp_layers r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) + (l + gp_layers))) ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) end do end do From 5bb1288b87b22208fb14be5593c0aa608885ed28 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 9 May 2025 12:17:52 -0400 Subject: [PATCH 17/62] garbage cleanup --- "\\" | 1603 ---------------------------------------------------------- 1 file changed, 1603 deletions(-) delete mode 100644 "\\" diff --git "a/\\" "b/\\" deleted file mode 100644 index 6262662a46..0000000000 --- "a/\\" +++ /dev/null @@ -1,1603 +0,0 @@ - -#:include 'macros.fpp' - -!> @brief The module serves as a proxy to the parameters and subroutines -!! available in the MPI implementation's MPI module. Specifically, -!! the purpose of the proxy is to harness basic MPI commands into -!! 
more complicated procedures as to accomplish the communication -!! goals for the simulation. -module m_mpi_common - -#ifdef MFC_MPI - use mpi !< Message passing interface (MPI) module -#endif - - use m_derived_types !< Definitions of the derived types - - use m_global_parameters !< Definitions of the global parameters - - use m_helper - - use ieee_arithmetic - - use m_nvtx - - implicit none - - integer, private :: ierr, v_size !< - !$acc declare create(v_size) - !! Generic flags used to identify and report MPI errors - - real(wp), private, allocatable, dimension(:), target :: buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! primitive variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - real(wp), private, allocatable, dimension(:), target :: buff_recv !< - !! buff_recv is utilized to receive and unpack the buffer of the cell- - !! average primitive variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - !$acc declare create(buff_send, buff_recv) - - integer :: halo_size, nVars - !$acc declare create(halo_size, nVars) - - -contains - - !> The computation of parameters, the allocation of memory, - !! the association of pointers and/or the execution of any - !! other procedures that are necessary to setup the module. - subroutine s_initialize_mpi_common_module - -#ifdef MFC_MPI - ! Allocating buff_send/recv and. Please note that for the sake of - ! simplicity, both variables are provided sufficient storage to hold - ! the largest buffer in the computational domain. - - if (qbmm .and. .not. polytropic) then - v_size = sys_size + 2*nb*4 - else - v_size = sys_size - end if - - if (n > 0) then - if (p > 0) then - halo_size = nint(-1._wp + 1._wp*buff_size*(v_size)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1)) - else - halo_size = -1 + buff_size*(v_size)* & - & (max(m, n) + 2*buff_size + 1) - end if - else - halo_size = -1 + buff_size*(v_size) - end if - - !$acc update device(halo_size, v_size) - - @:ALLOCATE(buff_send(0:halo_size), buff_recv(0:halo_size)) -#endif - - end subroutine s_initialize_mpi_common_module - - !> The subroutine initializes the MPI execution environment - !! and queries both the number of processors which will be - !! available for the job and the local processor rank. - subroutine s_mpi_initialize - -#ifndef MFC_MPI - - ! Serial run only has 1 processor - num_procs = 1 - ! Local processor rank is 0 - proc_rank = 0 - -#else - - ! Initializing the MPI environment - call MPI_INIT(ierr) - - ! Checking whether the MPI environment has been properly initialized - if (ierr /= MPI_SUCCESS) then - print '(A)', 'Unable to initialize MPI environment. Exiting.' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Querying the number of processors available for the job - call MPI_COMM_SIZE(MPI_COMM_WORLD, num_procs, ierr) - - ! Querying the rank of the local processor - call MPI_COMM_RANK(MPI_COMM_WORLD, proc_rank, ierr) - -#endif - - end subroutine s_mpi_initialize - - !! @param q_cons_vf Conservative variables - !! @param ib_markers track if a cell is within the immersed boundary - !! @param levelset closest distance from every cell to the IB - !! @param levelset_norm normalized vector from every cell to the closest point to the IB - !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) - - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - type(integer_field), optional, intent(in) :: ib_markers - type(levelset_field), optional, intent(IN) :: levelset - type(levelset_norm_field), optional, intent(IN) :: levelset_norm - type(scalar_field), intent(in), optional :: beta - - integer, dimension(num_dims) :: sizes_glb, sizes_loc - integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start - -#ifdef MFC_MPI - - ! Generic loop iterator - integer :: i, j, q, k, l - - !Altered system size for the lagrangian subgrid bubble model - integer :: alt_sys - - if (present(beta)) then - alt_sys = sys_size + 1 - else - alt_sys = sys_size - end if - - do i = 1, sys_size - MPI_IO_DATA%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p) - end do - - if (present(beta)) then - MPI_IO_DATA%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p) - end if - - !Additional variables pb and mv for non-polytropic qbmm -#ifdef MFC_PRE_PROCESS - if (qbmm .and. .not. polytropic) then - do i = 1, nb - do j = 1, nnode - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb%sf(0:m, 0:n, 0:p, j, i) - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv%sf(0:m, 0:n, 0:p, j, i) - end do - end do - end if -#endif - -#ifdef MFC_SIMULATION - if (qbmm .and. .not. polytropic) then - do i = 1, nb - do j = 1, nnode - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j)%sf => pb_ts(1)%sf(0:m, 0:n, 0:p, j, i) - MPI_IO_DATA%var(sys_size + (i - 1)*nnode + j + nb*nnode)%sf => mv_ts(1)%sf(0:m, 0:n, 0:p, j, i) - end do - end do - end if -#endif - ! Define global(g) and local(l) sizes for flow variables - sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1 - if (n > 0) then - sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1 - if (p > 0) then - sizes_glb(3) = p_glb + 1; sizes_loc(3) = p + 1 - end if - end if - - ! Define the view for each variable - do i = 1, alt_sys - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) - call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) - end do - -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. 
polytropic) then - do i = sys_size + 1, sys_size + 2*nb*4 - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) - call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) - - end do - end if -#endif - - if (present(ib_markers)) then - -#ifdef MFC_PRE_PROCESS - MPI_IO_IB_DATA%var%sf => ib_markers%sf - MPI_IO_levelset_DATA%var%sf => levelset%sf - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf -#else - MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p) - -#ifndef MFC_POST_PROCESS - MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) -#endif - -#endif - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, MPI_INTEGER, MPI_IO_IB_DATA%view, ierr) - call MPI_TYPE_COMMIT(MPI_IO_IB_DATA%view, ierr) - -#ifndef MFC_POST_PROCESS - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelset_DATA%view, ierr) - call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_levelsetnorm_DATA%view, ierr) - - call MPI_TYPE_COMMIT(MPI_IO_levelset_DATA%view, ierr) - call MPI_TYPE_COMMIT(MPI_IO_levelsetnorm_DATA%view, ierr) -#endif - end if - -#ifndef MFC_POST_PROCESS - if (present(ib_markers)) then - do j = 1, num_ibs - if (patch_ib(j)%c > 0) then - -#ifdef MFC_PRE_PROCESS - allocate (MPI_IO_airfoil_IB_DATA%var(1:2*Np)) -#endif - - airfoil_glb(1) = 3*Np*num_procs - airfoil_loc(1) = 3*Np - airfoil_start(1) = 3*proc_rank*Np - -#ifdef MFC_PRE_PROCESS - do i = 1, Np - MPI_IO_airfoil_IB_DATA%var(i)%x = airfoil_grid_l(i)%x - MPI_IO_airfoil_IB_DATA%var(i)%y = airfoil_grid_l(i)%y - end do -#endif - - call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(1), ierr) - call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(1), ierr) - -#ifdef MFC_PRE_PROCESS - do i = 1, Np - MPI_IO_airfoil_IB_DATA%var(Np + i)%x = airfoil_grid_u(i)%x - MPI_IO_airfoil_IB_DATA%var(Np + i)%y = airfoil_grid_u(i)%y - end do -#endif - call MPI_TYPE_CREATE_SUBARRAY(1, airfoil_glb, airfoil_loc, airfoil_start, & - MPI_ORDER_FORTRAN, mpi_p, MPI_IO_airfoil_IB_DATA%view(2), ierr) - call MPI_TYPE_COMMIT(MPI_IO_airfoil_IB_DATA%view(2), ierr) - - end if - end do - - end if -#endif - -#endif - - end subroutine s_initialize_mpi_data - - subroutine s_mpi_gather_data(my_vector, counts, gathered_vector, root) - - integer, intent(in) :: counts ! Array of vector lengths for each process - real(wp), intent(in), dimension(counts) :: my_vector ! Input vector on each process - integer, intent(in) :: root ! Rank of the root process - real(wp), allocatable, intent(out) :: gathered_vector(:) ! 
Gathered vector on the root process - - integer :: i, offset, ierr - integer, allocatable :: recounts(:), displs(:) - -#ifdef MFC_MPI - - allocate (recounts(num_procs)) - - call MPI_GATHER(counts, 1, MPI_INTEGER, recounts, 1, MPI_INTEGER, root, & - MPI_COMM_WORLD, ierr) - - allocate (displs(size(recounts))) - - displs(1) = 0 - - do i = 2, size(recounts) - displs(i) = displs(i - 1) + recounts(i - 1) - end do - - allocate (gathered_vector(sum(recounts))) - call MPI_GATHERV(my_vector, counts, mpi_p, gathered_vector, recounts, displs, mpi_p, & - root, MPI_COMM_WORLD, ierr) -#endif - end subroutine s_mpi_gather_data - - subroutine mpi_bcast_time_step_values(proc_time, time_avg) - - real(wp), dimension(0:num_procs - 1), intent(inout) :: proc_time - real(wp), intent(inout) :: time_avg - -#ifdef MFC_MPI - - call MPI_GATHER(time_avg, 1, mpi_p, proc_time(0), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine mpi_bcast_time_step_values - - subroutine s_prohibit_abort(condition, message) - character(len=*), intent(in) :: condition, message - - print *, "" - print *, "CASE FILE ERROR" - print *, " - Prohibited condition: ", trim(condition) - if (len_trim(message) > 0) then - print *, " - Note: ", trim(message) - end if - print *, "" - call s_mpi_abort(code=CASE_FILE_ERROR_CODE) - end subroutine s_prohibit_abort - - !> The goal of this subroutine is to determine the global - !! extrema of the stability criteria in the computational - !! domain. This is performed by sifting through the local - !! extrema of each stability criterion. Note that each of - !! the local extrema is from a single process, within its - !! assigned section of the computational domain. Finally, - !! note that the global extrema values are only bookkeept - !! on the rank 0 processor. - !! @param icfl_max_loc Local maximum ICFL stability criterion - !! @param vcfl_max_loc Local maximum VCFL stability criterion - !! @param Rc_min_loc Local minimum Rc stability criterion - !! @param icfl_max_glb Global maximum ICFL stability criterion - !! @param vcfl_max_glb Global maximum VCFL stability criterion - !! @param Rc_min_glb Global minimum Rc stability criterion - subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & - vcfl_max_loc, & - ccfl_max_loc, & - Rc_min_loc, & - icfl_max_glb, & - vcfl_max_glb, & - ccfl_max_glb, & - Rc_min_glb) - - real(wp), intent(in) :: icfl_max_loc - real(wp), intent(in) :: vcfl_max_loc - real(wp), intent(in) :: ccfl_max_loc - real(wp), intent(in) :: Rc_min_loc - - real(wp), intent(out) :: icfl_max_glb - real(wp), intent(out) :: vcfl_max_glb - real(wp), intent(out) :: ccfl_max_glb - real(wp), intent(out) :: Rc_min_glb - -#ifdef MFC_SIMULATION -#ifdef MFC_MPI - - ! Reducing local extrema of ICFL, VCFL, CCFL and Rc numbers to their - ! global extrema and bookkeeping the results on the rank 0 processor - call MPI_REDUCE(icfl_max_loc, icfl_max_glb, 1, & - mpi_p, MPI_MAX, 0, & - MPI_COMM_WORLD, ierr) - - if (viscous) then - call MPI_REDUCE(vcfl_max_loc, vcfl_max_glb, 1, & - mpi_p, MPI_MAX, 0, & - MPI_COMM_WORLD, ierr) - call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & - mpi_p, MPI_MIN, 0, & - MPI_COMM_WORLD, ierr) - end if - -#else - - icfl_max_glb = icfl_max_loc - - if (viscous) then - vcfl_max_glb = vcfl_max_loc - Rc_min_glb = Rc_min_loc - end if - -#endif -#endif - - end subroutine s_mpi_reduce_stability_criteria_extrema - - !> The following subroutine takes the input local variable - !! from all processors and reduces to the sum of all - !! values. 
The reduced variable is recorded back onto the - !! original local variable on each processor. - !! @param var_loc Some variable containing the local value which should be - !! reduced amongst all the processors in the communicator. - !! @param var_glb The globally reduced value - subroutine s_mpi_allreduce_sum(var_loc, var_glb) - - real(wp), intent(in) :: var_loc - real(wp), intent(out) :: var_glb - -#ifdef MFC_MPI - - ! Performing the reduction procedure - call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_SUM, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_allreduce_sum - - !> The following subroutine takes the input local variable - !! from all processors and reduces to the minimum of all - !! values. The reduced variable is recorded back onto the - !! original local variable on each processor. - !! @param var_loc Some variable containing the local value which should be - !! reduced amongst all the processors in the communicator. - !! @param var_glb The globally reduced value - subroutine s_mpi_allreduce_min(var_loc, var_glb) - - real(wp), intent(in) :: var_loc - real(wp), intent(out) :: var_glb - -#ifdef MFC_MPI - - ! Performing the reduction procedure - call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_MIN, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_allreduce_min - - !> The following subroutine takes the input local variable - !! from all processors and reduces to the maximum of all - !! values. The reduced variable is recorded back onto the - !! original local variable on each processor. - !! @param var_loc Some variable containing the local value which should be - !! reduced amongst all the processors in the communicator. - !! @param var_glb The globally reduced value - subroutine s_mpi_allreduce_max(var_loc, var_glb) - - real(wp), intent(in) :: var_loc - real(wp), intent(out) :: var_glb - -#ifdef MFC_MPI - - ! Performing the reduction procedure - call MPI_ALLREDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_MAX, MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_allreduce_max - - !> The following subroutine takes the inputted variable and - !! determines its minimum value on the entire computational - !! domain. The result is stored back into inputted variable. - !! @param var_loc holds the local value to be reduced among - !! all the processors in communicator. On output, the variable holds - !! the minimum value, reduced amongst all of the local values. - subroutine s_mpi_reduce_min(var_loc) - - real(wp), intent(inout) :: var_loc - -#ifdef MFC_MPI - - ! Temporary storage variable that holds the reduced minimum value - real(wp) :: var_glb - - ! Performing reduction procedure and eventually storing its result - ! into the variable that was initially inputted into the subroutine - call MPI_REDUCE(var_loc, var_glb, 1, mpi_p, & - MPI_MIN, 0, MPI_COMM_WORLD, ierr) - - call MPI_BCAST(var_glb, 1, mpi_p, & - 0, MPI_COMM_WORLD, ierr) - - var_loc = var_glb - -#endif - - end subroutine s_mpi_reduce_min - - !> The following subroutine takes the first element of the - !! 2-element inputted variable and determines its maximum - !! value on the entire computational domain. The result is - !! stored back into the first element of the variable while - !! the rank of the processor that is in charge of the sub- - !! domain containing the maximum is stored into the second - !! element of the variable. - !! @param var_loc On input, this variable holds the local value and processor rank, - !! which are to be reduced among all the processors in communicator. - !! 
On output, this variable holds the maximum value, reduced amongst - !! all of the local values, and the process rank to which the value - !! belongs. - subroutine s_mpi_reduce_maxloc(var_loc) - - real(wp), dimension(2), intent(inout) :: var_loc - -#ifdef MFC_MPI - - real(wp), dimension(2) :: var_glb !< - !! Temporary storage variable that holds the reduced maximum value - !! and the rank of the processor with which the value is associated - - ! Performing reduction procedure and eventually storing its result - ! into the variable that was initially inputted into the subroutine - call MPI_REDUCE(var_loc, var_glb, 1, mpi_2p, & - MPI_MAXLOC, 0, MPI_COMM_WORLD, ierr) - - call MPI_BCAST(var_glb, 1, mpi_2p, & - 0, MPI_COMM_WORLD, ierr) - - var_loc = var_glb - -#endif - - end subroutine s_mpi_reduce_maxloc - - !> The subroutine terminates the MPI execution environment. - !! @param prnt error message to be printed - subroutine s_mpi_abort(prnt, code) - - character(len=*), intent(in), optional :: prnt - integer, intent(in), optional :: code - - if (present(prnt)) then - print *, prnt - call flush (6) - - end if - -#ifndef MFC_MPI - if (present(code)) then - stop code - else - stop 1 - end if -#else - ! Terminating the MPI environment - if (present(code)) then - call MPI_ABORT(MPI_COMM_WORLD, code, ierr) - else - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if -#endif - - end subroutine s_mpi_abort - - !>Halts all processes until all have reached barrier. - subroutine s_mpi_barrier - -#ifdef MFC_MPI - - ! Calling MPI_BARRIER - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - -#endif - - end subroutine s_mpi_barrier - - !> The subroutine finalizes the MPI execution environment. - subroutine s_mpi_finalize - -#ifdef MFC_MPI - - ! Finalizing the MPI environment - call MPI_FINALIZE(ierr) - -#endif - - end subroutine s_mpi_finalize - - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. - !! @param q_cons_vf Cell-average conservative variables - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_comm, & - mpi_dir, & - pbc_loc, & - nVar, & - pb, mv) - - type(scalar_field), dimension(1:), intent(inout) :: q_comm - real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: mpi_dir, pbc_loc, nVar - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0, qbmm_comm - - integer :: pack_offset, unpack_offset - - real(wp), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - call nvtxStartRange("RHS-COMM-PACKBUF") - - qbmm_comm = .false. - - if (present(pb) .and. present(mv) .and. qbmm .and. .not. polytropic) then - qbmm_comm = .true. 
- v_size = nVar + 2*nb*4 - buffer_counts = (/ & - buff_size*v_size*(n + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - else - v_size = nVar - buffer_counts = (/ & - buff_size*v_size*(n + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - end if - - !$acc update device(v_size, nVar) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb(j + pack_offset, k, l, i - nVar, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv(j + pack_offset, k, l, i - nVar, q) - end do - end do - end do - end do - end do - end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = pb(j, k + pack_offset, l, i - nVar, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 
1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = mv(j, k + pack_offset, l, i - nVar, q) - end do - end do - end do - end do - end do - end if - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb(j, k, l + pack_offset, i - nVar, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv(j, k, l + pack_offset, i - nVar, q) - end do - end do - end do - end do - end do - end if - #:endif - end if - #:endfor - call nvtxEndRange ! Packbuf - - p_send => buff_send(0) - p_recv => buff_recv(0) - - ! Send/Recv -#ifdef MFC_SIMULATION - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(buff_send) - call nvtxEndRange - call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(buff_recv) - call nvtxEndRange - #:endif - end if - #:endfor -#else - call MPI_SENDRECV( & - p_send, buffer_count, mpi_p, dst_proc, send_tag, & - p_recv, buffer_count, mpi_p, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) -#endif - - ! 
Unpack Received Buffer - call nvtxStartRange("RHS-COMM-UNPACKBUF") - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - end if - #:else - ! 
Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm_comm) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do - end do - end do - end do - end do - end if - #:endif - end if - #:endfor - call nvtxEndRange -#endif - - end subroutine s_mpi_sendrecv_variables_buffers - - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! Optimal number of processors in the x-, y- and z-directions - - real(wp) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(wp) :: fct_min !< - !! Processor factorization (fct) minimization parameter - - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors - - integer :: i, j !< Generic loop iterators - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! 3D Cartesian Processor Topology - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! 
Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10._wp*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology - - ! Global Parameters for z-direction - - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. 
num_procs_z > 1)) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! 2D Cartesian Processor Topology - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10._wp*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting.') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology - - ! Global Parameters for y-direction - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! 1D Cartesian Processor Topology - else - - ! Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! Global Parameters for x-direction - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! 
Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location -#ifndef MFC_PRE_PROCESS - subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - -#ifdef MFC_MPI - - ! MPI Communication in x-direction - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - mpi_p, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - mpi_p, bc_x%beg, 1, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - mpi_p, bc_x%end, 0, & - dx(m + 1), buff_size, & - mpi_p, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in x-direction - - ! MPI Communication in y-direction - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! 
Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - mpi_p, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - mpi_p, bc_y%beg, 1, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - mpi_p, bc_y%end, 0, & - dy(n + 1), buff_size, & - mpi_p, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in y-direction - - ! MPI Communication in z-direction - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - mpi_p, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - mpi_p, bc_z%beg, 1, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - mpi_p, bc_z%end, 0, & - dz(p + 1), buff_size, & - mpi_p, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - - end if - ! 
END: MPI Communication in z-direction -#endif - - end subroutine s_mpi_sendrecv_grid_variables_buffers -#endif - - !> Module deallocation and/or disassociation procedures - subroutine s_finalize_mpi_common_module - -#ifdef MFC_MPI - deallocate (buff_send, buff_recv) -#endif - - end subroutine s_finalize_mpi_common_module - -end module m_mpi_common From 9adc5f8a15249aa8c84b281ff08bdc3ef23de10c Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 13 May 2025 22:42:47 -0400 Subject: [PATCH 18/62] bug fixes --- examples/3D_TaylorGreenVortex/case.py | 4 +- src/common/m_boundary_common.fpp | 6 +- src/common/m_mpi_common.fpp | 121 ++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 12 deletions(-) diff --git a/examples/3D_TaylorGreenVortex/case.py b/examples/3D_TaylorGreenVortex/case.py index 33889c7958..de440980cb 100644 --- a/examples/3D_TaylorGreenVortex/case.py +++ b/examples/3D_TaylorGreenVortex/case.py @@ -2,7 +2,7 @@ import math import json -N = 49 +N = 256 Re = 1600 L = 1 @@ -41,7 +41,7 @@ "p": N, "cyl_coord": "F", "dt": dt, - "t_step_start": 0, + "t_step_start": 13529, "t_step_stop": Nt, "t_step_save": int(Nt / 100), # Simulation Algorithm Parameters diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index bdfb163908..4810ee1223 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1029,9 +1029,7 @@ contains integer :: j, i, q -#ifdef MFC_PRE_PROCESS - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) -#else +#ifdef MFC_SIMULATION if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -1081,6 +1079,8 @@ contains end do end if end if +#else + call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) #endif end subroutine s_dirichlet diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 760f1ae5ac..c167f07ec8 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1222,7 +1222,7 @@ contains end do ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. (bc_z%beg == -1 .and. num_procs_z > 1)) then + if (proc_coords(3) > 0 .or. (bc_z%beg == BC_PERIODIC .and. num_procs_z > 1)) then proc_coords(3) = proc_coords(3) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%beg, ierr) @@ -1230,19 +1230,55 @@ contains end if ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == -1 .and. num_procs_z > 1)) then + if (proc_coords(3) < num_procs_z - 1 .or. (bc_z%end == BC_PERIODIC .and. num_procs_z > 1)) then proc_coords(3) = proc_coords(3) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%end, ierr) proc_coords(3) = proc_coords(3) - 1 end if +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(3) > 0 .and. format == 1) then + offset_z%beg = 2 + else + offset_z%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then + offset_z%end = 2 + else + offset_z%end = 0 + end if +#endif + + ! Beginning and end sub-domain boundary locations if (parallel_io) then if (proc_coords(3) < rem_cells) then start_idx(3) = (p + 1)*proc_coords(3) else start_idx(3) = (p + 1)*proc_coords(3) + rem_cells end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) 
then + dz = (z_domain%end - z_domain%beg)/real(p_glb + 1, wp) + + if (proc_coords(3) < rem_cells) then + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3)) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1) & + - (num_procs_z - rem_cells)) + else + z_domain%beg = z_domain%beg + dz*real((p + 1)* & + proc_coords(3) + rem_cells) + z_domain%end = z_domain%end - dz*real((p + 1)* & + (num_procs_z - proc_coords(3) - 1)) + end if + end if +#endif end if ! 2D Cartesian Processor Topology @@ -1325,7 +1361,7 @@ contains end do ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. (bc_y%beg == -1 .and. num_procs_y > 1)) then + if (proc_coords(2) > 0 .or. (bc_y%beg == BC_PERIODIC .and. num_procs_y > 1)) then proc_coords(2) = proc_coords(2) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%beg, ierr) @@ -1333,19 +1369,55 @@ contains end if ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == -1 .and. num_procs_y > 1)) then + if (proc_coords(2) < num_procs_y - 1 .or. (bc_y%end == BC_PERIODIC .and. num_procs_y > 1)) then proc_coords(2) = proc_coords(2) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%end, ierr) proc_coords(2) = proc_coords(2) - 1 end if +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(2) > 0 .and. format == 1) then + offset_y%beg = 2 + else + offset_y%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then + offset_y%end = 2 + else + offset_y%end = 0 + end if +#endif + + ! Beginning and end sub-domain boundary locations if (parallel_io) then if (proc_coords(2) < rem_cells) then start_idx(2) = (n + 1)*proc_coords(2) else start_idx(2) = (n + 1)*proc_coords(2) + rem_cells end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dy = (y_domain%end - y_domain%beg)/real(n_glb + 1, wp) + + if (proc_coords(2) < rem_cells) then + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2)) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1) & + - (num_procs_y - rem_cells)) + else + y_domain%beg = y_domain%beg + dy*real((n + 1)* & + proc_coords(2) + rem_cells) + y_domain%end = y_domain%end - dy*real((n + 1)* & + (num_procs_y - proc_coords(2) - 1)) + end if + end if +#endif end if ! 1D Cartesian Processor Topology @@ -1381,27 +1453,62 @@ contains end do ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. (bc_x%beg == -1 .and. num_procs_x > 1)) then + if (proc_coords(1) > 0 .or. (bc_x%beg == BC_PERIODIC .and. num_procs_x > 1)) then proc_coords(1) = proc_coords(1) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) proc_coords(1) = proc_coords(1) + 1 end if ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == -1 .and. num_procs_x > 1)) then + if (proc_coords(1) < num_procs_x - 1 .or. (bc_x%end == BC_PERIODIC .and. num_procs_x > 1)) then proc_coords(1) = proc_coords(1) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) proc_coords(1) = proc_coords(1) - 1 end if +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then + offset_x%beg = 2 + else + offset_x%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then + offset_x%end = 2 + else + offset_x%end = 0 + end if +#endif + + ! 
Beginning and end sub-domain boundary locations if (parallel_io) then if (proc_coords(1) < rem_cells) then start_idx(1) = (m + 1)*proc_coords(1) else start_idx(1) = (m + 1)*proc_coords(1) + rem_cells end if + else +#ifdef MFC_PRE_PROCESS + if (old_grid .neqv. .true.) then + dx = (x_domain%end - x_domain%beg)/real(m_glb + 1, wp) + + if (proc_coords(1) < rem_cells) then + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1)) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1) & + - (num_procs_x - rem_cells)) + else + x_domain%beg = x_domain%beg + dx*real((m + 1)* & + proc_coords(1) + rem_cells) + x_domain%end = x_domain%end - dx*real((m + 1)* & + (num_procs_x - proc_coords(1) - 1)) + end if + end if +#endif end if - #endif end subroutine s_mpi_decompose_computational_domain From 6448b3701fa44700b99955107ddb27f961ce5749 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 13 May 2025 23:46:34 -0400 Subject: [PATCH 19/62] fix no mpi test --- src/post_process/m_global_parameters.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index ba36a8be19..70882a7ff4 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -474,7 +474,7 @@ contains integer :: i, j, fac ! Setting m_root equal to m in the case of a 1D serial simulation - if (num_procs > 1 .and. n == 0) m_root = m_glb + if (n == 0) m_root = m_glb ! Gamma/Pi_inf Model if (model_eqns == 1) then diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index c1aaa8f4b3..be8c822921 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -429,9 +429,11 @@ contains subroutine s_finalize_mpi_proxy_module() +#ifdef MFC_MPI if (ib) then @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if +#endif end subroutine s_finalize_mpi_proxy_module From a96f4c6eb9c5c372c2e70927ca0416ce3729d399 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 19 May 2025 13:32:26 -0400 Subject: [PATCH 20/62] bug fixes --- "\\" | 439 +++++++++++++++++++++++++++++++ src/common/m_boundary_common.fpp | 6 + src/common/m_mpi_common.fpp | 2 +- temp.txt | 225 ++++++++++++++++ 4 files changed, 671 insertions(+), 1 deletion(-) create mode 100644 "\\" create mode 100644 temp.txt diff --git "a/\\" "b/\\" new file mode 100644 index 0000000000..5d2a6a0944 --- /dev/null +++ "b/\\" @@ -0,0 +1,439 @@ +!> +!! @file m_mpi_proxy.f90 +!! @brief Contains module m_mpi_proxy + +#:include 'case.fpp' +#:include 'macros.fpp' + +!> @brief The module serves as a proxy to the parameters and subroutines +!! available in the MPI implementation's MPI module. Specifically, +!! the purpose of the proxy is to harness basic MPI commands into +!! more complicated procedures as to accomplish the communication +!! goals for the simulation. +module m_mpi_proxy + +#ifdef MFC_MPI + use mpi !< Message passing interface (MPI) module +#endif + + use m_helper_basic !< Functions to compare floating point numbers + + use m_helper + + use m_derived_types !< Definitions of the derived types + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_common + + use m_nvtx + + use ieee_arithmetic + + implicit none + + integer, private, allocatable, dimension(:), target :: ib_buff_send !< + !! This variable is utilized to pack and send the buffer of the immersed + !! 
boundary markers, for a single computational domain boundary at the + !! time, to the relevant neighboring processor. + + integer, private, allocatable, dimension(:), target :: ib_buff_recv !< + !! q_cons_buff_recv is utilized to receive and unpack the buffer of the + !! immersed boundary markers, for a single computational domain boundary + !! at the time, from the relevant neighboring processor. + + !> @name Generic flags used to identify and report MPI errors + !> @{ + integer, private :: ierr + !> @} + + integer :: i_halo_size + !$acc declare create(i_halo_size) + +contains + + subroutine s_initialize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + if (n > 0) then + if (p > 0) then + i_halo_size = -1 + gp_layers* & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1) + else + i_halo_size = -1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1) + end if + else + i_halo_size = -1 + gp_layers + end if + + !$acc update device(i_halo_size) + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) + end if +#endif + + end subroutine s_initialize_mpi_proxy_module + + !> Since only the processor with rank 0 reads and verifies + !! the consistency of user inputs, these are initially not + !! available to the other processors. Then, the purpose of + !! this subroutine is to distribute the user inputs to the + !! remaining processors in the communicator. + subroutine s_mpi_bcast_user_inputs() + +#ifdef MFC_MPI + + integer :: i, j !< Generic loop iterator + + call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + + #:for VAR in ['k_x', 'k_y', 'k_z', 'w_x', 'w_y', 'w_z', 'p_x', 'p_y', & + & 'p_z', 'g_x', 'g_y', 'g_z'] + call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['t_step_old', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & + & 't_step_start','t_step_stop','t_step_save','t_step_print', & + & 'model_eqns','time_stepper', 'riemann_solver', 'low_Mach', & + & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & + & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & + & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & + & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & + & 'num_bc_patches'] + call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', & + & 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', & + & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', & + & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', & + & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', & + & 'prim_vars_wrt', 'weno_avg', 'file_per_process', 'relax', & + & 'adv_n', 'adap_dt', 'ib', 'bodyForces', 'bf_x', 'bf_y', 'bf_z', & + & 'bc_x%grcbc_in', 'bc_x%grcbc_out', 'bc_x%grcbc_vel_out', & + & 'bc_y%grcbc_in', 'bc_y%grcbc_out', 'bc_y%grcbc_vel_out', & + & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & + & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & + & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & + & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage' ] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + if (chemistry) then + #:for VAR in [ 'diffusion', 'reactions' ] + call MPI_BCAST(chem_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'gamma_method' ] + call 
MPI_BCAST(chem_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + + if (bubbles_lagrange) then + #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & + & 'write_bubbles', 'write_bubbles_stats'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] + call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'c0', 'rho0', 'T0', 'x0', 'diffcoefvap', 'epsilonb','charwidth', & + & 'valmaxvoid', 'Thost'] + call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + + #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & + & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', & + & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', & + & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & + & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & + & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & + & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & + & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & + & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & + & 'tau_star', 'cont_damage_s', 'alpha_bar' ] + call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + do i = 1, 3 + #:for VAR in [ 'bc_x%vel_in', 'bc_x%vel_out', 'bc_y%vel_in', 'bc_y%vel_out', & + & 'bc_z%vel_in', 'bc_z%vel_out'] + call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + + #:if not MFC_CASE_OPTIMIZATION + call MPI_BCAST(mapped_weno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(wenoz, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(teno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(nb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(num_fluids, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(wenoz_q, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(mhd, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(relativity, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endif + + do i = 1, num_fluids_max + #:for VAR in [ 'gamma','pi_inf','mul0','ss','pv','gamma_v','M_v', & + & 'mu_v','k_v', 'cp_v','G', 'cv', 'qv', 'qvp' ] + call MPI_BCAST(fluid_pp(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr) + end do + + do i = 1, num_fluids_max + #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in','bc_z%alpha_in'] + call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + + do i = 1, num_ibs + #:for VAR in [ 'radius', 'length_x', 'length_y', & + & 'x_centroid', 'y_centroid', 'c', 'm', 'p', 't', 'theta', 'slip' ] + call MPI_BCAST(patch_ib(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + call MPI_BCAST(patch_ib(i)%geometry, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + end do + + do j = 1, num_probes_max + do i = 1, 3 + call MPI_BCAST(acoustic(j)%loc(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + end do + + call MPI_BCAST(acoustic(j)%dipole, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + + #:for VAR in [ 'pulse', 'support', 'num_elements', 'element_on', 'bb_num_freq' ] + 
call MPI_BCAST(acoustic(j)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'mag', 'length', 'height', & + 'wavelength', 'frequency', 'gauss_sigma_dist', 'gauss_sigma_time', & + 'npulse', 'dir', 'delay', 'foc_length', 'aperture', & + 'element_spacing_angle', 'element_polygon_ratio', 'rotate_angle', & + 'bb_bandwidth', 'bb_lowest_freq' ] + call MPI_BCAST(acoustic(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'x','y','z' ] + call MPI_BCAST(probe(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'xmin', 'xmax', 'ymin', 'ymax', 'zmin', 'zmax' ] + call MPI_BCAST(integral(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + +#endif + + end subroutine s_mpi_bcast_user_inputs + + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) + + type(integer_field), intent(inout) :: ib_markers + + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0, qbmm_comm + + integer :: pack_offset, unpack_offset + + integer, pointer :: p_i_send, p_i_recv + +#ifdef MFC_MPI + + call nvtxStartRange("IBM-COMM-PACKBUF") + + buffer_counts = (/ & + gp_layers*(n + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - gp_layers + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 + end if + + ! 
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, gp_layers - 1 + r = (j + gp_layers*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) + end do + end do + end do + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, gp_layers - 1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + (k + gp_layers*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) + end do + end do + end do + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, gp_layers - 1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) + end do + end do + end do + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + p_i_send => ib_buff_send(0) + p_i_recv => ib_buff_recv(0) + + ! Send/Recv + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_i_send, p_i_recv) + !$acc host_data use_device(p_i_send, p_i_recv) + call nvtxStartRange("IBM-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("IBM-COMM-DEV2HOST") + !$acc update host(ib_buff_send) + call nvtxEndRange + call nvtxStartRange("IBM-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_i_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & + p_i_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("IBM-COMM-HOST2DEV") + !$acc update device(ib_buff_recv) + call nvtxEndRange + #:endif + end if + #:endfor + + ! 
Unpack Received Buffer + call nvtxStartRange("IBM-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -gp_layers, -1 + r = (j + gp_layers*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) + end do + end do + end do + #:elif mpi_dir == 2 + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = 0, p + do k = -gp_layers, -1 + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + gp_layers*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) + end do + end do + end do + #:else + !$acc parallel loop collapse(3) gang vector default(present) private(r) + do l = -gp_layers, -1 + do k = -gp_layers, n + gp_layers + do j = -gp_layers, m + gp_layers + r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & + ((k + gp_layers) + (n + 2*gp_layers + 1)* & + (l + gp_layers))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) + end do + end do + end do + #:endif + end if + #:endfor + call nvtxEndRange +#endif + + end subroutine s_mpi_sendrecv_ib_buffers + + subroutine s_mpi_send_random_number(phi_rn, num_freq) + integer, intent(in) :: num_freq + real(wp), intent(inout), dimension(1:num_freq) :: phi_rn + +#ifdef MFC_MPI + call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) +#endif + + end subroutine s_mpi_send_random_number + + subroutine s_finalize_mpi_proxy_module() + +#ifdef MFC_MPI + if (ib) then + @:DEALLOCATE(ib_buff_send, ib_buff_recv) + end if +#endif + + end subroutine s_finalize_mpi_proxy_module + +end module m_mpi_proxy diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4810ee1223..b47f4b592c 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -255,9 +255,11 @@ contains integer :: j, q, i +#ifndef MFC_POST_PROCESS if (qbmm .and. .not. polytropic) then call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) end if +#endif if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg @@ -795,9 +797,11 @@ contains integer :: j, q, i +#ifndef MFC_POST_PROCESS if (qbmm .and. .not. polytropic) then call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) end if +#endif if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg @@ -894,9 +898,11 @@ contains integer :: j, q, i +#ifndef MFC_POST_PROCESS if (qbmm .and. .not. 
polytropic) then call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) end if +#endif if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index c167f07ec8..185a84892e 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -637,7 +637,7 @@ contains /) end if - !$acc update device(v_size, nVar) + !$acc update device(v_size) buffer_count = buffer_counts(mpi_dir) boundary_conditions = (/bc_x, bc_y, bc_z/) diff --git a/temp.txt b/temp.txt new file mode 100644 index 0000000000..cc44ee4e57 --- /dev/null +++ b/temp.txt @@ -0,0 +1,225 @@ + 2 74 224 + 0 32 96 + 1 32 97 + 2 32 98 + 0 33 99 + 1 33 100 + 2 33 101 + 0 34 102 + 1 34 103 + 2 34 104 + 0 35 105 + 1 35 106 + 2 35 107 + 0 36 108 + 1 36 109 + 2 36 110 + 0 37 111 + 1 37 112 + 2 37 113 + 0 38 114 + 1 38 115 + 2 38 116 + 0 39 117 + 1 39 118 + 2 39 119 + 0 40 120 + 1 40 121 + 2 40 122 + 0 41 123 + 1 41 124 + 2 41 125 + 0 42 126 + 1 42 127 + 2 10 32 + 0 11 33 + 1 11 34 + 2 11 35 + 0 12 36 + 1 12 37 + 2 12 38 + 0 13 39 + 1 13 40 + 2 13 41 + 0 14 42 + 1 14 43 + 2 14 44 + 0 15 45 + 1 15 46 + 2 15 47 + 0 16 48 + 1 16 49 + 2 16 50 + 0 17 51 + 1 17 52 + 2 17 53 + 0 18 54 + 1 18 55 + 2 18 56 + 0 19 57 + 1 19 58 + 2 19 59 + 0 20 60 + 1 20 61 + 2 20 62 + 0 21 63 + 0 0 0 + 1 0 1 + 2 0 2 + 0 1 3 + 1 1 4 + 2 1 5 + 0 2 6 + 1 2 7 + 2 2 8 + 0 3 9 + 1 3 10 + 2 3 11 + 0 4 12 + 1 4 13 + 2 4 14 + 0 5 15 + 1 5 16 + 2 5 17 + 0 6 18 + 1 6 19 + 2 6 20 + 0 7 21 + 1 7 22 + 2 7 23 + 0 8 24 + 1 8 25 + 2 8 26 + 0 9 27 + 1 9 28 + 2 9 29 + 0 10 30 + 1 10 31 + 1 53 160 + 2 53 161 + 0 54 162 + 1 54 163 + 2 54 164 + 0 55 165 + 1 55 166 + 2 55 167 + 0 56 168 + 1 56 169 + 2 56 170 + 0 57 171 + 1 57 172 + 2 57 173 + 0 58 174 + 1 58 175 + 2 58 176 + 0 59 177 + 1 59 178 + 2 59 179 + 0 60 180 + 1 60 181 + 2 60 182 + 0 61 183 + 1 61 184 + 2 61 185 + 0 62 186 + 1 62 187 + 2 62 188 + 0 63 189 + 1 63 190 + 2 63 191 + 0 64 192 + 1 64 193 + 2 64 194 + 0 65 195 + 1 65 196 + 2 65 197 + 0 66 198 + 1 66 199 + 2 66 200 + 0 67 201 + 1 67 202 + 2 67 203 + 0 68 204 + 1 68 205 + 2 68 206 + 0 69 207 + 1 69 208 + 2 69 209 + 0 70 210 + 1 70 211 + 2 70 212 + 0 71 213 + 1 71 214 + 2 71 215 + 0 72 216 + 1 72 217 + 2 72 218 + 0 73 219 + 1 73 220 + 2 73 221 + 0 74 222 + 1 74 223 + 2 42 128 + 0 43 129 + 1 43 130 + 2 43 131 + 0 44 132 + 1 44 133 + 2 44 134 + 0 45 135 + 1 45 136 + 2 45 137 + 0 46 138 + 1 46 139 + 2 46 140 + 0 47 141 + 1 47 142 + 2 47 143 + 0 48 144 + 1 48 145 + 2 48 146 + 0 49 147 + 1 49 148 + 2 49 149 + 0 50 150 + 1 50 151 + 2 50 152 + 0 51 153 + 1 51 154 + 2 51 155 + 0 52 156 + 1 52 157 + 2 52 158 + 0 53 159 + 1 21 64 + 2 21 65 + 0 22 66 + 1 22 67 + 2 22 68 + 0 23 69 + 1 23 70 + 2 23 71 + 0 24 72 + 1 24 73 + 2 24 74 + 0 25 75 + 1 25 76 + 2 25 77 + 0 26 78 + 1 26 79 + 2 26 80 + 0 27 81 + 1 27 82 + 2 27 83 + 0 28 84 + 1 28 85 + 2 28 86 + 0 29 87 + 1 29 88 + 2 29 89 + 0 30 90 + 1 30 91 + 2 30 92 + 0 31 93 + 1 31 94 + 2 31 95 From b817ef15f002c1a682d321c18f4a736251706de6 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 19 May 2025 13:32:46 -0400 Subject: [PATCH 21/62] remove extra files --- "\\" | 439 ------------------------------------------------------- temp.txt | 225 ---------------------------- 2 files changed, 664 deletions(-) delete mode 100644 "\\" delete mode 100644 temp.txt diff --git "a/\\" "b/\\" deleted file mode 100644 index 5d2a6a0944..0000000000 --- "a/\\" +++ /dev/null @@ -1,439 +0,0 @@ -!> -!! @file m_mpi_proxy.f90 -!! 
@brief Contains module m_mpi_proxy - -#:include 'case.fpp' -#:include 'macros.fpp' - -!> @brief The module serves as a proxy to the parameters and subroutines -!! available in the MPI implementation's MPI module. Specifically, -!! the purpose of the proxy is to harness basic MPI commands into -!! more complicated procedures as to accomplish the communication -!! goals for the simulation. -module m_mpi_proxy - -#ifdef MFC_MPI - use mpi !< Message passing interface (MPI) module -#endif - - use m_helper_basic !< Functions to compare floating point numbers - - use m_helper - - use m_derived_types !< Definitions of the derived types - - use m_global_parameters !< Definitions of the global parameters - - use m_mpi_common - - use m_nvtx - - use ieee_arithmetic - - implicit none - - integer, private, allocatable, dimension(:), target :: ib_buff_send !< - !! This variable is utilized to pack and send the buffer of the immersed - !! boundary markers, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. - - integer, private, allocatable, dimension(:), target :: ib_buff_recv !< - !! q_cons_buff_recv is utilized to receive and unpack the buffer of the - !! immersed boundary markers, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. - - !> @name Generic flags used to identify and report MPI errors - !> @{ - integer, private :: ierr - !> @} - - integer :: i_halo_size - !$acc declare create(i_halo_size) - -contains - - subroutine s_initialize_mpi_proxy_module() - -#ifdef MFC_MPI - if (ib) then - if (n > 0) then - if (p > 0) then - i_halo_size = -1 + gp_layers* & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1) - else - i_halo_size = -1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1) - end if - else - i_halo_size = -1 + gp_layers - end if - - !$acc update device(i_halo_size) - @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) - end if -#endif - - end subroutine s_initialize_mpi_proxy_module - - !> Since only the processor with rank 0 reads and verifies - !! the consistency of user inputs, these are initially not - !! available to the other processors. Then, the purpose of - !! this subroutine is to distribute the user inputs to the - !! remaining processors in the communicator. 
- subroutine s_mpi_bcast_user_inputs() - -#ifdef MFC_MPI - - integer :: i, j !< Generic loop iterator - - call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) - - #:for VAR in ['k_x', 'k_y', 'k_z', 'w_x', 'w_y', 'w_z', 'p_x', 'p_y', & - & 'p_z', 'g_x', 'g_y', 'g_z'] - call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in ['t_step_old', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & - & 't_step_start','t_step_stop','t_step_save','t_step_print', & - & 'model_eqns','time_stepper', 'riemann_solver', 'low_Mach', & - & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & - & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & - & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & - & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches'] - call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'run_time_info','cyl_coord', 'mpp_lim', & - & 'mp_weno', 'rdma_mpi', 'weno_flat', 'riemann_flat', & - & 'weno_Re_flux', 'alt_soundspeed', 'null_weights', 'mixture_err', & - & 'parallel_io', 'hypoelasticity', 'bubbles_euler', 'polytropic', & - & 'polydisperse', 'qbmm', 'acoustic_source', 'probe_wrt', 'integral_wrt', & - & 'prim_vars_wrt', 'weno_avg', 'file_per_process', 'relax', & - & 'adv_n', 'adap_dt', 'ib', 'bodyForces', 'bf_x', 'bf_y', 'bf_z', & - & 'bc_x%grcbc_in', 'bc_x%grcbc_out', 'bc_x%grcbc_vel_out', & - & 'bc_y%grcbc_in', 'bc_y%grcbc_out', 'bc_y%grcbc_vel_out', & - & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & - & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & - & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage' ] - call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - if (chemistry) then - #:for VAR in [ 'diffusion', 'reactions' ] - call MPI_BCAST(chem_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'gamma_method' ] - call MPI_BCAST(chem_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - if (bubbles_lagrange) then - #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] - call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in ['solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb'] - call MPI_BCAST(lag_params%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'c0', 'rho0', 'T0', 'x0', 'diffcoefvap', 'epsilonb','charwidth', & - & 'valmaxvoid', 'Thost'] - call MPI_BCAST(lag_params%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end if - - #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & - & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', & - & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', & - & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & - & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & - & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & - & 'tau_star', 'cont_damage_s', 'alpha_bar' ] - 
call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - do i = 1, 3 - #:for VAR in [ 'bc_x%vel_in', 'bc_x%vel_out', 'bc_y%vel_in', 'bc_y%vel_out', & - & 'bc_z%vel_in', 'bc_z%vel_out'] - call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end do - - #:if not MFC_CASE_OPTIMIZATION - call MPI_BCAST(mapped_weno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(wenoz, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(teno, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(weno_order, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(nb, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(num_fluids, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(wenoz_q, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mhd, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(relativity, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - #:endif - - do i = 1, num_fluids_max - #:for VAR in [ 'gamma','pi_inf','mul0','ss','pv','gamma_v','M_v', & - & 'mu_v','k_v', 'cp_v','G', 'cv', 'qv', 'qvp' ] - call MPI_BCAST(fluid_pp(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr) - end do - - do i = 1, num_fluids_max - #:for VAR in ['bc_x%alpha_rho_in','bc_x%alpha_in','bc_y%alpha_rho_in','bc_y%alpha_in','bc_z%alpha_rho_in','bc_z%alpha_in'] - call MPI_BCAST(${VAR}$ (i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end do - - do i = 1, num_ibs - #:for VAR in [ 'radius', 'length_x', 'length_y', & - & 'x_centroid', 'y_centroid', 'c', 'm', 'p', 't', 'theta', 'slip' ] - call MPI_BCAST(patch_ib(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - call MPI_BCAST(patch_ib(i)%geometry, 2, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - end do - - do j = 1, num_probes_max - do i = 1, 3 - call MPI_BCAST(acoustic(j)%loc(i), 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - end do - - call MPI_BCAST(acoustic(j)%dipole, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) - - #:for VAR in [ 'pulse', 'support', 'num_elements', 'element_on', 'bb_num_freq' ] - call MPI_BCAST(acoustic(j)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'mag', 'length', 'height', & - 'wavelength', 'frequency', 'gauss_sigma_dist', 'gauss_sigma_time', & - 'npulse', 'dir', 'delay', 'foc_length', 'aperture', & - 'element_spacing_angle', 'element_polygon_ratio', 'rotate_angle', & - 'bb_bandwidth', 'bb_lowest_freq' ] - call MPI_BCAST(acoustic(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'x','y','z' ] - call MPI_BCAST(probe(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - - #:for VAR in [ 'xmin', 'xmax', 'ymin', 'ymax', 'zmin', 'zmax' ] - call MPI_BCAST(integral(j)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - #:endfor - end do - -#endif - - end subroutine s_mpi_bcast_user_inputs - - subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) - - type(integer_field), intent(inout) :: ib_markers - - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0, qbmm_comm - - integer :: pack_offset, unpack_offset - - integer, pointer :: p_i_send, p_i_recv - -#ifdef MFC_MPI - - call nvtxStartRange("IBM-COMM-PACKBUF") - - buffer_counts = (/ & - gp_layers*(n + 1)*(p + 1), & - gp_layers*(m + 
2*gp_layers + 1)*(p + 1), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - gp_layers + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + gp_layers + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) - end do - end do - end do - #:elif mpi_dir == 2 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) - end do - end do - end do - #:else - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) - end do - end do - end do - #:endif - end if - #:endfor - call nvtxEndRange ! Packbuf - - p_i_send => ib_buff_send(0) - p_i_recv => ib_buff_recv(0) - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - call nvtxStartRange("IBM-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("IBM-COMM-DEV2HOST") - !$acc update host(ib_buff_send) - call nvtxEndRange - call nvtxStartRange("IBM-COMM-SENDRECV-NO-RMDA") - #:endif - - call MPI_SENDRECV( & - p_i_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & - p_i_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - call nvtxStartRange("IBM-COMM-HOST2DEV") - !$acc update device(ib_buff_recv) - call nvtxEndRange - #:endif - end if - #:endfor - - ! 
Unpack Received Buffer - call nvtxStartRange("IBM-COMM-UNPACKBUF") - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) - end do - end do - end do - #:elif mpi_dir == 2 - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) - end do - end do - end do - #:else - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) - end do - end do - end do - #:endif - end if - #:endfor - call nvtxEndRange -#endif - - end subroutine s_mpi_sendrecv_ib_buffers - - subroutine s_mpi_send_random_number(phi_rn, num_freq) - integer, intent(in) :: num_freq - real(wp), intent(inout), dimension(1:num_freq) :: phi_rn - -#ifdef MFC_MPI - call MPI_BCAST(phi_rn, num_freq, mpi_p, 0, MPI_COMM_WORLD, ierr) -#endif - - end subroutine s_mpi_send_random_number - - subroutine s_finalize_mpi_proxy_module() - -#ifdef MFC_MPI - if (ib) then - @:DEALLOCATE(ib_buff_send, ib_buff_recv) - end if -#endif - - end subroutine s_finalize_mpi_proxy_module - -end module m_mpi_proxy diff --git a/temp.txt b/temp.txt deleted file mode 100644 index cc44ee4e57..0000000000 --- a/temp.txt +++ /dev/null @@ -1,225 +0,0 @@ - 2 74 224 - 0 32 96 - 1 32 97 - 2 32 98 - 0 33 99 - 1 33 100 - 2 33 101 - 0 34 102 - 1 34 103 - 2 34 104 - 0 35 105 - 1 35 106 - 2 35 107 - 0 36 108 - 1 36 109 - 2 36 110 - 0 37 111 - 1 37 112 - 2 37 113 - 0 38 114 - 1 38 115 - 2 38 116 - 0 39 117 - 1 39 118 - 2 39 119 - 0 40 120 - 1 40 121 - 2 40 122 - 0 41 123 - 1 41 124 - 2 41 125 - 0 42 126 - 1 42 127 - 2 10 32 - 0 11 33 - 1 11 34 - 2 11 35 - 0 12 36 - 1 12 37 - 2 12 38 - 0 13 39 - 1 13 40 - 2 13 41 - 0 14 42 - 1 14 43 - 2 14 44 - 0 15 45 - 1 15 46 - 2 15 47 - 0 16 48 - 1 16 49 - 2 16 50 - 0 17 51 - 1 17 52 - 2 17 53 - 0 18 54 - 1 18 55 - 2 18 56 - 0 19 57 - 1 19 58 - 2 19 59 - 0 20 60 - 1 20 61 - 2 20 62 - 0 21 63 - 0 0 0 - 1 0 1 - 2 0 2 - 0 1 3 - 1 1 4 - 2 1 5 - 0 2 6 - 1 2 7 - 2 2 8 - 0 3 9 - 1 3 10 - 2 3 11 - 0 4 12 - 1 4 13 - 2 4 14 - 0 5 15 - 1 5 16 - 2 5 17 - 0 6 18 - 1 6 19 - 2 6 20 - 0 7 21 - 1 7 22 - 2 7 23 - 0 8 24 - 1 8 25 - 2 8 26 - 0 9 27 - 1 9 28 - 2 9 29 - 0 10 30 - 1 10 31 - 1 53 160 - 2 53 161 - 0 54 162 - 1 54 163 - 2 54 164 - 0 55 165 - 1 55 166 - 2 55 167 - 0 56 168 - 1 56 169 - 2 56 170 - 0 57 171 - 1 57 172 - 2 57 173 - 0 58 174 - 1 58 175 - 2 58 176 - 0 59 177 - 1 59 178 - 2 59 179 - 0 60 180 - 1 60 181 - 2 60 182 - 0 61 183 - 1 61 184 - 2 61 185 - 0 62 186 - 1 62 187 - 2 62 188 - 0 63 189 - 1 63 190 - 2 63 191 - 0 64 192 - 1 64 193 - 2 64 194 - 0 65 195 - 1 65 196 - 2 65 197 - 0 66 198 - 1 66 199 - 2 66 200 - 0 67 201 - 1 67 202 - 2 67 203 - 0 68 204 - 1 68 205 - 2 68 206 - 0 69 207 - 1 69 208 - 2 69 209 - 0 70 210 - 1 70 211 - 2 70 212 - 0 71 213 - 1 71 214 - 2 71 215 - 0 72 216 - 1 72 217 - 2 72 218 - 0 73 219 - 1 73 220 - 2 73 221 - 0 74 222 - 1 74 223 - 2 42 128 - 0 43 129 - 1 43 
130 - 2 43 131 - 0 44 132 - 1 44 133 - 2 44 134 - 0 45 135 - 1 45 136 - 2 45 137 - 0 46 138 - 1 46 139 - 2 46 140 - 0 47 141 - 1 47 142 - 2 47 143 - 0 48 144 - 1 48 145 - 2 48 146 - 0 49 147 - 1 49 148 - 2 49 149 - 0 50 150 - 1 50 151 - 2 50 152 - 0 51 153 - 1 51 154 - 2 51 155 - 0 52 156 - 1 52 157 - 2 52 158 - 0 53 159 - 1 21 64 - 2 21 65 - 0 22 66 - 1 22 67 - 2 22 68 - 0 23 69 - 1 23 70 - 2 23 71 - 0 24 72 - 1 24 73 - 2 24 74 - 0 25 75 - 1 25 76 - 2 25 77 - 0 26 78 - 1 26 79 - 2 26 80 - 0 27 81 - 1 27 82 - 2 27 83 - 0 28 84 - 1 28 85 - 2 28 86 - 0 29 87 - 1 29 88 - 2 29 89 - 0 30 90 - 1 30 91 - 2 30 92 - 0 31 93 - 1 31 94 - 2 31 95 From 5cf0fef62e56820a702af3724fdaba83e8495cd9 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 19 May 2025 14:12:40 -0400 Subject: [PATCH 22/62] fix build + toolchain --- src/simulation/m_time_steppers.fpp | 2 +- toolchain/mfc/run/case_dicts.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 20def0ae5e..5af09f5ed6 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -938,7 +938,7 @@ contains elseif (bubbles_lagrange) then - call s_populate_variables_buffers(q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf, bc_type) + call s_populate_variables_buffers(bc_type, q_prim_vf, pb_ts(1)%sf, mv_ts(1)%sf) call s_compute_bubble_EL_dynamics(q_cons_ts(1)%vf, q_prim_vf, t_step, rhs_vf, stage) call s_transfer_data_to_tmp() call s_smear_voidfraction() diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 44e195221e..e50858b3b4 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -291,11 +291,7 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, -<<<<<<< HEAD - 'rkck_tolerance': ParamType.REAL, -======= 'num_bc_patches': ParamType.INT, ->>>>>>> upstream/master 'powell': ParamType.LOG, 'tau_star': ParamType.REAL, 'cont_damage_s': ParamType.REAL, From 645f2cf64e9983188500740291284145582fdec1 Mon Sep 17 00:00:00 2001 From: wilfonba Date: Thu, 22 May 2025 13:32:59 -0400 Subject: [PATCH 23/62] fix boundary_common on Frontier --- src/common/m_boundary_common.fpp | 171 +++++++++++++++---------------- toolchain/modules | 6 +- 2 files changed, 85 insertions(+), 92 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index b47f4b592c..97134421fa 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -93,18 +93,23 @@ contains do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, -1, k, l) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, -1, k, l) + call s_periodic(q_prim_vf, 1, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1, k, l) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, -1, k, l) + call s_dirichlet(q_prim_vf, 1, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb, mv) + end if end do end do end if @@ -117,18 +122,23 @@ contains do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 1, 1, k, l) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 1, 1, k, l) + call s_periodic(q_prim_vf, 1, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1, k, l) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 1, 1, k, l) + call s_dirichlet(q_prim_vf, 1, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb, mv) + end if end do end do end if @@ -145,20 +155,26 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) case (BC_AXIS) - call s_axis(q_prim_vf, pb, mv, 2, -1, k, l) + call s_axis(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, -1, k, l) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, -1, k, l) + call s_periodic(q_prim_vf, 2, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1, k, l) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, -1, k, l) + call s_dirichlet(q_prim_vf, 2, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, -1)%sf(0, k, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb, mv) + end if end do end do end if @@ -171,18 +187,23 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 2, 1, k, l) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 2, 1, k, l) + call s_periodic(q_prim_vf, 2, 1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1, k, l) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 2, 1, k, l) + call s_dirichlet(q_prim_vf, 2, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. 
& + (bc_type(2, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb, mv) + end if end do end do end if @@ -199,18 +220,23 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, -1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, -1, k, l) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, -1, k, l) + call s_periodic(q_prim_vf, 3, -1, k, l, pb, mv) case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1, k, l) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, -1, k, l) + call s_dirichlet(q_prim_vf, 3, -1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb, mv) + end if end do end do end if @@ -223,18 +249,23 @@ contains do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, 1, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, pb, mv, 3, 1, k, l) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_PERIODIC) - call s_periodic(q_prim_vf, pb, mv, 3, 1, k, l) + call s_periodic(q_prim_vf, 3, 1, k, l, pb, mv) case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1, k, l) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, pb, mv, 3, 1, k, l) + call s_dirichlet(q_prim_vf, 3, 1, k, l) end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb, mv) + end if end do end do end if @@ -242,25 +273,18 @@ contains end subroutine s_populate_variables_buffers - subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if -#endif - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !bc_x%beg do i = 1, sys_size @@ -313,14 +337,10 @@ contains end subroutine s_ghost_cell_extrapolation - subroutine s_symmetry(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_symmetry -#else + subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -577,14 +597,10 @@ contains end subroutine s_symmetry - subroutine s_periodic(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_periodic -#else + subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -720,14 +736,10 @@ contains end subroutine s_periodic - subroutine s_axis(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_axis -#else + subroutine s_axis(q_prim_vf, bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -784,25 +796,18 @@ contains end subroutine s_axis - subroutine s_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if -#endif - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -885,25 +890,18 @@ contains end subroutine s_slip_wall - subroutine s_no_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_no_slip_wall #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l integer :: j, q, i -#ifndef MFC_POST_PROCESS - if (qbmm .and. .not. 
polytropic) then - call s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) - end if -#endif - if (bc_dir == 1) then !< x-direction if (bc_loc == -1) then !< bc_x%beg do i = 1, sys_size @@ -1022,14 +1020,13 @@ contains end subroutine s_no_slip_wall - subroutine s_dirichlet(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_dirichlet #else !$acc routine seq #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1086,18 +1083,14 @@ contains end if end if #else - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) + call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) #endif end subroutine s_dirichlet - subroutine s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc, k, l) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_qbmm_extrapolation -#else + subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb, mv) !$acc routine seq -#endif - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv + real(wp), optional, dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: bc_dir, bc_loc integer, intent(in) :: k, l @@ -1969,7 +1962,7 @@ contains end if ! Populating the cell-boundary and center locations buffer at bc_z%end - do i = 1, buff_size + do i = 1, offset_z%end z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) end do diff --git a/toolchain/modules b/toolchain/modules index 8221e77aaa..023039a915 100644 --- a/toolchain/modules +++ b/toolchain/modules @@ -48,9 +48,9 @@ p-gpu nvhpc/24.5 hpcx/2.19-cuda cuda/12.1.1 p-gpu MFC_CUDA_CC=70,75,80,89,90 NVHPC_CUDA_HOME=$CUDA_HOME CC=nvc CXX=nvc++ FC=nvfortran f OLCF Frontier -f-all cce/18.0.0 cpe/24.07 rocm/6.1.3 cray-mpich/8.1.28 -f-all cray-fftw cray-hdf5 cray-python omniperf -f-gpu craype-accel-amd-gfx90a +f-all cpe/25.03 rocm/6.3.1 +f-all cray-fftw cray-hdf5 cray-python +f-gpu craype-accel-amd-gfx90a rocprofiler-compute/3.0.0 d NCSA Delta d-all python/3.11.6 From b3c8e8a72ab887416b004e7a4f90cce9716c7cab Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 26 May 2025 14:47:24 -0400 Subject: [PATCH 24/62] wall BCs for EL and bug fixes --- src/simulation/m_bubbles.fpp | 5 +- src/simulation/m_bubbles_EL.fpp | 118 +++++++++++++++++------- src/simulation/m_bubbles_EL_kernels.fpp | 14 +-- src/simulation/m_global_parameters.fpp | 12 ++- src/simulation/m_start_up.fpp | 4 + 5 files changed, 107 insertions(+), 46 deletions(-) diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index c93c333e41..6aa38ed24b 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -47,7 +47,10 @@ contains real(wp) :: fCpbw, fCpinf, fCpinf_dot, fH, fHdot, c_gas, c_liquid real(wp) :: f_rddot - if (bubble_model == 1) then + if (bubble_model == 0) then + ! Particle + f_rddot = 0._wp + else if (bubble_model == 1) then ! Gilmore bubbles fCpinf = fP - pref fCpbw = f_cpbw(fR0, fR, fV, fpb) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 7d5a442e69..5b0ece63f2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -320,17 +320,17 @@ contains call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) ! 
Check if the bubble is located in the ghost cell of a symmetric boundary - if ((bc_x%beg == BC_REFLECTIVE .and. cell(1) < 0) .or. & - (bc_x%end == BC_REFLECTIVE .and. cell(1) > m) .or. & - (bc_y%beg == BC_REFLECTIVE .and. cell(2) < 0) .or. & - (bc_y%end == BC_REFLECTIVE .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if if (p > 0) then - if ((bc_z%beg == BC_REFLECTIVE .and. cell(3) < 0) .or. & - (bc_z%end == BC_REFLECTIVE .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if end if @@ -545,13 +545,11 @@ contains end do end if - -! Radial motion model + ! Radial motion model adap_dt_stop_max = 0 !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) do k = 1, nBubs - ! Keller-Miksis model ! 
Current bubble state myPb = gas_p(k, 2) @@ -610,20 +608,22 @@ contains end if - if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & - cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1),& - intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k), gas_mv(k), & - cell, l, q_prim_vf) - else - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end if + do l = 1, 3 + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1), & + intfc_rad(k,1), mtn_vel(k,l,1), & + gas_mg(k), gas_mv(k, 1), & + Re(1), cell, l, q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if + end do adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) @@ -1044,6 +1044,8 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() @@ -1066,6 +1068,8 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) do k = 1, nBubs @@ -1078,10 +1082,11 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) call s_write_lag_particles(mytime) @@ -1102,6 +1107,8 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) do k = 1, nBubs @@ -1113,6 +1120,9 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + elseif (stage == 3) then !$acc parallel loop gang vector default(present) private(k) do k = 1, nBubs @@ -1125,6 +1135,8 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() @@ -1135,11 +1147,49 @@ contains end if end if - end if end subroutine s_update_lagrange_tdv_rk + !> This subroutine enforces reflective and wall boundary conditions for EL bubbles + subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + + integer :: dest + integer :: k + + !$acc parallel loop gang vector default(present) + do k = 1, nBubs + + if 
(any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) + else if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) + else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) + else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + end if + end if + + end do + + end subroutine s_enforce_EL_bubbles_boundary_conditions + !> This subroutine returns the computational coordinate of the cell for the given position. !! @param pos Input coordinates !! @param cell Computational coordinate of the cell @@ -1242,25 +1292,25 @@ contains (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) end if - ! For symmetric boundary condition - if (bc_x%beg == BC_REFLECTIVE) then + ! For symmetric and wall boundary condition + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) >= x_cb(-1))) end if - if (bc_x%end == BC_REFLECTIVE) then + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) end if - if (bc_y%beg == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) end if - if (bc_y%end == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) end if if (p > 0) then - if (bc_z%beg == BC_REFLECTIVE) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) end if - if (bc_z%end == BC_REFLECTIVE) then + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. 
(pos_part(3) < z_cb(p))) end if end if diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 43a81c0ec3..f4944850a9 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -473,13 +473,13 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,Re,cell,i,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: a, area, vol, mass, force, drag_force, vel - real(wp) :: pos, rad, dp, v_rel, mg, mv + real(wp) :: a, vol, mass, force, vel + real(wp) :: pos, rad, dp, v_rel, mg, mv, Re if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -488,7 +488,7 @@ contains elseif (i == 2) then dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) -y_cc(cell(2) - 1)) + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) elseif (i == 3 .and. p > 0) then dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & @@ -497,12 +497,12 @@ contains dp = 0._wp end if - area = pi * rad**2._wp - force = -1._wp * area * dp + vol = (4._wp/3._wp) * pi * rad**3._wp + force = -1._wp * vol * dp if (lag_params%drag_model == 1) then ! Stokes drag v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) - force = force - (6._wp * pi * rad * v_rel) / fluid_pp(1)%Re(1) + force = force - (6._wp * pi * rad * v_rel) / Re end if a = force / (mg + mv) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 6082804d4c..e82405d85c 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -328,6 +328,10 @@ module m_global_parameters !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + !$acc declare create(bc_x%beg, bc_x%end) + !$acc declare create(bc_y%beg, bc_y%end) + !$acc declare create(bc_z%beg, bc_z%end) + integer :: fd_order !< !! The order of the finite-difference (fd) approximations of the first-order !! derivatives that need to be evaluated when the CoM or flow probe data @@ -1250,12 +1254,12 @@ contains @:ALLOCATE(x_cc(-buff_size:m + buff_size)) @:ALLOCATE(dx(-buff_size:m + buff_size)) - if (n == 0) return; + if (n == 0) return; @:ALLOCATE(y_cb(-1 - buff_size:n + buff_size)) @:ALLOCATE(y_cc(-buff_size:n + buff_size)) @:ALLOCATE(dy(-buff_size:n + buff_size)) - if (p == 0) return; + if (p == 0) return; @:ALLOCATE(z_cb(-1 - buff_size:p + buff_size)) @:ALLOCATE(z_cc(-buff_size:p + buff_size)) @:ALLOCATE(dz(-buff_size:p + buff_size)) @@ -1334,10 +1338,10 @@ contains ! 
Deallocating grid variables for the x-, y- and z-directions @:DEALLOCATE(x_cb, x_cc, dx) - if (n == 0) return; + if (n == 0) return; @:DEALLOCATE(y_cb, y_cc, dy) - if (p == 0) return; + if (p == 0) return; @:DEALLOCATE(z_cb, z_cc, dz) end subroutine s_finalize_global_parameters_module diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 5959c00ed0..49500840a2 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1656,6 +1656,10 @@ contains !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + !$acc update device(bc_x%beg, bc_x%end) + !$acc update device(bc_y%beg, bc_y%end) + !$acc update device(bc_z%beg, bc_z%end) + !$acc update device(bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out) !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out) !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out) From d58d5a83d7ac49b346a9c7b259e433ef1b6fed84 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Sun, 1 Jun 2025 18:09:23 -0400 Subject: [PATCH 25/62] bug fix for moving bubbles --- src/simulation/m_bubbles_EL.fpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5b0ece63f2..72155bcb13 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -608,17 +608,17 @@ contains end if - do l = 1, 3 + do l = 1, num_dims if (lag_params%vel_model == 1) then mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,1) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,1), & - intfc_rad(k,1), mtn_vel(k,l,1), & - gas_mg(k), gas_mv(k, 1), & - Re(1), cell, l, q_prim_vf) + mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & + intfc_rad(k,1), mtn_vel(k,l,2), & + gas_mg(k), gas_mv(k, 2), & + Re(1), myRho, cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -834,11 +834,17 @@ contains integer :: smearGrid, smearGridz logical :: celloutside - scoord = mtn_s(bub_id, 1:3, 2) f_pinfl = 0._wp - !< Find current bubble cell - cell(:) = int(scoord(:)) + if (lag_params%vel_model > 0) then + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + scoord = mtn_s(bub_id, 1:3, 2) + else + scoord = mtn_s(bub_id, 1:3, 2) + cell(:) = int(scoord(:)) + end if + !$acc loop seq do i = 1, num_dims if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 From 22ba445eacc1674163e2ba30c09df5b447511181 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Sun, 1 Jun 2025 18:11:35 -0400 Subject: [PATCH 26/62] drag models --- src/common/m_constants.fpp | 1 + src/common/m_derived_types.fpp | 1 + src/simulation/m_bubbles_EL_kernels.fpp | 39 +++++++++++++++++-------- src/simulation/m_global_parameters.fpp | 1 + toolchain/mfc/run/case_dicts.py | 2 +- 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 02e2eb9bd4..0c17e9789f 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -13,6 +13,7 @@ module m_constants real(wp), parameter :: small_alf = 1e-11_wp !< Small alf tolerance real(wp), parameter :: pi = 3.141592653589793_wp !< Pi real(wp), parameter :: verysmall = 
1.e-12_wp !< Very small number + real(wp), parameter :: Re_b_min = 1.e-6_wp !< minimum bubble reynolds number for drag coeff calc. integer, parameter :: num_stcls_min = 5 !< Minimum # of stencils integer, parameter :: path_len = 400 !< Maximum path length diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index fd174840ac..d5978bb91b 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -434,6 +434,7 @@ module m_derived_types integer :: nBubs_glb !< Global number of bubbles integer :: vel_model !< Particle velocity model integer :: drag_model !< Particle drag model + real(wp) :: c_d !< Drag coefficient real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) real(wp) :: valmaxvoid !< Maximum void fraction permitted diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index f4944850a9..ea0281ce14 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -453,16 +453,13 @@ contains y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) x3 = y_cc(cell(2) + 1) y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - elseif (i == 3 .and. p > 0) then + elseif (i == 3) then x1 = z_cc(cell(3) - 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) x2 = z_cc(cell(3)) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) x3 = z_cc(cell(3) + 1) y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - else - x1 = 0._wp; x2 = 0._wp; x3 = 0._wp - y1 = 0._wp; y2 = 0._wp; y3 = 0._wp endif L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) @@ -473,13 +470,13 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,Re,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell integer :: i type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: a, vol, mass, force, vel - real(wp) :: pos, rad, dp, v_rel, mg, mv, Re + real(wp) :: a, vol, mass, force, vel, area + real(wp) :: pos, rad, dp, v_rel, mg, mv, Re, rho, c_d if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -489,24 +486,42 @@ contains dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) - elseif (i == 3 .and. p > 0) then + elseif (i == 3) then dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) - else - dp = 0._wp end if vol = (4._wp/3._wp) * pi * rad**3._wp force = -1._wp * vol * dp - if (lag_params%drag_model == 1) then ! Stokes drag - v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + + if (lag_params%drag_model == 1) then ! Free slip Stokes drag + force = force - (4._wp * pi * rad * v_rel) / Re + else if (lag_params%drag_model == 2) then ! No slip Stokes drag force = force - (6._wp * pi * rad * v_rel) / Re + elseif (lag_params%drag_model == 3) then ! 
Clift and Gauvin (1971) Cd fit + area = pi * rad**2._wp + c_d = f_get_clift_gauvin_drag_coeff(v_rel, Re, rho, rad) + force = force - 0.5_wp * sign(1._wp, v_rel) * rho * (v_rel ** 2._wp) * c_d * area end if a = force / (mg + mv) end function f_get_acceleration + function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) +!$acc routine seq + real(wp) :: v_rel, mu_inv, rho, rad + real(wp) :: c_d + real(wp) :: Re_b ! Bubble Reynolds number + + Re_b = max(2._wp * rho * abs(v_rel) * rad * mu_inv, Re_b_min) + + C_d = 24._wp * (1 + 0.15_wp * Re_b ** (0.687_wp)) / Re_b + !C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) + + end function f_get_clift_gauvin_drag_coeff + end module m_bubbles_EL_kernels diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 3f2a295bf9..6b3ef62bd1 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -751,6 +751,7 @@ contains lag_params%nBubs_glb = dflt_int lag_params%vel_model = dflt_int lag_params%drag_model = dflt_int + lag_params%c_d = dflt_real lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real lag_params%valmaxvoid = dflt_real diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index cc05bcddf2..ad35b62ae1 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -307,7 +307,7 @@ def analytic(self): SIMULATION[f'lag_params%{var}'] = ParamType.INT for var in [ 'epsilonb', 'valmaxvoid', 'charwidth', 'diffcoefvap', - 'c0', 'rho0', 'T0', 'x0', 'Thost' ]: + 'c0', 'rho0', 'T0', 'x0', 'Thost', 'c_d' ]: SIMULATION[f'lag_params%{var}'] = ParamType.REAL for var in [ 'diffusion', 'reactions' ]: From 854484a397d1aaaf4ab1ceb12c95303b51400b2e Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:49:52 -0400 Subject: [PATCH 27/62] QBMM BC Bug Fix --- src/common/m_boundary_common.fpp | 10 +++++----- src/simulation/m_bubbles_EL.fpp | 13 +++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 97134421fa..29ce08c70e 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -171,8 +171,8 @@ contains end select if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, -1)%sf(0, k, l) /= BC_AXIS)) then + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then call s_qbmm_extrapolation(2, -1, k, l, pb, mv) end if end do @@ -201,7 +201,7 @@ contains end select if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then call s_qbmm_extrapolation(2, 1, k, l, pb, mv) end if end do @@ -234,7 +234,7 @@ contains end select if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then call s_qbmm_extrapolation(3, -1, k, l, pb, mv) end if end do @@ -263,7 +263,7 @@ contains end select if (qbmm .and. (.not. polytropic) .and. 
& - (bc_type(3, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then call s_qbmm_extrapolation(3, 1, k, l, pb, mv) end if end do diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 72155bcb13..c377b0bfc2 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -25,6 +25,8 @@ module m_bubbles_EL use m_helper + use m_ibm + implicit none !(nBub) @@ -1163,6 +1165,8 @@ contains integer :: dest integer :: k + integer, dimension(3) :: cell + !$acc parallel loop gang vector default(present) do k = 1, nBubs @@ -1192,6 +1196,15 @@ contains end if end if + if (ib) then + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) + + if (ib_markers%sf(cell(1), cell(2), cell(3)) == 1) then + print*, "In IB" + end if + end if + end do end subroutine s_enforce_EL_bubbles_boundary_conditions From 35ac23566437718b67882593a00c9e023b337979 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 2 Jun 2025 14:35:27 -0400 Subject: [PATCH 28/62] IBM support for EL Bubbles --- src/simulation/m_bubbles_EL.fpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index c377b0bfc2..8f00cf8788 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1162,8 +1162,8 @@ contains !> This subroutine enforces reflective and wall boundary conditions for EL bubbles subroutine s_enforce_EL_bubbles_boundary_conditions(dest) - integer :: dest - integer :: k + integer, intent(in) :: dest + integer :: k, i, patch_id integer, dimension(3) :: cell @@ -1200,8 +1200,15 @@ contains cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) - if (ib_markers%sf(cell(1), cell(2), cell(3)) == 1) then - print*, "In IB" + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then + patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + + !$acc routine seq + do i = 1, num_dims + mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & + levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & + * levelset%sf(cell(1), cell(2), cell(3), 1) + end do end if end if From a02fe6df22e3cf5b469462f3ae8ab672fa71a5d3 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 2 Jun 2025 18:01:43 -0400 Subject: [PATCH 29/62] stuff --- src/simulation/m_bubbles_EL.fpp | 24 ++++++++++++++++-------- src/simulation/m_time_steppers.fpp | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 8f00cf8788..beda4de145 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -571,6 +571,8 @@ contains ! Obtaining driving pressure call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) + !if (k == 14) print*, k, cell(1), cell(2), cell(3) + ! 
Obtain liquid density and computing speed of sound from pinf !$acc loop seq do i = 1, num_fluids @@ -840,8 +842,13 @@ contains if (lag_params%vel_model > 0) then cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) + !print*, mtn_pos(bub_id, 1, 2), mtn_pos(bub_id, 2, 2) scoord = mtn_s(bub_id, 1:3, 2) + + !if (k == 14) print*, "pinf cell", cell + !if (k == 14) print*, "pinf scoord", scoord + !if (k == 14) print*, "pinf pos", mtn_pos(bub_id,:,2) else scoord = mtn_s(bub_id, 1:3, 2) cell(:) = int(scoord(:)) @@ -1164,10 +1171,9 @@ contains integer, intent(in) :: dest integer :: k, i, patch_id - integer, dimension(3) :: cell - !$acc parallel loop gang vector default(present) + !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1198,17 +1204,19 @@ contains if (ib) then cell = -buff_size - call s_locate_cell(mtn_pos(k, 1:3, 1), cell, mtn_s(k, 1:3, 1)) - + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + !if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) - - !$acc routine seq + if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), 1) + * levelset%sf(cell(1), cell(2), cell(3), patch_id) end do + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell end if end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 5af09f5ed6..de07dfd631 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1020,7 +1020,7 @@ contains integer :: i, j, k, l call nvtxStartRange("RHS-BODYFORCES") - call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) + call s_compute_body_forces_rhs(q_cons_vf, q_prim_vf, rhs_vf) !$acc parallel loop collapse(4) gang vector default(present) do i = momxb, E_idx From 27d8a4a430a94215af97f69f469bbd3e8156f044 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:58:32 -0400 Subject: [PATCH 30/62] MPI EL on CPUs --- MPIRefactor.txt | 1614 ------------------- src/common/m_mpi_common.fpp | 57 +- src/post_process/m_global_parameters.fpp | 5 + src/pre_process/m_global_parameters.fpp | 5 + "src/simulation/\\" | 1837 ++++++++++++++++++++++ src/simulation/m_bubbles_EL.fpp | 80 +- src/simulation/m_bubbles_EL_kernels.fpp | 20 +- src/simulation/m_global_parameters.fpp | 5 + src/simulation/m_mpi_proxy.fpp | 224 ++- src/simulation/m_rhs.fpp | 15 +- src/simulation/m_time_steppers.fpp | 2 +- 11 files changed, 2210 insertions(+), 1654 deletions(-) delete mode 100644 MPIRefactor.txt create mode 100644 "src/simulation/\\" diff --git a/MPIRefactor.txt b/MPIRefactor.txt deleted file mode 100644 index fe7a7b4511..0000000000 --- a/MPIRefactor.txt +++ /dev/null @@ -1,1614 +0,0 @@ -mfc: Found CMake: /fastscratch/bwilfong3/software/MFC-Wilfong/build/cmake/bin/cmake. -mfc: OK > (venv) Entered the Python 3.10.16 virtual environment (>= 3.9). - - .=++*: -+*+=. | bwilfong3@wingtip-gpu3 [Linux] - :+ -*- == =* . 
| ------------------------------ - :*+ == ++ .+- | - :*##-.....:*+ .#%+++=--+=:::. | --jobs 32 - -=-++-======#=--**+++==+*++=::-:. | --mpi --gpu --no-debug --no-gcov --no-unified --no-single - .:++=----------====+*= ==..:%..... | --targets pre_process and simulation - .:-=++++===--==+=-+= +. := | - +#=::::::::=%=. -+: =+ *: | ---------------------------------------------------------- - .*=-=*=.. :=+*+: -...-- | $ ./mfc.sh (build, run, test, clean, count, packer) --help - - Acquiring /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py... - Acquiring /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py... - Build | syscheck, syscheck, pre_process, and simulation | Case Optimized: /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py - - - Run - Using queue system Interactive. - Using baked-in template for default. - Generating input files for syscheck... - - Generating syscheck.inp: - INFO: Forwarded 0/90 parameters. - - Generating input files for pre_process... - - Generating pre_process.inp: - INFO: Forwarded 72/90 parameters. - - Generating input files for simulation... - - Generating simulation.inp: - INFO: Forwarded 41/90 parameters. - - $ /bin/bash /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/MFC.sh - -+-----------------------------------------------------------------------------------------------------------+ -| MFC case # MFC @ /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/case.py: | -+-----------------------------------------------------------------------------------------------------------+ -| * Start-time 14:00:47 * Start-date 14:00:47 | -| * Partition N/A * Walltime 01:00:00 | -| * Account N/A * Nodes 1 | -| * Job Name MFC * Engine interactive | -| * QoS N/A * Binary N/A | -| * Queue System Interactive * Email N/A | -+-----------------------------------------------------------------------------------------------------------+ - -mfc: WARNING > This is the default template. -mfc: WARNING > It is not intended to support all systems and execution engines. -mfc: WARNING > Consider using a different template via the --computer option if you encounter problems. -mfc: OK > :) Selected MPI launcher mpirun. Use --binary to override. -mfc: OK > :) Running syscheck: - - [TEST] MPI: call mpi_init(ierr) - [TEST] MPI: call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) - [TEST] MPI: call mpi_barrier(MPI_COMM_WORLD, ierr) - [TEST] MPI: call assert(rank >= 0) - [TEST] MPI: call mpi_comm_size(MPI_COMM_WORLD, nRanks, ierr) - [TEST] MPI: call assert(nRanks > 0 .and. rank < nRanks) - [TEST] ACC: devtype = acc_get_device_type() - [TEST] ACC: num_devices = acc_get_num_devices(devtype) - [TEST] ACC: call assert(num_devices > 0) - [TEST] ACC: call acc_set_device_num(mod(rank, nRanks), devtype) - [TEST] ACC: allocate(arr(1:N)) - [TEST] ACC: !$acc enter data create(arr(1:N)) - [TEST] ACC: !$acc parallel loop - [TEST] ACC: !$acc update host(arr(1:N)) - [TEST] ACC: !$acc exit data delete(arr) - [TEST] MPI: call mpi_barrier(MPI_COMM_WORLD, ierr) - [TEST] MPI: call mpi_finalize(ierr) - - Syscheck: PASSED. 
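The [TEST] lines above list the MPI and OpenACC calls that the syscheck target exercises before a GPU run. For reference, a minimal, self-contained sketch of the same kind of check is given here; it is not MFC source, and the program name, the array name arr, the size N, and the device-selection policy (mod by the device count) are illustrative assumptions. The barrier and assert steps from the log are omitted for brevity.

program device_check
    ! Minimal MPI + OpenACC sanity check: each rank picks an accelerator,
    ! runs a trivial kernel on it, and copies the result back to the host.
    use mpi
    use openacc
    implicit none
    integer, parameter :: N = 1024
    integer :: ierr, rank, nRanks, num_devices, i
    integer(acc_device_kind) :: devtype
    real, allocatable :: arr(:)

    call mpi_init(ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
    call mpi_comm_size(MPI_COMM_WORLD, nRanks, ierr)

    ! Bind this rank to one of the visible accelerators
    devtype = acc_get_device_type()
    num_devices = acc_get_num_devices(devtype)
    if (num_devices > 0) call acc_set_device_num(mod(rank, num_devices), devtype)

    ! Create device storage, fill it in a device kernel, bring it back
    allocate (arr(1:N))
    !$acc enter data create(arr(1:N))
    !$acc parallel loop
    do i = 1, N
        arr(i) = real(i)
    end do
    !$acc update host(arr(1:N))
    !$acc exit data delete(arr)
    deallocate (arr)

    call mpi_finalize(ierr)
end program device_check

Built with an OpenACC-capable compiler (e.g. nvfortran -acc) and launched under mpirun, a clean exit of a program like this corresponds to the "Syscheck: PASSED." verdict above.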
-Generating '/nethome/bwilfong3/tmp/nsys-report-3830.qdstrm' - [1/8] [0% ] report1.nsys-rep [1/8] [0% ] report1.nsys-rep [1/8] [==========49% ] report1.nsys-rep [1/8] [=======================95% ] report1.nsys-rep [1/8] [=======================96% ] report1.nsys-rep [1/8] [========================100%] report1.nsys-rep [1/8] [========================100%] report1.nsys-rep - [2/8] [0% ] report1.sqlite [2/8] [1% ] report1.sqlite [2/8] [2% ] report1.sqlite [2/8] [3% ] report1.sqlite [2/8] [4% ] report1.sqlite [2/8] [5% ] report1.sqlite [2/8] [6% ] report1.sqlite [2/8] [7% ] report1.sqlite [2/8] [8% ] report1.sqlite [2/8] [9% ] report1.sqlite [2/8] [10% ] report1.sqlite [2/8] [11% ] report1.sqlite [2/8] [12% ] report1.sqlite [2/8] [13% ] report1.sqlite [2/8] [14% ] report1.sqlite [2/8] [=15% ] report1.sqlite [2/8] [=16% ] report1.sqlite [2/8] [=17% ] report1.sqlite [2/8] [==18% ] report1.sqlite [2/8] [==19% ] report1.sqlite [2/8] [==20% ] report1.sqlite [2/8] [==21% ] report1.sqlite [2/8] [===22% ] report1.sqlite [2/8] [===23% ] report1.sqlite [2/8] [===24% ] report1.sqlite [2/8] [====25% ] report1.sqlite [2/8] [====26% ] report1.sqlite [2/8] [====27% ] report1.sqlite [2/8] [====28% ] report1.sqlite [2/8] [=====29% ] report1.sqlite [2/8] [=====30% ] report1.sqlite [2/8] [=====31% ] report1.sqlite [2/8] [=====32% ] report1.sqlite [2/8] [======33% ] report1.sqlite [2/8] [======34% ] report1.sqlite [2/8] [======35% ] report1.sqlite [2/8] [=======36% ] report1.sqlite [2/8] [=======37% ] report1.sqlite [2/8] [=======38% ] report1.sqlite [2/8] [=======39% ] report1.sqlite [2/8] [========40% ] report1.sqlite [2/8] [========41% ] report1.sqlite [2/8] [========42% ] report1.sqlite [2/8] [=========43% ] report1.sqlite [2/8] [=========44% ] report1.sqlite [2/8] [=========45% ] report1.sqlite [2/8] [=========46% ] report1.sqlite [2/8] [==========47% ] report1.sqlite [2/8] [==========48% ] report1.sqlite [2/8] [==========49% ] report1.sqlite [2/8] [===========50% ] report1.sqlite [2/8] [===========51% ] report1.sqlite [2/8] [===========52% ] report1.sqlite [2/8] [===========53% ] report1.sqlite [2/8] [============54% ] report1.sqlite [2/8] [============55% ] report1.sqlite [2/8] [============56% ] report1.sqlite [2/8] [============57% ] report1.sqlite [2/8] [=============58% ] report1.sqlite [2/8] [=============59% ] report1.sqlite [2/8] [=============60% ] report1.sqlite [2/8] [==============61% ] report1.sqlite [2/8] [==============62% ] report1.sqlite [2/8] [==============63% ] report1.sqlite [2/8] [==============64% ] report1.sqlite [2/8] [===============65% ] report1.sqlite [2/8] [===============66% ] report1.sqlite [2/8] [===============67% ] report1.sqlite [2/8] [================68% ] report1.sqlite [2/8] [================69% ] report1.sqlite [2/8] [================70% ] report1.sqlite [2/8] [================71% ] report1.sqlite [2/8] [=================72% ] report1.sqlite [2/8] [=================73% ] report1.sqlite [2/8] [=================74% ] report1.sqlite [2/8] [==================75% ] report1.sqlite [2/8] [==================76% ] report1.sqlite [2/8] [==================77% ] report1.sqlite [2/8] [==================78% ] report1.sqlite [2/8] [===================79% ] report1.sqlite [2/8] [===================80% ] report1.sqlite [2/8] [===================81% ] report1.sqlite [2/8] [===================82% ] report1.sqlite [2/8] [====================83% ] report1.sqlite [2/8] [====================84% ] report1.sqlite [2/8] [====================85% ] report1.sqlite [2/8] [=====================86% 
] report1.sqlite [2/8] [=====================87% ] report1.sqlite [2/8] [=====================88% ] report1.sqlite [2/8] [=====================89% ] report1.sqlite [2/8] [======================90% ] report1.sqlite [2/8] [======================91% ] report1.sqlite [2/8] [======================92% ] report1.sqlite [2/8] [=======================93% ] report1.sqlite [2/8] [=======================94% ] report1.sqlite [2/8] [=======================95% ] report1.sqlite [2/8] [=======================96% ] report1.sqlite [2/8] [========================97% ] report1.sqlite [2/8] [========================98% ] report1.sqlite [2/8] [========================99% ] report1.sqlite [2/8] [========================100%] report1.sqlite [2/8] [========================100%] report1.sqlite -[3/8] Executing 'nvtx_sum' stats report -[4/8] Executing 'cuda_api_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- --------- --------- -------- -------- ----------- ---------------------- - 48.0 849,042 1 849,042.0 849,042.0 849,042 849,042 0.0 cuMemAllocHost_v2 - 41.1 727,231 1 727,231.0 727,231.0 727,231 727,231 0.0 cuModuleLoadDataEx - 6.8 121,094 2 60,547.0 60,547.0 19,619 101,475 57,880.9 cuMemAlloc_v2 - 1.5 25,990 1 25,990.0 25,990.0 25,990 25,990 0.0 cuLaunchKernel - 1.0 18,462 1 18,462.0 18,462.0 18,462 18,462 0.0 cuMemcpyDtoHAsync_v2 - 0.6 11,129 1 11,129.0 11,129.0 11,129 11,129 0.0 cuMemcpyHtoDAsync_v2 - 0.5 8,387 4 2,096.8 2,002.0 707 3,676 1,222.8 cuStreamSynchronize - 0.2 2,716 1 2,716.0 2,716.0 2,716 2,716 0.0 cuInit - 0.1 2,532 1 2,532.0 2,532.0 2,532 2,532 0.0 cuModuleGetLoadingMode - 0.1 2,101 3 700.3 448.0 367 1,286 508.8 cuCtxSetCurrent - -[5/8] Executing 'cuda_gpu_kern_sum' stats report - - Time (%) Total Time (ns) Instances Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- -------- -------- -------- -------- ----------- --------------- - 100.0 1,920 1 1,920.0 1,920.0 1,920 1,920 0.0 syscheck_72_gpu - -[6/8] Executing 'cuda_gpu_mem_time_sum' stats report - - Time (%) Total Time (ns) Count Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Operation - -------- --------------- ----- -------- -------- -------- -------- ----------- ---------------------------- - 64.0 2,272 1 2,272.0 2,272.0 2,272 2,272 0.0 [CUDA memcpy Device-to-Host] - 36.0 1,280 1 1,280.0 1,280.0 1,280 1,280 0.0 [CUDA memcpy Host-to-Device] - -[7/8] Executing 'cuda_gpu_mem_size_sum' stats report - - Total (MB) Count Avg (MB) Med (MB) Min (MB) Max (MB) StdDev (MB) Operation - ---------- ----- -------- -------- -------- -------- ----------- ---------------------------- - 0.001 1 0.001 0.001 0.001 0.001 0.000 [CUDA memcpy Device-to-Host] - 0.000 1 0.000 0.000 0.000 0.000 0.000 [CUDA memcpy Host-to-Device] - -[8/8] Executing 'openacc_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------- ------------- ----------- ----------- ----------- --------------------------------- - 99.8 107,957,962 1 107,957,962.0 107,957,962.0 107,957,962 107,957,962 0.0 Device Init - 0.1 72,467 1 72,467.0 72,467.0 72,467 72,467 0.0 Enter Data@syscheck.fpp:71 - 0.0 40,021 1 40,021.0 40,021.0 40,021 40,021 0.0 Compute Construct@syscheck.fpp:72 - 0.0 30,068 1 30,068.0 30,068.0 30,068 30,068 0.0 Update@syscheck.fpp:76 - 0.0 28,352 1 28,352.0 28,352.0 28,352 28,352 0.0 Enqueue Upload - 0.0 27,962 1 27,962.0 27,962.0 27,962 27,962 0.0 
Enqueue Launch@syscheck.fpp:72 - 0.0 20,103 1 20,103.0 20,103.0 20,103 20,103 0.0 Enqueue Download@syscheck.fpp:76 - 0.0 13,219 1 13,219.0 13,219.0 13,219 13,219 0.0 Exit Data@syscheck.fpp:77 - 0.0 5,457 1 5,457.0 5,457.0 5,457 5,457 0.0 Wait@syscheck.fpp:71 - 0.0 4,477 2 2,238.5 2,238.5 1,494 2,983 1,052.9 Wait@syscheck.fpp:72 - 0.0 4,124 1 4,124.0 4,124.0 4,124 4,124 0.0 Exit Data@syscheck.fpp:72 - 0.0 3,276 1 3,276.0 3,276.0 3,276 3,276 0.0 Enter Data@syscheck.fpp:72 - 0.0 2,644 1 2,644.0 2,644.0 2,644 2,644 0.0 Wait@syscheck.fpp:76 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@syscheck.fpp:71 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@syscheck.fpp:71 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@syscheck.fpp:77 - -Generated: - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report1.nsys-rep - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report1.sqlite - -mfc: OK > :) Running pre_process: - - Pre-processing a 200x200x200 case on 1 rank(s) - Processing patch 1 - Processing patch 2 - Elapsed Time 6.484550952911377 -Generating '/nethome/bwilfong3/tmp/nsys-report-f00c.qdstrm' - [1/8] [0% ] report2.nsys-rep [1/8] [0% ] report2.nsys-rep [1/8] [=17% ] report2.nsys-rep [1/8] [===24% ] report2.nsys-rep [1/8] [============55% ] report2.nsys-rep [1/8] [====================85% ] report2.nsys-rep [1/8] [=====================86% ] report2.nsys-rep [1/8] [=====================87% ] report2.nsys-rep [1/8] [=====================88% ] report2.nsys-rep [1/8] [=====================89% ] report2.nsys-rep [1/8] [======================90% ] report2.nsys-rep [1/8] [========================100%] report2.nsys-rep [1/8] [========================100%] report2.nsys-rep - [2/8] [0% ] report2.sqlite [2/8] [1% ] report2.sqlite [2/8] [2% ] report2.sqlite [2/8] [3% ] report2.sqlite [2/8] [4% ] report2.sqlite [2/8] [5% ] report2.sqlite [2/8] [6% ] report2.sqlite [2/8] [7% ] report2.sqlite [2/8] [8% ] report2.sqlite [2/8] [9% ] report2.sqlite [2/8] [10% ] report2.sqlite [2/8] [11% ] report2.sqlite [2/8] [12% ] report2.sqlite [2/8] [13% ] report2.sqlite [2/8] [14% ] report2.sqlite [2/8] [=15% ] report2.sqlite [2/8] [=16% ] report2.sqlite [2/8] [=17% ] report2.sqlite [2/8] [==18% ] report2.sqlite [2/8] [==19% ] report2.sqlite [2/8] [==20% ] report2.sqlite [2/8] [==21% ] report2.sqlite [2/8] [===22% ] report2.sqlite [2/8] [===23% ] report2.sqlite [2/8] [===24% ] report2.sqlite [2/8] [====25% ] report2.sqlite [2/8] [====26% ] report2.sqlite [2/8] [====27% ] report2.sqlite [2/8] [====28% ] report2.sqlite [2/8] [=====29% ] report2.sqlite [2/8] [=====30% ] report2.sqlite [2/8] [=====31% ] report2.sqlite [2/8] [=====32% ] report2.sqlite [2/8] [======33% ] report2.sqlite [2/8] [======34% ] report2.sqlite [2/8] [======35% ] report2.sqlite [2/8] [=======36% ] report2.sqlite [2/8] [=======37% ] report2.sqlite [2/8] [=======38% ] report2.sqlite [2/8] [=======39% ] report2.sqlite [2/8] [========40% ] report2.sqlite [2/8] [========41% ] report2.sqlite [2/8] [========42% ] report2.sqlite [2/8] [=========43% ] report2.sqlite [2/8] [=========44% ] report2.sqlite [2/8] [=========45% ] report2.sqlite [2/8] [=========46% ] report2.sqlite [2/8] [==========47% ] report2.sqlite [2/8] [==========48% ] report2.sqlite [2/8] [==========49% ] report2.sqlite [2/8] [===========50% ] report2.sqlite [2/8] [===========51% ] report2.sqlite [2/8] [===========52% ] report2.sqlite [2/8] [===========53% ] report2.sqlite [2/8] [============54% ] report2.sqlite [2/8] [============55% ] report2.sqlite [2/8] [============56% ] report2.sqlite [2/8] 
[============57% ] report2.sqlite [2/8] [=============58% ] report2.sqlite [2/8] [=============59% ] report2.sqlite [2/8] [=============60% ] report2.sqlite [2/8] [==============61% ] report2.sqlite [2/8] [==============62% ] report2.sqlite [2/8] [==============63% ] report2.sqlite [2/8] [==============64% ] report2.sqlite [2/8] [===============65% ] report2.sqlite [2/8] [===============66% ] report2.sqlite [2/8] [===============67% ] report2.sqlite [2/8] [================68% ] report2.sqlite [2/8] [================69% ] report2.sqlite [2/8] [================70% ] report2.sqlite [2/8] [================71% ] report2.sqlite [2/8] [=================72% ] report2.sqlite [2/8] [=================73% ] report2.sqlite [2/8] [=================74% ] report2.sqlite [2/8] [==================75% ] report2.sqlite [2/8] [==================76% ] report2.sqlite [2/8] [==================77% ] report2.sqlite [2/8] [==================78% ] report2.sqlite [2/8] [===================79% ] report2.sqlite [2/8] [===================80% ] report2.sqlite [2/8] [===================81% ] report2.sqlite [2/8] [===================82% ] report2.sqlite [2/8] [====================83% ] report2.sqlite [2/8] [====================84% ] report2.sqlite [2/8] [====================85% ] report2.sqlite [2/8] [=====================86% ] report2.sqlite [2/8] [=====================87% ] report2.sqlite [2/8] [=====================88% ] report2.sqlite [2/8] [=====================89% ] report2.sqlite [2/8] [======================90% ] report2.sqlite [2/8] [======================91% ] report2.sqlite [2/8] [======================92% ] report2.sqlite [2/8] [=======================93% ] report2.sqlite [2/8] [=======================94% ] report2.sqlite [2/8] [=======================95% ] report2.sqlite [2/8] [=======================96% ] report2.sqlite [2/8] [========================97% ] report2.sqlite [2/8] [========================98% ] report2.sqlite [2/8] [========================99% ] report2.sqlite [2/8] [========================100%] report2.sqlite [2/8] [========================100%] report2.sqlite -[3/8] Executing 'nvtx_sum' stats report -[4/8] Executing 'cuda_api_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- -------- -------- -------- -------- ----------- ---------------------- - 100.0 2,331 1 2,331.0 2,331.0 2,331 2,331 0.0 cuModuleGetLoadingMode - -[5/8] Executing 'cuda_gpu_kern_sum' stats report -[6/8] Executing 'cuda_gpu_mem_time_sum' stats report -[7/8] Executing 'cuda_gpu_mem_size_sum' stats report -[8/8] Executing 'openacc_sum' stats report -Generated: - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report2.nsys-rep - /fastscratch/bwilfong3/software/MFC-Wilfong/examples/3D_performance_test/report2.sqlite - -mfc: OK > :) Running simulation: - - Simulating a case-optimized 200x200x200 case on 1 rank(s) with OpenACC offloading. 
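The Performance figure printed after the last time step below is a grind time: wall-clock nanoseconds normalized per grid point (gp), transported equation (eq), and right-hand-side evaluation (rhs). As a reading of the log output rather than of the MFC source, the quantity is roughly

    ns/gp/eq/rhs ~= t_elapsed [ns] / (N_steps * N_gp * N_eq * N_rhs)

where for this case N_gp = 200^3 = 8.0e6 grid points and N_rhs = 3, consistent with the 90 COMPUTE-RHS ranges recorded over 30 TIMESTEP ranges in the nvtx summary further below for the third-order TVD RK stepper; N_eq is the number of transported equations for the case.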
- [ 0%] Time step 1 of 31 @ t_step = 0 - [ 4%] Time step 2 of 31 @ t_step = 1 - [ 7%] Time step 3 of 31 @ t_step = 2 - [ 10%] Time step 4 of 31 @ t_step = 3 - [ 13%] Time step 5 of 31 @ t_step = 4 - [ 17%] Time step 6 of 31 @ t_step = 5 - [ 20%] Time step 7 of 31 @ t_step = 6 - [ 23%] Time step 8 of 31 @ t_step = 7 - [ 26%] Time step 9 of 31 @ t_step = 8 - [ 30%] Time step 10 of 31 @ t_step = 9 - [ 33%] Time step 11 of 31 @ t_step = 10 - [ 36%] Time step 12 of 31 @ t_step = 11 - [ 39%] Time step 13 of 31 @ t_step = 12 - [ 42%] Time step 14 of 31 @ t_step = 13 - [ 46%] Time step 15 of 31 @ t_step = 14 - [ 49%] Time step 16 of 31 @ t_step = 15 - [ 52%] Time step 17 of 31 @ t_step = 16 - [ 55%] Time step 18 of 31 @ t_step = 17 - [ 59%] Time step 19 of 31 @ t_step = 18 - [ 62%] Time step 20 of 31 @ t_step = 19 - [ 65%] Time step 21 of 31 @ t_step = 20 - [ 68%] Time step 22 of 31 @ t_step = 21 - [ 71%] Time step 23 of 31 @ t_step = 22 - [ 75%] Time step 24 of 31 @ t_step = 23 - [ 78%] Time step 25 of 31 @ t_step = 24 - [ 81%] Time step 26 of 31 @ t_step = 25 - [ 84%] Time step 27 of 31 @ t_step = 26 - [ 88%] Time step 28 of 31 @ t_step = 27 - [ 91%] Time step 29 of 31 @ t_step = 28 - [ 94%] Time step 30 of 31 @ t_step = 29 - Performance: 0.7153143678123322 ns/gp/eq/rhs -Generating '/nethome/bwilfong3/tmp/nsys-report-e3c3.qdstrm' - [1/8] [0% ] report3.nsys-rep [1/8] [0% ] report3.nsys-rep [1/8] [6% ] report3.nsys-rep [1/8] [5% ] report3.nsys-rep [1/8] [11% ] report3.nsys-rep [1/8] [==21% ] report3.nsys-rep [1/8] [=====31% ] report3.nsys-rep [1/8] [========41% ] report3.nsys-rep [1/8] [===========51% ] report3.nsys-rep [1/8] [==============61% ] report3.nsys-rep [1/8] [================71% ] report3.nsys-rep [1/8] [==================77% ] report3.nsys-rep [1/8] [==================78% ] report3.nsys-rep [1/8] [========================100%] report3.nsys-rep [1/8] [========================100%] report3.nsys-rep - [2/8] [0% ] report3.sqlite [2/8] [1% ] report3.sqlite [2/8] [2% ] report3.sqlite [2/8] [3% ] report3.sqlite [2/8] [4% ] report3.sqlite [2/8] [5% ] report3.sqlite [2/8] [6% ] report3.sqlite [2/8] [7% ] report3.sqlite [2/8] [8% ] report3.sqlite [2/8] [9% ] report3.sqlite [2/8] [10% ] report3.sqlite [2/8] [11% ] report3.sqlite [2/8] [12% ] report3.sqlite [2/8] [13% ] report3.sqlite [2/8] [14% ] report3.sqlite [2/8] [=15% ] report3.sqlite [2/8] [=16% ] report3.sqlite [2/8] [=17% ] report3.sqlite [2/8] [==18% ] report3.sqlite [2/8] [==19% ] report3.sqlite [2/8] [==20% ] report3.sqlite [2/8] [==21% ] report3.sqlite [2/8] [===22% ] report3.sqlite [2/8] [===23% ] report3.sqlite [2/8] [===24% ] report3.sqlite [2/8] [====25% ] report3.sqlite [2/8] [====26% ] report3.sqlite [2/8] [====27% ] report3.sqlite [2/8] [====28% ] report3.sqlite [2/8] [=====29% ] report3.sqlite [2/8] [=====30% ] report3.sqlite [2/8] [=====31% ] report3.sqlite [2/8] [=====32% ] report3.sqlite [2/8] [======33% ] report3.sqlite [2/8] [======34% ] report3.sqlite [2/8] [======35% ] report3.sqlite [2/8] [=======36% ] report3.sqlite [2/8] [=======37% ] report3.sqlite [2/8] [=======38% ] report3.sqlite [2/8] [=======39% ] report3.sqlite [2/8] [========40% ] report3.sqlite [2/8] [========41% ] report3.sqlite [2/8] [========42% ] report3.sqlite [2/8] [=========43% ] report3.sqlite [2/8] [=========44% ] report3.sqlite [2/8] [=========45% ] report3.sqlite [2/8] [=========46% ] report3.sqlite [2/8] [==========47% ] report3.sqlite [2/8] [==========48% ] report3.sqlite [2/8] [==========49% ] report3.sqlite [2/8] [===========50% ] 
report3.sqlite [2/8] [===========51% ] report3.sqlite [2/8] [===========52% ] report3.sqlite [2/8] [===========53% ] report3.sqlite [2/8] [============54% ] report3.sqlite [2/8] [============55% ] report3.sqlite [2/8] [============56% ] report3.sqlite [2/8] [============57% ] report3.sqlite [2/8] [=============58% ] report3.sqlite [2/8] [=============59% ] report3.sqlite [2/8] [=============60% ] report3.sqlite [2/8] [==============61% ] report3.sqlite [2/8] [==============62% ] report3.sqlite [2/8] [==============63% ] report3.sqlite [2/8] [==============64% ] report3.sqlite [2/8] [===============65% ] report3.sqlite [2/8] [===============66% ] report3.sqlite [2/8] [===============67% ] report3.sqlite [2/8] [================68% ] report3.sqlite [2/8] [================69% ] report3.sqlite [2/8] [================70% ] report3.sqlite [2/8] [================71% ] report3.sqlite [2/8] [=================72% ] report3.sqlite [2/8] [=================73% ] report3.sqlite [2/8] [=================74% ] report3.sqlite [2/8] [==================75% ] report3.sqlite [2/8] [==================76% ] report3.sqlite [2/8] [==================77% ] report3.sqlite [2/8] [==================78% ] report3.sqlite [2/8] [===================79% ] report3.sqlite [2/8] [===================80% ] report3.sqlite [2/8] [===================81% ] report3.sqlite [2/8] [===================82% ] report3.sqlite [2/8] [====================83% ] report3.sqlite [2/8] [====================84% ] report3.sqlite [2/8] [====================85% ] report3.sqlite [2/8] [=====================86% ] report3.sqlite [2/8] [=====================87% ] report3.sqlite [2/8] [=====================88% ] report3.sqlite [2/8] [=====================89% ] report3.sqlite [2/8] [======================90% ] report3.sqlite [2/8] [======================91% ] report3.sqlite [2/8] [======================92% ] report3.sqlite [2/8] [=======================93% ] report3.sqlite [2/8] [=======================94% ] report3.sqlite [2/8] [=======================95% ] report3.sqlite [2/8] [=======================96% ] report3.sqlite [2/8] [========================97% ] report3.sqlite [2/8] [========================98% ] report3.sqlite [2/8] [========================99% ] report3.sqlite [2/8] [========================100%] report3.sqlite [2/8] [========================100%] report3.sqlite -[3/8] Executing 'nvtx_sum' stats report - - Time (%) Total Time (ns) Instances Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Style Range - -------- --------------- --------- --------------- --------------- ------------- ------------- ----------- ------- --------------------- - 25.1 5,556,131,040 1 5,556,131,040.0 5,556,131,040.0 5,556,131,040 5,556,131,040 0.0 PushPop SIMULATION-TIME-MARCH - 19.7 4,343,486,923 30 144,782,897.4 144,523,688.0 144,224,270 151,966,428 1,420,233.3 PushPop TIMESTEP - 18.7 4,132,222,633 90 45,913,584.8 45,808,604.0 45,724,623 52,483,659 729,504.9 PushPop COMPUTE-RHS - 7.5 1,649,133,287 270 6,107,901.1 5,774,906.5 5,708,817 6,886,989 513,412.2 PushPop RHS-WENO - 6.8 1,492,020,514 270 5,526,001.9 5,211,245.5 5,037,210 8,508,008 573,089.0 PushPop RHS-RIEMANN-SOLVER - 6.4 1,423,381,163 1 1,423,381,163.0 1,423,381,163.0 1,423,381,163 1,423,381,163 0.0 PushPop INIT - 5.5 1,211,622,280 1 1,211,622,280.0 1,211,622,280.0 1,211,622,280 1,211,622,280 0.0 PushPop SAVE-DATA - 3.4 740,475,264 1 740,475,264.0 740,475,264.0 740,475,264 740,475,264 0.0 PushPop INIT-MODULES - 2.9 630,345,732 270 2,334,613.8 2,374,501.0 2,188,147 3,693,835 122,116.5 PushPop RHS-ADVECTION-SRC 
- 2.8 620,236,026 1 620,236,026.0 620,236,026.0 620,236,026 620,236,026 0.0 PushPop INIT-MPI - 0.5 103,612,551 90 1,151,250.6 1,143,181.0 1,140,578 1,803,069 69,542.1 PushPop RHS-CONVERT - 0.4 80,835,591 1 80,835,591.0 80,835,591.0 80,835,591 80,835,591 0.0 PushPop FINALIZE-MODULES - 0.3 62,647,077 1 62,647,077.0 62,647,077.0 62,647,077 62,647,077 0.0 PushPop INIT-GPU-VARS - 0.2 46,654,954 90 518,388.4 508,176.0 503,473 1,304,662 84,017.3 PushPop RHS-COMMUNICATION - 0.0 106,687 270 395.1 275.5 162 2,831 419.7 PushPop RHS-HYPOELASTICITY - 0.0 83,918 270 310.8 230.5 104 2,128 329.9 PushPop RHS-MHD - 0.0 53,827 120 448.6 276.0 130 4,553 670.4 PushPop RHS-ELASTIC - -[4/8] Executing 'cuda_api_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------ ------------ ---------- ---------- ----------- ---------------------- - 91.8 4,239,454,329 14,595 290,473.1 6,639.0 442 12,077,131 875,932.1 cuStreamSynchronize - 4.2 193,440,619 38 5,090,542.6 239,459.5 233,982 28,768,773 9,655,708.6 cuMemcpyDtoHAsync_v2 - 1.7 77,550,358 5,596 13,858.2 2,461.0 2,024 7,834,190 292,223.2 cuMemcpyHtoDAsync_v2 - 0.9 41,644,729 1 41,644,729.0 41,644,729.0 41,644,729 41,644,729 0.0 cuModuleLoadDataEx - 0.5 23,663,193 1 23,663,193.0 23,663,193.0 23,663,193 23,663,193 0.0 cuMemHostAlloc - 0.5 21,146,694 6,121 3,454.8 3,199.0 2,713 35,564 869.4 cuLaunchKernel - 0.3 13,049,363 243 53,701.1 75,275.0 1,809 320,571 47,890.1 cuMemAlloc_v2 - 0.1 5,900,487 3,098 1,904.6 1,701.0 1,396 11,795 599.6 cuMemsetD32Async - 0.0 885,633 1 885,633.0 885,633.0 885,633 885,633 0.0 cuMemAllocHost_v2 - 0.0 2,758 3 919.3 594.0 273 1,891 856.7 cuCtxSetCurrent - 0.0 1,885 1 1,885.0 1,885.0 1,885 1,885 0.0 cuModuleGetLoadingMode - 0.0 1,561 1 1,561.0 1,561.0 1,561 1,561 0.0 cuInit - -[5/8] Executing 'cuda_gpu_kern_sum' stats report - - Time (%) Total Time (ns) Instances Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------ ------------ ---------- ---------- ----------- ---------------------------------------------------------------------------- - 9.9 416,089,697 90 4,623,218.9 4,621,818.5 4,616,714 4,661,451 6,274.4 m_weno_s_weno_797_gpu__2 - 9.9 416,057,697 90 4,622,863.3 4,619,467.5 4,617,291 4,629,931 4,723.0 m_weno_s_weno_797_gpu__1 - 9.9 415,779,420 90 4,619,771.3 4,615,755.0 4,613,963 4,627,307 4,933.4 m_weno_s_weno_797_gpu - 8.4 350,819,554 90 3,897,995.0 3,897,780.5 3,890,789 3,904,773 3,667.2 m_riemann_solvers_s_hllc_riemann_solver_2449_gpu_1 - 8.4 350,508,165 90 3,894,535.2 3,894,132.5 3,886,500 3,925,669 4,850.2 m_riemann_solvers_s_hllc_riemann_solver_2449_gpu_2 - 8.3 346,941,412 90 3,854,904.6 3,855,059.5 3,847,716 3,863,715 3,599.7 m_riemann_solvers_s_hllc_riemann_solver_2449_gpu - 4.6 192,740,190 90 2,141,557.7 2,141,284.0 2,138,036 2,161,044 3,262.1 m_weno_s_initialize_weno_1209_gpu - 4.3 180,014,829 90 2,000,164.8 1,999,906.5 1,995,731 2,017,203 3,654.5 m_riemann_solvers_s_finalize_riemann_solver_5038_gpu - 3.0 126,642,266 90 1,407,136.3 1,407,213.0 1,403,628 1,411,086 1,797.9 m_rhs_s_compute_advection_source_term_1117_gpu - 3.0 126,007,860 90 1,400,087.3 1,400,109.0 1,395,949 1,409,869 2,102.8 m_rhs_s_compute_advection_source_term_1322_gpu - 2.5 104,706,485 90 1,163,405.4 1,162,939.0 1,161,387 1,173,611 1,935.5 m_rhs_s_compute_rhs_938_gpu - 2.4 102,629,501 90 1,140,327.8 1,138,459.0 1,137,738 1,143,819 2,387.1 m_rhs_s_compute_advection_source_term_1008_gpu - 2.4 101,209,868 90 
1,124,554.1 1,124,603.0 1,122,315 1,126,731 970.6 m_variables_conversion_s_convert_conservative_to_primitive_variables_886_gpu - 2.4 100,723,842 90 1,119,153.8 1,119,658.0 1,117,066 1,121,547 1,835.3 m_rhs_s_compute_rhs_635_gpu - 2.3 97,362,414 90 1,081,804.6 1,080,346.0 1,078,859 1,085,130 2,189.4 m_weno_s_initialize_weno_1179_gpu - 2.3 96,209,503 90 1,068,994.5 1,067,834.0 1,066,346 1,071,818 1,961.4 m_riemann_solvers_s_finalize_riemann_solver_4986_gpu - 2.2 92,780,226 90 1,030,891.4 1,029,674.0 1,029,033 1,033,194 1,680.6 m_weno_s_initialize_weno_1141_gpu - 2.1 87,143,916 90 968,265.7 967,400.5 966,506 970,184 1,445.8 m_riemann_solvers_s_finalize_riemann_solver_5090_gpu - 1.8 74,516,980 30 2,483,899.3 2,483,991.0 2,479,544 2,487,767 2,157.5 m_data_output_s_write_run_time_information_280_gpu - 1.0 43,655,287 30 1,455,176.2 1,455,341.0 1,449,389 1,462,574 3,705.9 m_time_steppers_s_3rd_order_tvd_rk_843_gpu - 1.0 43,617,519 30 1,453,917.3 1,453,341.5 1,446,637 1,462,190 3,312.8 m_time_steppers_s_3rd_order_tvd_rk_770_gpu - 0.9 36,698,516 30 1,223,283.9 1,223,179.5 1,220,908 1,225,899 2,096.1 m_time_steppers_s_3rd_order_tvd_rk_698_gpu - 0.9 35,884,076 90 398,712.0 398,627.0 397,444 400,676 664.5 m_rhs_s_compute_advection_source_term_1259_gpu - 0.9 35,849,934 90 398,332.6 398,307.5 396,612 400,900 817.6 m_rhs_s_compute_advection_source_term_1496_gpu - 0.9 35,657,615 90 396,195.7 396,131.5 395,012 397,604 601.3 m_rhs_s_compute_advection_source_term_1086_gpu - 0.5 22,455,606 90 249,506.7 249,634.0 248,483 251,299 536.9 m_riemann_solvers_s_finalize_riemann_solver_5065_gpu - 0.5 20,922,471 90 232,471.9 232,418.0 230,402 234,946 977.2 m_boundary_common_s_populate_variables_buffers_135_gpu - 0.3 12,077,135 1 12,077,135.0 12,077,135.0 12,077,135 12,077,135 0.0 m_rhs_s_initialize_rhs_module_595_gpu - 0.3 11,555,275 90 128,391.9 128,369.0 127,873 128,961 313.6 m_riemann_solvers_s_finalize_riemann_solver_5012_gpu - 0.2 10,021,407 90 111,349.0 111,393.0 110,849 112,001 281.9 m_riemann_solvers_s_finalize_riemann_solver_5102_gpu - 0.2 8,476,463 270 31,394.3 31,424.5 30,112 32,609 462.1 m_variables_conversion_s_convert_primitive_to_flux_variables_1486_gpu - 0.2 8,055,083 90 89,500.9 89,456.5 87,425 92,193 870.2 m_cbc_s_finalize_cbc_1341_gpu - 0.2 6,798,914 90 75,543.5 75,409.0 73,153 79,456 1,190.5 m_boundary_common_s_populate_variables_buffers_164_gpu - 0.1 5,495,696 90 61,063.3 60,928.5 59,233 63,712 924.5 m_cbc_s_cbc_760_gpu_2 - 0.1 5,479,797 90 60,886.6 60,768.0 59,361 63,425 835.8 m_cbc_s_cbc_760_gpu_1 - 0.1 5,443,123 90 60,479.1 60,416.0 58,752 62,529 792.1 m_cbc_s_cbc_760_gpu - 0.1 4,847,150 90 53,857.2 53,840.5 53,409 54,368 215.8 m_cbc_s_finalize_cbc_1445_gpu - 0.1 4,171,398 30 139,046.6 139,089.0 138,594 139,585 293.4 m_data_output_s_write_run_time_information_318_gpu - 0.1 3,730,853 90 41,453.9 41,408.0 40,960 42,465 330.0 m_cbc_s_initialize_cbc_1085_gpu - 0.1 3,511,677 90 39,018.6 39,040.0 38,080 39,713 336.0 m_cbc_s_initialize_cbc_1108_gpu - 0.1 3,313,820 90 36,820.2 36,833.0 36,193 37,505 295.6 m_cbc_s_initialize_cbc_1260_gpu - 0.1 3,147,677 90 34,974.2 34,976.0 34,592 35,456 187.4 m_cbc_s_finalize_cbc_1392_gpu - 0.1 3,074,368 90 34,159.6 34,128.5 33,248 35,265 395.2 m_boundary_common_s_populate_variables_buffers_197_gpu - 0.1 3,051,648 90 33,907.2 33,888.0 33,568 34,337 180.0 m_cbc_s_initialize_cbc_1184_gpu - 0.1 2,848,443 90 31,649.4 31,632.5 31,137 32,352 291.2 m_boundary_common_s_populate_variables_buffers_262_gpu - 0.1 2,826,427 30 94,214.2 93,952.0 92,129 97,121 1,474.8 
m_data_output_s_write_run_time_information_318_gpu__red - 0.1 2,809,400 90 31,215.6 31,232.0 30,720 32,000 228.9 m_cbc_s_initialize_cbc_1237_gpu - 0.1 2,570,675 90 28,563.1 28,544.0 28,192 29,024 183.4 m_cbc_s_initialize_cbc_1161_gpu - 0.1 2,322,772 90 25,808.6 25,760.5 24,576 27,679 576.7 m_cbc_s_cbc_717_gpu_1 - 0.1 2,322,323 90 25,803.6 25,792.0 24,928 27,232 518.0 m_cbc_s_cbc_717_gpu - 0.1 2,313,912 90 25,710.1 25,616.5 24,800 27,841 535.0 m_cbc_s_cbc_717_gpu_2 - 0.0 1,529,426 90 16,993.6 16,992.0 16,255 18,048 317.3 m_boundary_common_s_populate_variables_buffers_229_gpu - 0.0 1,435,822 90 15,953.6 15,904.0 15,456 16,576 246.3 m_boundary_common_s_populate_variables_buffers_291_gpu - 0.0 1,315,084 90 14,612.0 14,529.0 13,760 16,096 469.6 m_cbc_s_finalize_cbc_1353_gpu - 0.0 1,304,080 90 14,489.8 14,496.0 13,793 15,648 364.9 m_cbc_s_finalize_cbc_1376_gpu - 0.0 942,406 90 10,471.2 10,495.5 9,729 11,200 294.9 m_cbc_s_cbc_737_gpu - 0.0 932,877 90 10,365.3 10,352.0 9,472 11,328 348.2 m_cbc_s_cbc_737_gpu_2 - 0.0 918,157 90 10,201.7 10,224.0 9,216 11,168 385.9 m_cbc_s_cbc_737_gpu_1 - 0.0 911,813 90 10,131.3 10,144.0 9,792 10,369 123.7 m_cbc_s_finalize_cbc_1481_gpu - 0.0 897,476 90 9,972.0 9,984.0 9,664 10,337 132.0 m_cbc_s_finalize_cbc_1458_gpu - 0.0 791,649 90 8,796.1 8,768.0 8,512 9,312 152.3 m_cbc_s_initialize_cbc_1121_gpu - 0.0 773,028 90 8,589.2 8,576.0 8,320 8,896 124.8 m_cbc_s_initialize_cbc_1144_gpu - 0.0 767,812 90 8,531.2 8,512.0 8,256 8,928 153.2 m_cbc_s_initialize_cbc_1097_gpu - 0.0 696,165 90 7,735.2 7,712.0 7,456 8,160 134.6 m_cbc_s_initialize_cbc_1296_gpu - 0.0 681,025 90 7,566.9 7,552.0 7,072 7,904 165.3 m_cbc_s_initialize_cbc_1220_gpu - 0.0 648,641 90 7,207.1 7,168.0 7,008 7,520 123.4 m_cbc_s_initialize_cbc_1273_gpu - 0.0 622,053 90 6,911.7 6,912.0 6,624 7,424 150.3 m_cbc_s_initialize_cbc_1197_gpu - 0.0 620,804 90 6,897.8 6,880.0 6,528 7,328 169.6 m_cbc_s_finalize_cbc_1428_gpu - 0.0 602,209 90 6,691.2 6,688.0 6,528 7,040 94.9 m_cbc_s_initialize_cbc_1249_gpu - 0.0 594,472 90 6,605.2 6,592.0 6,432 6,848 103.5 m_cbc_s_finalize_cbc_1405_gpu - 0.0 567,978 90 6,310.9 6,304.0 6,111 6,720 107.5 m_cbc_s_initialize_cbc_1173_gpu - -[6/8] Executing 'cuda_gpu_mem_time_sum' stats report - - Time (%) Total Time (ns) Count Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Operation - -------- --------------- ----- ----------- -------- -------- ---------- ----------- ---------------------------- - 71.6 178,474,331 38 4,696,692.9 1,664.0 1,664 27,670,726 9,349,057.8 [CUDA memcpy Device-to-Host] - 26.9 66,903,272 5,596 11,955.6 1,056.0 991 7,652,198 285,442.5 [CUDA memcpy Host-to-Device] - 1.5 3,756,091 3,098 1,212.4 1,056.0 992 1,984 256.0 [CUDA memset] - -[7/8] Executing 'cuda_gpu_mem_size_sum' stats report - - Total (MB) Count Avg (MB) Med (MB) Min (MB) Max (MB) StdDev (MB) Operation - ---------- ----- -------- -------- -------- -------- ----------- ---------------------------- - 586.516 5,596 0.105 0.000 0.000 73.035 2.760 [CUDA memcpy Host-to-Device] - 584.277 38 15.376 0.000 0.000 73.035 30.175 [CUDA memcpy Device-to-Host] - 0.013 3,098 0.000 0.000 0.000 0.000 0.000 [CUDA memset] - -[8/8] Executing 'openacc_sum' stats report - - Time (%) Total Time (ns) Num Calls Avg (ns) Med (ns) Min (ns) Max (ns) StdDev (ns) Name - -------- --------------- --------- ------------- ------------- ----------- ----------- ----------- ------------------------------------------------- - 13.4 1,250,848,660 270 4,632,772.8 4,631,543.5 4,624,229 4,671,891 5,580.2 Compute Construct@m_weno.fpp:797 - 13.4 1,249,882,607 
540 2,314,597.4 2,311,349.5 2,056 4,665,923 2,314,285.0 Wait@m_weno.fpp:797 - 11.3 1,053,050,176 270 3,900,185.8 3,904,208.0 3,859,076 5,654,119 108,985.3 Compute Construct@m_riemann_solvers.fpp:2449 - 11.3 1,051,799,364 540 1,947,776.6 1,930,833.0 1,955 5,646,863 1,948,701.8 Wait@m_riemann_solvers.fpp:2449 - 2.1 193,652,964 90 2,151,699.6 2,152,170.0 2,147,468 2,171,156 3,397.6 Compute Construct@m_weno.fpp:1209 - 2.1 193,330,633 180 1,074,059.1 1,072,409.0 1,958 2,165,526 1,074,660.9 Wait@m_weno.fpp:1209 - 2.0 186,408,230 8 23,301,028.8 22,562,643.0 18,585,494 28,786,288 3,716,929.5 Update@m_start_up.fpp:1193 - 2.0 186,275,668 8 23,284,458.5 22,540,639.0 18,568,804 28,772,517 3,717,693.3 Enqueue Download@m_start_up.fpp:1193 - 1.9 181,024,641 90 2,011,384.9 2,012,962.5 2,007,029 2,029,224 3,792.6 Compute Construct@m_riemann_solvers.fpp:5038 - 1.9 180,637,889 180 1,003,543.8 1,001,911.5 2,094 2,022,050 1,003,767.5 Wait@m_riemann_solvers.fpp:5038 - 1.6 152,569,231 1 152,569,231.0 152,569,231.0 152,569,231 152,569,231 0.0 Device Init - 1.4 127,524,434 90 1,416,938.2 1,416,992.0 1,413,114 1,421,826 1,984.1 Compute Construct@m_rhs.fpp:1117 - 1.4 127,228,639 180 706,825.8 705,605.0 2,033 1,415,458 706,426.4 Wait@m_rhs.fpp:1117 - 1.4 126,914,882 90 1,410,165.4 1,409,912.0 1,405,358 1,433,807 3,399.0 Compute Construct@m_rhs.fpp:1322 - 1.4 126,623,638 180 703,464.7 701,715.5 1,996 1,428,242 703,088.2 Wait@m_rhs.fpp:1322 - 1.1 105,635,392 90 1,173,726.6 1,174,350.0 1,171,004 1,183,555 2,185.0 Compute Construct@m_rhs.fpp:938 - 1.1 105,337,066 180 585,205.9 584,688.0 2,083 1,178,377 584,333.3 Wait@m_rhs.fpp:938 - 1.1 103,499,428 90 1,149,993.6 1,150,466.0 1,146,841 1,156,325 2,597.5 Compute Construct@m_rhs.fpp:1008 - 1.1 103,217,825 180 573,432.4 572,278.5 2,185 1,148,282 572,639.5 Wait@m_rhs.fpp:1008 - 1.1 102,215,531 90 1,135,728.1 1,135,479.0 1,133,078 1,141,059 1,633.5 Compute Construct@m_variables_conversion.fpp:886 - 1.1 101,828,592 180 565,714.4 565,300.5 2,241 1,131,119 564,625.7 Wait@m_variables_conversion.fpp:886 - 1.1 101,768,752 90 1,130,763.9 1,131,035.5 1,127,083 1,167,795 4,435.9 Compute Construct@m_rhs.fpp:635 - 1.1 101,359,813 180 563,110.1 562,322.0 2,456 1,126,454 561,936.1 Wait@m_rhs.fpp:635 - 1.1 98,266,239 90 1,091,847.1 1,092,958.5 1,088,676 1,095,989 2,295.9 Compute Construct@m_weno.fpp:1179 - 1.1 97,938,114 180 544,100.6 543,073.5 2,183 1,089,076 543,261.1 Wait@m_weno.fpp:1179 - 1.0 97,256,086 90 1,080,623.2 1,080,523.5 1,077,445 1,085,473 2,280.9 Compute Construct@m_riemann_solvers.fpp:4986 - 1.0 96,851,542 180 538,064.1 537,177.0 2,183 1,076,471 536,981.5 Wait@m_riemann_solvers.fpp:4986 - 1.0 93,672,129 90 1,040,801.4 1,041,305.0 1,038,063 1,045,627 1,952.5 Compute Construct@m_weno.fpp:1141 - 1.0 93,347,022 180 518,594.6 517,941.0 1,890 1,037,406 517,730.9 Wait@m_weno.fpp:1141 - 0.9 88,158,070 90 979,534.1 979,809.5 977,095 984,751 1,780.3 Compute Construct@m_riemann_solvers.fpp:5090 - 0.9 87,757,597 180 487,542.2 486,596.5 2,205 974,844 486,291.9 Wait@m_riemann_solvers.fpp:5090 - 0.8 74,865,105 30 2,495,503.5 2,495,608.0 2,491,010 2,499,427 2,265.6 Compute Construct@m_data_output.fpp:280 - 0.8 74,726,676 60 1,245,444.6 1,243,514.0 2,347 2,492,248 1,253,194.6 Wait@m_data_output.fpp:280 - 0.7 62,634,956 20 3,131,747.8 82,004.5 14,260 7,871,967 3,888,670.1 Update@m_start_up.fpp.f90:54 - 0.7 62,182,948 85 731,564.1 2,628.0 2,222 7,835,806 2,271,843.5 Enqueue Upload@m_start_up.fpp.f90:54 - 0.5 44,024,848 30 1,467,494.9 1,467,029.5 1,461,329 1,474,592 3,752.0 Compute 
Construct@m_time_steppers.fpp:843

Time (%)  Total Time  Instances          Avg          Med        Min        Max     StdDev  Name
--------  ----------  ---------  -----------  -----------  ---------  ---------  ---------  ----
     0.5  43,985,551         30  1,466,185.0  1,465,305.5  1,458,442  1,474,513    3,505.6  Compute Construct@m_time_steppers.fpp:770
     0.5  43,780,776         30  1,459,359.2  1,459,511.0  1,453,410  1,466,562    3,720.3  Wait@m_time_steppers.fpp:843
     0.5  43,742,732         30  1,458,091.1  1,457,490.5  1,451,269  1,466,692    3,288.5  Wait@m_time_steppers.fpp:770
     0.4  37,043,224         30  1,234,774.1  1,236,126.0  1,231,696  1,238,694    2,262.6  Compute Construct@m_time_steppers.fpp:698
     0.4  36,847,491         90    409,416.6    409,381.5    407,723    411,800      940.4  Compute Construct@m_rhs.fpp:1259
     0.4  36,827,252         30  1,227,575.1  1,227,755.0  1,224,966  1,230,396    2,012.8  Wait@m_time_steppers.fpp:698
     0.4  36,820,995         90    409,122.2    408,941.0    406,632    413,061    1,263.5  Compute Construct@m_rhs.fpp:1496
     0.4  36,617,753         90    406,863.9    406,722.5    405,073    410,996    1,135.9  Compute Construct@m_rhs.fpp:1086
     0.4  36,497,064        180    202,761.5    202,439.0      2,170    404,866  200,781.9  Wait@m_rhs.fpp:1259
     0.4  36,470,319        180    202,612.9    202,304.0      2,423    405,212  200,595.9  Wait@m_rhs.fpp:1496
     0.4  36,270,659        180    201,503.7    201,139.0      2,085    401,974  199,526.6  Wait@m_rhs.fpp:1086
     0.3  23,431,978         90    260,355.3    260,194.5    258,688    266,998    1,122.2  Compute Construct@m_riemann_solvers.fpp:5065
     0.2  23,067,797        180    128,154.4    127,817.5      2,201    260,975  126,015.1  Wait@m_riemann_solvers.fpp:5065
     0.2  21,929,869         90    243,665.2    243,487.0    240,723    249,561    1,499.3  Compute Construct@m_boundary_common.fpp:135
     0.2  21,531,930        180    119,621.8    119,289.0      2,203    239,161  117,306.0  Wait@m_boundary_common.fpp:135
     0.2  19,911,481        270     73,746.2     72,651.5     70,711    305,908   14,239.4  Compute Construct@m_cbc.fpp:760
     0.2  16,902,775        270     62,602.9     62,528.0     49,975     69,770    1,341.3  Wait@m_cbc.fpp:1024

All remaining entries (Compute Construct, Wait, Update, Enter/Exit Data, Enqueue Upload/Download/Launch, and
Alloc events across m_rhs.fpp, m_riemann_solvers.fpp, m_cbc.fpp, m_weno.fpp, m_boundary_common.fpp,
m_variables_conversion.fpp, m_time_steppers.fpp, m_data_output.fpp, m_start_up.fpp, m_mpi_common.fpp, and
m_global_parameters.fpp) each account for 0.1 % of the total time or less.
0.0 0 4 0.0 0.0 0 0 0.0 Alloc@m_cbc.fpp:760 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_data_output.fpp:1758 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1238 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1241 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1242 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1243 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1246 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1247 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1248 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1251 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_global_parameters.fpp:1252 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_mpi_common.fpp:79 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:184 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:185 - 0.0 0 8 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:188 - 0.0 0 4 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:192 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:217 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:273 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:277 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:278 - 0.0 0 9 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:280 - 0.0 0 10 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:281 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:291 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:293 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:298 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:300 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:310 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:312 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:367 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:368 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:369 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:378 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:387 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:388 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:389 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:390 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:391 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:392 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:483 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:484 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:487 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:488 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:493 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:494 - 0.0 0 3 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:495 - 0.0 0 8 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:499 - 0.0 0 8 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:503 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:518 - 0.0 0 16 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:543 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:595 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_rhs.fpp:653 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3341 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3346 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3349 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3352 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3355 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3373 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3376 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3379 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3382 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3401 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3404 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_riemann_solvers.fpp:3407 - 0.0 0 17 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:103 - 0.0 0 9 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:135 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:257 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:298 - 0.0 0 8 0.0 0.0 0 0 0.0 
Alloc@m_time_steppers.fpp:305 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:319 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:321 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:322 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:327 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:328 - 0.0 0 2 0.0 0.0 0 0 0.0 Alloc@m_time_steppers.fpp:98 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:623 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:626 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:627 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:629 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:631 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:632 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_variables_conversion.fpp:815 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:134 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:136 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:139 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:140 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:149 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:166 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:168 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:171 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:172 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:179 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:189 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:191 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:194 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:195 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:202 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Alloc@m_weno.fpp:632 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_boundary_common.fpp:1826 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp.f90:450 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:148 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:154 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:158 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:164 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:168 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:191 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:197 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:201 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:207 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:211 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:236 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:242 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:246 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:252 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:256 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:263 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:289 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:363 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:396 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:400 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:401 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:402 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:592 - 0.0 0 1,080 0.0 0.0 0 0 0.0 Create@m_cbc.fpp:760 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_data_output.fpp:1758 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1238 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1241 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1242 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1243 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1246 - 0.0 0 
1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1247 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1248 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1251 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_global_parameters.fpp:1252 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_mpi_common.fpp:79 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:184 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:185 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:188 - 0.0 0 4 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:192 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:217 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:273 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:277 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:278 - 0.0 0 9 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:280 - 0.0 0 10 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:281 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:291 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:293 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:298 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:300 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:310 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:312 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:367 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:368 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:369 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:378 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:387 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:388 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:389 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:390 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:391 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:392 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:483 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:484 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:487 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:488 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:493 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:494 - 0.0 0 3 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:495 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:499 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:503 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:518 - 0.0 0 16 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:543 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:595 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_rhs.fpp:653 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3341 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3346 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3349 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3352 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3355 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3373 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3376 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3379 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3382 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3401 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3404 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_riemann_solvers.fpp:3407 - 0.0 0 17 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:103 - 0.0 0 9 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:135 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:257 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:298 - 0.0 0 8 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:305 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:319 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:321 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:322 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:324 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:325 - 0.0 0 1 0.0 0.0 0 0 0.0 
Create@m_time_steppers.fpp:327 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:328 - 0.0 0 2 0.0 0.0 0 0 0.0 Create@m_time_steppers.fpp:98 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:623 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:626 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:627 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:629 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:631 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:632 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_variables_conversion.fpp:815 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:134 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:136 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:139 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:140 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:149 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:166 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:168 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:171 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:172 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:179 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:189 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:191 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:194 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:195 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:202 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:628 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:630 - 0.0 0 1 0.0 0.0 0 0 0.0 Create@m_weno.fpp:632 - 0.0 0 1,080 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1024 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1523 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1525 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1527 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1530 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1532 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1534 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1537 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1539 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1541 - 0.0 0 8 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1545 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1548 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1552 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1554 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1561 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1563 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1569 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_cbc.fpp:1571 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_global_parameters.fpp:1327 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_global_parameters.fpp:1330 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_global_parameters.fpp:1333 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2162 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2163 - 0.0 0 2 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2166 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2170 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2245 - 0.0 0 3 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2280 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_rhs.fpp:2283 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5135 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5136 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5137 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5138 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5148 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5149 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5150 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5151 - 0.0 0 1 0.0 0.0 0 0 0.0 Delete@m_riemann_solvers.fpp:5161 - 0.0 0 1 0.0 0.0 0 0 0.0 
diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 185a84892e..8ccab395be 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -399,6 +399,31 @@ contains end subroutine s_mpi_reduce_stability_criteria_extrema + !> The following subroutine takes the inputted variable and + !! determines its sum on the entire computational domain. + !! @param var_loc holds the local value to be reduced among + !! all the processors in communicator. On output, the variable holds + !! the sum, reduced amongst all of the local values. + subroutine s_mpi_reduce_int_sum(var_loc) + + integer, intent(inout) :: var_loc + +#ifdef MFC_MPI + + ! Temporary storage variable that holds the reduced sum value + integer :: var_glb + + ! Performing reduction procedure and eventually storing its result + ! into the variable that was initially inputted into the subroutine + call MPI_REDUCE(var_loc, var_glb, 1, MPI_INTEGER, & + MPI_SUM, 0, MPI_COMM_WORLD, ierr) + + var_loc = var_glb + +#endif + + end subroutine s_mpi_reduce_int_sum + !> The following subroutine takes the input local variable !! from all processors and reduces to the sum of all !! values. The reduced variable is recorded back onto the @@ -1061,7 +1086,13 @@ contains !! Remaining number of cells, in a particular coordinate direction, !! after the majority is divided up among the available processors - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators + + integer, dimension(1:num_dims) :: neighbor_coords + + nidx(1)%beg = 0; nidx(1)%end = 0 + nidx(2)%beg = 0; nidx(2)%end = 0 + nidx(3)%beg = 0; nidx(3)%end = 0 if (num_procs == 1 .and. parallel_io) then do i = 1, num_dims @@ -1227,6 +1258,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%beg, ierr) proc_coords(3) = proc_coords(3) + 1 + nidx(3)%beg = -1 end if !
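A usage note for orientation, not part of the patch: MPI_REDUCE with root 0 leaves the reduced sum on rank 0 only, so any decision based on the result of s_mpi_reduce_int_sum should sit behind a proc_rank guard. A minimal sketch, using the bubble-count check that appears later in this series as the example:

    ! bub_id is the local bubble count on this rank; after the call, rank 0 holds the global sum
    call s_mpi_reduce_int_sum(bub_id)
    if (proc_rank == 0) then
        if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat')
    end if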
Boundary condition at the end @@ -1235,6 +1267,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_z%end, ierr) proc_coords(3) = proc_coords(3) - 1 + nidx(3)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1366,6 +1399,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%beg, ierr) proc_coords(2) = proc_coords(2) + 1 + nidx(2)%beg = -1 end if ! Boundary condition at the end @@ -1374,6 +1408,7 @@ contains call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & bc_y%end, ierr) proc_coords(2) = proc_coords(2) - 1 + nidx(2)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1457,6 +1492,7 @@ contains proc_coords(1) = proc_coords(1) - 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) proc_coords(1) = proc_coords(1) + 1 + nidx(1)%beg = -1 end if ! Boundary condition at the end @@ -1464,6 +1500,7 @@ contains proc_coords(1) = proc_coords(1) + 1 call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) proc_coords(1) = proc_coords(1) - 1 + nidx(1)%end = 1 end if #ifdef MFC_POST_PROCESS @@ -1509,6 +1546,24 @@ contains end if #endif end if + + @:ALLOCATE(neighbor_ranks(nidx(1)%beg:nidx(1)%end, & + nidx(2)%beg:nidx(2)%end, & + nidx(3)%beg:nidx(3)%end)) + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + neighbor_coords(1) = proc_coords(1) + i + if (num_dims > 1) neighbor_coords(2) = proc_coords(2) + j + if (num_dims > 2) neighbor_coords(3) = proc_coords(3) + k + call MPI_CART_RANK(MPI_COMM_CART, neighbor_coords, & + neighbor_ranks(i, j, k), ierr) + end if + end do + end do + end do #endif end subroutine s_mpi_decompose_computational_domain diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 48780f00d4..0f45f59cde 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -171,6 +171,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks for lagrangian particle communication + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index d73f3e8ce6..fecb7acc9c 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -163,6 +163,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx + + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks for lagrangian particle communication + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid diff --git "a/src/simulation/\\" "b/src/simulation/\\" new file mode 100644 index 0000000000..2559c0ffff --- /dev/null +++ "b/src/simulation/\\" @@ -0,0 +1,1837 @@ +!> +!! @file m_bubbles_EL.fpp +!! 
@brief Contains module m_bubbles_EL + +#:include 'macros.fpp' + +!> @brief This module is used to to compute the volume-averaged bubble model +module m_bubbles_EL + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + + use m_mpi_common + + use m_bubbles_EL_kernels !< Definitions of the kernel functions + + use m_bubbles !< General bubble dynamics procedures + + use m_variables_conversion !< State variables type conversion procedures + + use m_compile_specific + + use m_boundary_common + + use m_sim_helpers + + use m_helper + + use m_ibm + + implicit none + + !(nBub) + integer, allocatable, dimension(:, :) :: lag_id !< Global and local IDs + real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius + real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius + real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius + real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass + real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) + real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) + real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) + !(nBub, 1 -> actual val or 2 -> temp val) + real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble + real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble + real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius + real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface + !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) + real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position + real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position + real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity + real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format + !(nBub, 1-> x or 2->y or 3 ->z, time-stage) + real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius + real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity + real(wp), allocatable, dimension(:, :) :: gas_dpdt !< Time derivative of gas pressure + real(wp), allocatable, dimension(:, :) :: gas_dmvdt !< Time derivative of the vapor mass in the bubble + real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position + real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity + + !$acc declare create(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, & + !$acc gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + !$acc gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt) + + integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme + + !$acc declare create(lag_num_ts) + + integer :: nBubs !< Number of bubbles in the local domain + real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain + type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework + integer :: q_beta_idx !< Size of the q_beta vector field + + !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) + +contains + + !> Initializes the lagrangian 
subgrid bubble solver + !! @param q_cons_vf Initial conservative variables + subroutine s_initialize_bubbles_EL_module(q_cons_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + + integer :: nBubs_glb, i + + ! Setting number of time-stages for selected time-stepping scheme + lag_num_ts = time_stepper + + ! Allocate space for the Eulerian fields needed to map the effect of the bubbles + if (lag_params%solver_approach == 1) then + ! One-way coupling + q_beta_idx = 3 + elseif (lag_params%solver_approach == 2) then + ! Two-way coupling + q_beta_idx = 4 + if (p == 0) then + !Subgrid noise model for 2D approximation + q_beta_idx = 6 + end if + else + call s_mpi_abort('Please check the lag_params%solver_approach input') + end if + + !$acc update device(lag_num_ts, q_beta_idx) + + @:ALLOCATE(q_beta%vf(1:q_beta_idx)) + + do i = 1, q_beta_idx + @:ALLOCATE(q_beta%vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + + @:ACC_SETUP_VFs(q_beta) + + ! Allocating space for lagrangian variables + nBubs_glb = lag_params%nBubs_glb + + @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) + @:ALLOCATE(bub_R0(1:nBubs_glb)) + @:ALLOCATE(Rmax_stats(1:nBubs_glb)) + @:ALLOCATE(Rmin_stats(1:nBubs_glb)) + @:ALLOCATE(gas_mg(1:nBubs_glb)) + @:ALLOCATE(gas_betaT(1:nBubs_glb)) + @:ALLOCATE(gas_betaC(1:nBubs_glb)) + @:ALLOCATE(bub_dphidt(1:nBubs_glb)) + @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) + @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) + @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) + @:ALLOCATE(intfc_vel(1:nBubs_glb, 1:2)) + @:ALLOCATE(mtn_pos(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(mtn_posPrev(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(mtn_vel(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(mtn_s(1:nBubs_glb, 1:3, 1:2)) + @:ALLOCATE(intfc_draddt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(intfc_dveldt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(gas_dpdt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(gas_dmvdt(1:nBubs_glb, 1:lag_num_ts)) + @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + + if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) + + ! Starting bubbles + call s_start_lagrange_inputs() + call s_read_input_bubbles(q_cons_vf) + + end subroutine s_initialize_bubbles_EL_module + + !> The purpose of this procedure is to start lagrange bubble parameters applying nondimensionalization if needed + subroutine s_start_lagrange_inputs() + + integer :: id_bubbles, id_host + real(wp) :: rho0, c0, T0, x0, p0 + + id_bubbles = num_fluids + id_host = num_fluids - 1 + + !Reference values + rho0 = lag_params%rho0 + c0 = lag_params%c0 + T0 = lag_params%T0 + x0 = lag_params%x0 + p0 = rho0*c0*c0 + + !Update inputs + Tw = lag_params%Thost/T0 + pv = fluid_pp(id_host)%pv/p0 + gamma_v = fluid_pp(id_bubbles)%gamma_v + gamma_n = fluid_pp(id_host)%gamma_v + k_vl = fluid_pp(id_bubbles)%k_v*(T0/(x0*rho0*c0*c0*c0)) + k_nl = fluid_pp(id_host)%k_v*(T0/(x0*rho0*c0*c0*c0)) + cp_v = fluid_pp(id_bubbles)%cp_v*(T0/(c0*c0)) + cp_n = fluid_pp(id_host)%cp_v*(T0/(c0*c0)) + R_v = (R_uni/fluid_pp(id_bubbles)%M_v)*(T0/(c0*c0)) + R_n = (R_uni/fluid_pp(id_host)%M_v)*(T0/(c0*c0)) + lag_params%diffcoefvap = lag_params%diffcoefvap/(x0*c0) + ss = fluid_pp(id_host)%ss/(rho0*x0*c0*c0) + mul0 = fluid_pp(id_host)%mul0/(rho0*x0*c0) + + ! Parameters used in bubble_model + Web = 1._wp/ss + Re_inv = mul0 + + ! 
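Reader's note, not part of the patch: the assignments above apply a single nondimensionalization built from the reference state (rho0, c0, T0, x0), for example

    ! p0   = rho0*c0**2                  reference pressure
    ! ss   = sigma_dim/(rho0*x0*c0**2)   nondimensional surface tension, so Web    = 1/ss
    ! mul0 = mu_dim   /(rho0*x0*c0)      nondimensional viscosity,       so Re_inv = mul0

so Web and Re_inv play the role of inverse Weber and Reynolds numbers of the reference state; sigma_dim and mu_dim here stand for the dimensional fluid_pp(id_host)%ss and fluid_pp(id_host)%mul0 inputs.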
Need improvements to accept polytropic gas compression, isothermal and adiabatic thermal models, and + ! the Gilmore and RP bubble models. + polytropic = .false. ! Forcing no polytropic model + thermal = 3 ! Forcing constant transfer coefficient model based on Preston et al., 2007 + ! If Keller-Miksis model is not selected, then no radial motion + + end subroutine s_start_lagrange_inputs + + !> The purpose of this procedure is to obtain the initial bubbles' information + !! @param q_cons_vf Conservative variables + subroutine s_read_input_bubbles(q_cons_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + + real(wp), dimension(8) :: inputBubble + real(wp) :: qtime + integer :: id, bub_id, save_count + integer :: i, ios + logical :: file_exist, indomain + + character(LEN=path_len + 2*name_len) :: path_D_dir !< + + ! Initialize number of particles + bub_id = 0 + id = 0 + + ! Read the input lag_bubble file or restart point + if (cfl_dt) then + save_count = n_start + qtime = n_start*t_save + else + save_count = t_step_start + qtime = t_step_start*dt + end if + + if (save_count == 0) then + if (proc_rank == 0) print *, 'Reading lagrange bubbles input file.' + inquire (file='input/lag_bubbles.dat', exist=file_exist) + if (file_exist) then + open (94, file='input/lag_bubbles.dat', form='formatted', iostat=ios) + do while (ios == 0) + read (94, *, iostat=ios) (inputBubble(i), i=1, 8) + if (ios /= 0) cycle + indomain = particle_in_domain(inputBubble(1:3)) + id = id + 1 + if (id > lag_params%nBubs_glb .and. proc_rank == 0) then + call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") + end if + if (indomain) then + bub_id = bub_id + 1 + call s_add_bubbles(inputBubble, q_cons_vf, bub_id) + lag_id(bub_id, 1) = id !global ID + lag_id(bub_id, 2) = bub_id !local ID + nBubs = bub_id ! local number of bubbles + end if + end do + close (94) + else + call s_mpi_abort("Initialize the lagrange bubbles in input/lag_bubbles.dat") + end if + else + if (proc_rank == 0) print *, 'Restarting lagrange bubbles at save_count: ', save_count + call s_restart_bubbles(bub_id, save_count) + end if + + print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id + + call s_mpi_reduce_int_sum(bub_id) + + if (proc_rank == 0) then + if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') + end if + + !$acc update device(bubbles_lagrange, lag_params) + + !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & + !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs) + + Rmax_glb = min(dflt_real, -dflt_real) + Rmin_glb = max(dflt_real, -dflt_real) + !$acc update device(Rmax_glb, Rmin_glb) + + !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) + + !Populate temporal variables + call s_transfer_data_to_tmp() + call s_smear_voidfraction() + + if (lag_params%write_bubbles) call s_write_lag_particles(qtime) + + if (save_count == 0) then + ! Create ./D directory + write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' + call my_inquire(path_D_dir, file_exist) + if (.not. file_exist) call s_create_directory(trim(path_D_dir)) + call s_write_restart_lag_bubbles(save_count) ! 
Needed for post_processing + call s_write_void_evol(qtime) + end if + + end subroutine s_read_input_bubbles + + !> The purpose of this procedure is to obtain the information of the bubbles when starting fresh + !! @param inputBubble Bubble information + !! @param q_cons_vf Conservative variables + !! @param bub_id Local id of the bubble + subroutine s_add_bubbles(inputBubble, q_cons_vf, bub_id) + + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + real(wp), dimension(8), intent(in) :: inputBubble + integer, intent(in) :: bub_id + integer :: i + + real(wp) :: pliq, volparticle, concvap, totalmass, kparticle, cpparticle + real(wp) :: omegaN, PeG, PeT, rhol, pcrit, qv, gamma, pi_inf, dynP + integer, dimension(3) :: cell + real(wp), dimension(2) :: Re + real(wp) :: massflag, heatflag, Re_trans, Im_trans + + massflag = 0._wp + heatflag = 0._wp + if (lag_params%massTransfer_model) massflag = 1._wp + if (lag_params%heatTransfer_model) heatflag = 1._wp + + bub_R0(bub_id) = inputBubble(7) + Rmax_stats(bub_id) = min(dflt_real, -dflt_real) + Rmin_stats(bub_id) = max(dflt_real, -dflt_real) + bub_dphidt(bub_id) = 0._wp + intfc_rad(bub_id, 1) = inputBubble(7) + intfc_vel(bub_id, 1) = inputBubble(8) + mtn_pos(bub_id, 1:3, 1) = inputBubble(1:3) + mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) + mtn_vel(bub_id, 1:3, 1) = inputBubble(4:6) + + if (cyl_coord .and. p == 0) then + mtn_pos(bub_id, 2, 1) = sqrt(mtn_pos(bub_id, 2, 1)**2._wp + & + mtn_pos(bub_id, 3, 1)**2._wp) + !Storing azimuthal angle (-Pi to Pi)) into the third coordinate variable + mtn_pos(bub_id, 3, 1) = atan2(inputBubble(3), inputBubble(2)) + mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) + end if + + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + + ! Check if the bubble is located in the ghost cell of a symmetric boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + + if (p > 0) then + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + end if + + ! If particle is in the ghost cells, find the closest non-ghost cell + cell(1) = min(max(cell(1), 0), m) + cell(2) = min(max(cell(2), 0), n) + if (p > 0) cell(3) = min(max(cell(3), 0), p) + call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & + rhol, gamma, pi_inf, qv, Re) + dynP = 0._wp + do i = 1, num_dims + dynP = dynP + 0.5_wp*q_cons_vf(contxe + i)%sf(cell(1), cell(2), cell(3))**2/rhol + end do + pliq = (q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - dynP - pi_inf)/gamma + if (pliq < 0) print *, "Negative pressure", proc_rank, & + q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)), pi_inf, gamma, pliq, cell, dynP + + ! 
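For orientation (an illustration, not taken from the patch): each row of input/lag_bubbles.dat is read as eight reals interpreted as x, y, z, u, v, w, R0, Rdot0, so a file seeding a single bubble of radius 1.0e-3 at the origin, initially at rest, could consist of the single row

    0.0  0.0  0.0  0.0  0.0  0.0  1.0e-3  0.0

with one such row per bubble (the numerical values are placeholders): columns 1-3 feed mtn_pos, columns 4-6 feed mtn_vel, column 7 sets bub_R0 and intfc_rad, and column 8 sets intfc_vel.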
Initial particle pressure + gas_p(bub_id, 1) = pliq + 2._wp*(1._wp/Web)/bub_R0(bub_id) + if ((1._wp/Web) /= 0._wp) then + pcrit = pv - 4._wp*(1._wp/Web)/(3._wp*sqrt(3._wp*gas_p(bub_id, 1)*bub_R0(bub_id)**3._wp/(2._wp*(1._wp/Web)))) + pref = gas_p(bub_id, 1) + else + pcrit = 0._wp + end if + + ! Initial particle mass + volparticle = 4._wp/3._wp*pi*bub_R0(bub_id)**3._wp ! volume + gas_mv(bub_id, 1) = pv*volparticle*(1._wp/(R_v*Tw))*(massflag) ! vapermass + gas_mg(bub_id) = (gas_p(bub_id, 1) - pv*(massflag))*volparticle*(1._wp/(R_n*Tw)) ! gasmass + if (gas_mg(bub_id) <= 0._wp) then + call s_mpi_abort("The initial mass of gas inside the bubble is negative. Check the initial conditions.") + end if + totalmass = gas_mg(bub_id) + gas_mv(bub_id, 1) ! totalmass + + ! Bubble natural frequency + concvap = gas_mv(bub_id, 1)/(gas_mv(bub_id, 1) + gas_mg(bub_id)) + omegaN = (3._wp*(gas_p(bub_id, 1) - pv*(massflag)) + 4._wp*(1._wp/Web)/bub_R0(bub_id))/rhol + if (pv*(massflag) > gas_p(bub_id, 1)) then + call s_mpi_abort("Lagrange bubble initially located in a region with pressure below the vapor pressure.") + end if + omegaN = sqrt(omegaN/bub_R0(bub_id)**2._wp) + + cpparticle = concvap*cp_v + (1._wp - concvap)*cp_n + kparticle = concvap*k_vl + (1._wp - concvap)*k_nl + + ! Mass and heat transfer coefficients (based on Preston 2007) + PeT = totalmass/volparticle*cpparticle*bub_R0(bub_id)**2._wp*omegaN/kparticle + call s_transcoeff(1._wp, PeT, Re_trans, Im_trans) + gas_betaT(bub_id) = Re_trans*(heatflag)*kparticle + + PeG = bub_R0(bub_id)**2._wp*omegaN/lag_params%diffcoefvap + call s_transcoeff(1._wp, PeG, Re_trans, Im_trans) + gas_betaC(bub_id) = Re_trans*(massflag)*lag_params%diffcoefvap + + if (gas_mg(bub_id) <= 0._wp) then + call s_mpi_abort("Negative gas mass in the bubble, check if the bubble is in the domain.") + end if + + end subroutine s_add_bubbles + + !> The purpose of this procedure is to obtain the information of the bubbles from a restart point. + !! @param bub_id Local ID of the particle + !! @param save_count File identifier + subroutine s_restart_bubbles(bub_id, save_count) + + integer, intent(inout) :: bub_id, save_count + + character(LEN=path_len + 2*name_len) :: file_loc + +#ifdef MFC_MPI + real(wp), dimension(20) :: inputvals + integer, dimension(MPI_STATUS_SIZE) :: status + integer(kind=MPI_OFFSET_KIND) :: disp + integer :: view + + integer, dimension(3) :: cell + logical :: indomain, particle_file, file_exist + + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer :: ifile, ierr, tot_data, id + integer :: i + + write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (file=trim(file_loc), exist=file_exist) + + if (file_exist) then + if (proc_rank == 0) then + open (9, file=trim(file_loc), form='unformatted', status='unknown') + read (9) tot_data, mytime, dt + close (9) + print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt + end if + else + print '(a)', trim(file_loc)//' is missing. exiting.' 
+ call s_mpi_abort + end if + + call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + + gsizes(1) = tot_data + gsizes(2) = 21 + lsizes(1) = tot_data + lsizes(2) = 21 + start_idx_part(1) = 0 + start_idx_part(2) = 0 + + call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_type_COMMIT(view, ierr) + + ! Open the file to write all flow variables + write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (file=trim(file_loc), exist=particle_file) + + if (particle_file) then + call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + disp = 0._wp + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) + call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & + mpi_p, status, ierr) + do i = 1, tot_data + id = int(MPI_IO_DATA_lag_bubbles(i, 1)) + inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) + indomain = particle_in_domain(inputvals(1:3)) + if (indomain .and. (id > 0)) then + bub_id = bub_id + 1 + nBubs = bub_id ! local number of bubbles + lag_id(bub_id, 1) = id ! global ID + lag_id(bub_id, 2) = bub_id ! local ID + mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) + mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) + mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) + intfc_rad(bub_id, 1) = inputvals(10) + intfc_vel(bub_id, 1) = inputvals(11) + bub_R0(bub_id) = inputvals(12) + Rmax_stats(bub_id) = inputvals(13) + Rmin_stats(bub_id) = inputvals(14) + bub_dphidt(bub_id) = inputvals(15) + gas_p(bub_id, 1) = inputvals(16) + gas_mv(bub_id, 1) = inputvals(17) + gas_mg(bub_id) = inputvals(18) + gas_betaT(bub_id) = inputvals(19) + gas_betaC(bub_id) = inputvals(20) + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) + end if + end do + deallocate (MPI_IO_DATA_lag_bubbles) + end if + call MPI_FILE_CLOSE(ifile, ierr) +#endif + + end subroutine s_restart_bubbles + + !> Contains the bubble dynamics subroutines. + !! @param q_cons_vf Conservative variables + !! @param q_prim_vf Primitive variables + !! @param rhs_vf Calculated change of conservative variables + !! @param t_step Current time step + !! 
@param stage Current stage in the time-stepper algorithm + subroutine s_compute_bubble_EL_dynamics(q_cons_vf, q_prim_vf, t_step, rhs_vf, stage) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf + integer, intent(in) :: t_step, stage + + real(wp) :: myVapFlux + real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac + real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v + real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot + real(wp) :: myPinf, aux1, aux2, myCson, myRho + real(wp) :: gamma, pi_inf, qv + real(wp), dimension(contxe) :: myalpha_rho, myalpha + real(wp), dimension(2) :: Re + integer, dimension(3) :: cell + + integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached + real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines + + integer :: i, k, l + + call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") + + ! Subgrid p_inf model based on Maeda and Colonius (2018). + if (lag_params%pressure_corrector) then + ! Calculate velocity potentials (valid for one bubble per cell) + !$acc parallel loop gang vector default(present) private(k, cell) + do k = 1, nBubs + call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) + myR0 = bub_R0(k) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myPb = gas_p(k, 2) + pint = f_cpbw_KM(myR0, myR, myV, myPb) + pint = pint + 0.5_wp*myV**2._wp + if (lag_params%cluster_type == 2) then + bub_dphidt(k) = (paux - pint) + term2 + ! Accounting for the potential induced by the bubble averaged over the control volume + ! Note that this is based on the incompressible flow assumption near the bubble. + term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) + bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) + end if + end do + end if + + ! Radial motion model + adap_dt_stop_max = 0 + !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & + !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) + do k = 1, nBubs + + ! Current bubble state + myPb = gas_p(k, 2) + myMass_n = gas_mg(k) + myMass_v = gas_mv(k, 2) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myBeta_c = gas_betaC(k) + myBeta_t = gas_betaT(k) + myR0 = bub_R0(k) + + ! Vapor and heat fluxes + myVapFlux = f_vflux(myR, myV, myPb, myMass_v, k, myMass_n, myBeta_c, myR_m, mygamma_m) + myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) + myMvdot = 4._wp*pi*myR**2._wp*myVapFlux + + ! Obtaining driving pressure + call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) + + ! Obtain liquid density and computing speed of sound from pinf + !$acc loop seq + do i = 1, num_fluids + myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) + myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) + end do + call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & + myalpha_rho, Re, cell(1), cell(2), cell(3)) + call s_compute_cson_from_pinf(k, q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) + + ! Adaptive time stepping + adap_dt_stop = 0 + + if (adap_dt) then + + call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & + dmntait, dmBtait, dm_bub_adv_src, dm_divu, & + k, myMass_v, myMass_n, myBeta_c, & + myBeta_t, myCson, adap_dt_stop) + + ! 
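            ! Reader's note, not in the original patch: with adap_dt the radial ODE for
            ! (R, V, p_b, m_v) is sub-stepped inside s_advance_step and the converged state is
            ! written straight into the stage-1 slots below, so no stage derivative is stored;
            ! without adap_dt, the f_rddot/f_vflux rates go into intfc_dveldt, intfc_draddt,
            ! gas_dpdt and gas_dmvdt for the explicit time stepper to integrate.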
Update bubble state + intfc_rad(k, 1) = myR + intfc_vel(k, 1) = myV + gas_p(k, 1) = myPb + gas_mv(k, 1) = myMass_v + + else + + ! Radial acceleration from bubble models + intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & + myPb, myPbdot, dmalf, dmntait, dmBtait, & + dm_bub_adv_src, dm_divu, & + myCson) + print*, proc_rank, intfc_dveldt(k, stage) + intfc_draddt(k, stage) = myV + gas_dmvdt(k, stage) = myMvdot + gas_dpdt(k, stage) = myPbdot + + end if + + do l = 1, num_dims + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & + intfc_rad(k,2), mtn_vel(k,l,2), & + gas_mg(k), gas_mv(k, 2), & + Re(1), myRho, cell, l, q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if + end do + + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + + end do + + if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") + + call nvtxEndRange + + end subroutine s_compute_bubble_EL_dynamics + + !> The purpose of this subroutine is to obtain the bubble source terms based on Maeda and Colonius (2018) + !! and add them to the RHS scalar field. + !! @param q_cons_vf Conservative variables + !! @param q_prim_vf Conservative variables + !! @param rhs_vf Time derivative of the conservative variables + subroutine s_compute_bubbles_EL_source(q_cons_vf, q_prim_vf, rhs_vf) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf + + integer :: i, j, k, l + + if (.not. 
adap_dt) call s_smear_voidfraction() + + if (lag_params%solver_approach == 2) then + + if (p == 0) then + !$acc parallel loop collapse(4) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & + q_beta%vf(5)%sf(i, j, k)) + + end if + end do + end do + end do + end do + else + !$acc parallel loop collapse(4) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(2)%sf(i, j, k) + end if + end do + end do + end do + end do + end if + + do l = 1, num_dims + + call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) + + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(3)%sf(i, j, k) + end if + end do + end do + end do + + !source in energy + !$acc parallel loop collapse(3) gang vector default(present) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) + end do + end do + end do + + call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) + + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k) + end if + end do + end do + end do + end do + + end if + + end subroutine s_compute_bubbles_EL_source + + !> This procedure computes the speed of sound from a given driving pressure + !! @param bub_id Bubble id + !! @param q_prim_vf Primitive variables + !! @param pinf Driving pressure + !! @param cell Bubble cell + !! @param rhol Liquid density + !! @param gamma Liquid specific heat ratio + !! @param pi_inf Liquid stiffness + !! 
@param cson Calculated speed of sound + subroutine s_compute_cson_from_pinf(bub_id, q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) +#ifdef _CRAYFTN + !DIR$ INLINEALWAYS s_compute_cson_from_pinf +#else + !$acc routine seq +#endif + integer, intent(in) :: bub_id + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), intent(in) :: pinf, rhol, gamma, pi_inf + integer, dimension(3), intent(in) :: cell + real(wp), intent(out) :: cson + + real(wp) :: E, H + real(wp), dimension(num_dims) :: vel + integer :: i + + !$acc loop seq + do i = 1, num_dims + vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) + end do + E = gamma*pinf + pi_inf + 0.5_wp*rhol*dot_product(vel, vel) + H = (E + pinf)/rhol + cson = sqrt((H - 0.5_wp*dot_product(vel, vel))/gamma) + + end subroutine s_compute_cson_from_pinf + + !> The purpose of this subroutine is to smear the effect of the bubbles in the Eulerian framework + subroutine s_smear_voidfraction() + + integer :: i, j, k, l + + call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") + + !$acc parallel loop collapse(4) gang vector default(present) + do i = 1, q_beta_idx + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(i)%sf(j, k, l) = 0._wp + end do + end do + end do + end do + + call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & + mtn_s, mtn_pos, q_beta) + + !Store 1-beta + !$acc parallel loop collapse(3) gang vector default(present) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) + ! Limiting void fraction given max value + q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & + 1._wp - lag_params%valmaxvoid) + end do + end do + end do + + call nvtxEndRange + + end subroutine s_smear_voidfraction + + !> The purpose of this procedure is obtain the bubble driving pressure p_inf + !! @param bub_id Particle identifier + !! @param q_prim_vf Primitive variables + !! @param ptype 1: p at infinity, 2: averaged P at the bubble location + !! @param f_pinfl Driving pressure + !! @param cell Bubble cell + !! 
@param Romega Control volume radius + subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) +#ifdef _CRAYFTN + !DIR$ INLINEALWAYS s_get_pinf +#else + !$acc routine seq +#endif + integer, intent(in) :: bub_id, ptype + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), intent(out) :: f_pinfl + integer, dimension(3), intent(out) :: cell + real(wp), intent(out), optional :: preterm1, term2, Romega + + real(wp), dimension(3) :: scoord, psi + real(wp) :: dc, vol, aux + real(wp) :: volgas, term1, Rbeq, denom + real(wp) :: charvol, charpres, charvol2, charpres2 + integer, dimension(3) :: cellaux + integer :: i, j, k + integer :: smearGrid, smearGridz + logical :: celloutside + + f_pinfl = 0._wp + + if (lag_params%vel_model > 0) then + cell = -buff_size + call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) + scoord = mtn_s(bub_id, 1:3, 2) + else + scoord = mtn_s(bub_id, 1:3, 2) + cell(:) = int(scoord(:)) + end if + + !$acc loop seq + do i = 1, num_dims + if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 + end do + + if ((lag_params%cluster_type == 1)) then + !< Getting p_cell in terms of only the current cell by interpolation + + !< Getting the cell volulme as Omega + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + else + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if + end if + + !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. + psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + if (cell(1) == (m + buff_size)) then + cell(1) = cell(1) - 1 + psi(1) = 1._wp + else if (cell(1) == (-buff_size)) then + psi(1) = 0._wp + else + if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 + psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) + end if + + psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + if (cell(2) == (n + buff_size)) then + cell(2) = cell(2) - 1 + psi(2) = 1._wp + else if (cell(2) == (-buff_size)) then + psi(2) = 0._wp + else + if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 + psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) + end if + + if (p > 0) then + psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + if (cell(3) == (p + buff_size)) then + cell(3) = cell(3) - 1 + psi(3) = 1._wp + else if (cell(3) == (-buff_size)) then + psi(3) = 0._wp + else + if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 + psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) + end if + else + psi(3) = 0._wp + end if + + !< Perform bilinear interpolation + if (p == 0) then !2D + f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) + else !3D + f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - 
psi(1))*psi(2)*(1._wp - psi(3)) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) + end if + + !R_Omega + dc = (3._wp*vol/(4._wp*pi))**(1._wp/3._wp) + + else if (lag_params%cluster_type >= 2) then + ! Bubble dynamic closure from Maeda and Colonius (2018) + + ! Include the cell that contains the bubble (mapCells+1+mapCells) + smearGrid = mapCells - (-mapCells) + 1 + smearGridz = smearGrid + if (p == 0) smearGridz = 1 + + charvol = 0._wp + charpres = 0._wp + charvol2 = 0._wp + charpres2 = 0._wp + vol = 0._wp + + !$acc loop seq + do i = 1, smearGrid + !$acc loop seq + do j = 1, smearGrid + !$acc loop seq + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) + if (p == 0) cellaux(3) = 0 + + !< check if the current cell is outside the computational domain or not (including ghost cells) + celloutside = .false. + if (num_dims == 2) then + if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + celloutside = .true. + end if + if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then + celloutside = .true. + end if + if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + celloutside = .true. + end if + else + if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + celloutside = .true. + end if + + if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + celloutside = .true. + end if + end if + if (.not. celloutside) then + if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then + celloutside = .true. + end if + end if + + if (.not. celloutside) then + !< Obtaining the cell volulme + if (p > 0) then + vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) + else + if (cyl_coord) then + vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi + else + vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth + end if + end if + !< Update values + charvol = charvol + vol + charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol + charvol2 = charvol2 + vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) + charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3)) & + *vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) + end if + + end do + end do + end do + + f_pinfl = charpres2/charvol2 + vol = charvol + dc = (3._wp*abs(vol)/(4._wp*pi))**(1._wp/3._wp) + + end if + + if (lag_params%pressure_corrector) then + + !Valid if only one bubble exists per cell + volgas = intfc_rad(bub_id, 2)**3._wp + denom = intfc_rad(bub_id, 2)**2._wp + term1 = bub_dphidt(bub_id)*intfc_rad(bub_id, 2)**2._wp + term2 = intfc_vel(bub_id, 2)*intfc_rad(bub_id, 2)**2._wp + + Rbeq = volgas**(1._wp/3._wp) !surrogate bubble radius + aux = dc**3._wp - Rbeq**3._wp + term2 = term2/denom + term2 = 3._wp/2._wp*term2**2._wp*Rbeq**3._wp*(1._wp - Rbeq/dc)/aux + preterm1 = 3._wp/2._wp*Rbeq*(dc**2._wp - Rbeq**2._wp)/(aux*denom) + + !Control volume radius + if (ptype == 2) Romega = dc + + ! 
Getting p_inf + if (ptype == 1) then + f_pinfl = f_pinfl + preterm1*term1 + term2 + end if + + end if + + end subroutine s_get_pinf + + !> This subroutine updates the Lagrange variables using the tvd RK time steppers. + !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. + !! @param stage Current tvd RK stage + subroutine s_update_lagrange_tdv_rk(stage) + + integer, intent(in) :: stage + + integer :: k + + if (time_stepper == 1) then ! 1st order TVD RK + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + + call s_transfer_data_to_tmp() + call s_write_void_evol(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + + if (lag_params%write_bubbles) then + !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + call s_write_lag_particles(mytime) + end if + + elseif (time_stepper == 2) then ! 2nd order TVD RK + if (stage == 1) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + + elseif (stage == 2) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp + gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp + gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + + call s_transfer_data_to_tmp() + call s_write_void_evol(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + if (lag_params%write_bubbles) then + !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + call s_write_lag_particles(mytime) + end if + + end if + + elseif (time_stepper == 3) then ! 
3rd order TVD RK + if (stage == 1) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + + elseif (stage == 2) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp + gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp + gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + + elseif (stage == 3) then + !$acc parallel loop gang vector default(present) private(k) + do k = 1, nBubs + !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] + intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) + intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) + gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) + gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + end do + + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + + call s_transfer_data_to_tmp() + call s_write_void_evol(mytime) + if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() + + if (lag_params%write_bubbles) then + !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) + call s_write_lag_particles(mytime) + end if + + end if + end if + + end subroutine s_update_lagrange_tdv_rk + + !> This subroutine enforces reflective and wall boundary conditions for EL bubbles + subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + + integer, intent(in) :: dest + integer :: k, i, patch_id + integer, dimension(3) :: cell + + !$acc parallel loop gang vector default(present) private(cell) + do k = 1, nBubs + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. 
mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) + elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then + mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then + call s_remove_lag_bubble(k) + end if + + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) + else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then + mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) + else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then + mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) + end if + end if + + if (ib) then + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then + patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + + do i = 1, num_dims + mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & + levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & + * levelset%sf(cell(1), cell(2), cell(3), patch_id) + end do + cell = -buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + end if + end if + + end do + + if (num_procs > 1) then + call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) + call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts) + call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts, nbubs) + end if + + end subroutine s_enforce_EL_bubbles_boundary_conditions + + !> This subroutine returns the computational coordinate of the cell for the given position. + !! @param pos Input coordinates + !! @param cell Computational coordinate of the cell + !! 
@param scoord Calculated particle coordinates +    subroutine s_locate_cell(pos, cell, scoord) + +        real(wp), dimension(3), intent(in) :: pos +        real(wp), dimension(3), intent(out) :: scoord +        integer, dimension(3), intent(inout) :: cell + +        integer :: i + +        do while (pos(1) < x_cb(cell(1) - 1)) +            cell(1) = cell(1) - 1 +        end do + +        do while (pos(1) > x_cb(cell(1))) +            cell(1) = cell(1) + 1 +        end do + +        do while (pos(2) < y_cb(cell(2) - 1)) +            cell(2) = cell(2) - 1 +        end do + +        do while (pos(2) > y_cb(cell(2))) +            cell(2) = cell(2) + 1 +        end do + +        if (p > 0) then +            do while (pos(3) < z_cb(cell(3) - 1)) +                cell(3) = cell(3) - 1 +            end do +            do while (pos(3) > z_cb(cell(3))) +                cell(3) = cell(3) + 1 +            end do +        end if + +        ! The cell whose left boundary coincides with the domain boundary is numbered 0. +        ! If the computational coordinate of the position is s, its physical coordinate is +        ! (the coordinate of the left boundary of the floor(s)-th cell) +        ! + (s - floor(s))*(cell width). +        ! In other words, the coordinate of the center of the cell is x_cc(cell). + +        ! Coordinates in computational space +        scoord(1) = cell(1) + (pos(1) - x_cb(cell(1) - 1))/dx(cell(1)) +        scoord(2) = cell(2) + (pos(2) - y_cb(cell(2) - 1))/dy(cell(2)) +        scoord(3) = 0._wp +        if (p > 0) scoord(3) = cell(3) + (pos(3) - z_cb(cell(3) - 1))/dz(cell(3)) +        cell(:) = int(scoord(:)) +        do i = 1, num_dims +            if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 +        end do + +    end subroutine s_locate_cell + +    !> This subroutine transfers data into the temporary variables. +    subroutine s_transfer_data_to_tmp() + +        integer :: k + +        !$acc parallel loop gang vector default(present) private(k) +        do k = 1, nBubs +            gas_p(k, 2) = gas_p(k, 1) +            gas_mv(k, 2) = gas_mv(k, 1) +            intfc_rad(k, 2) = intfc_rad(k, 1) +            intfc_vel(k, 2) = intfc_vel(k, 1) +            mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) +            mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) +            mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) +            mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) +        end do + +    end subroutine s_transfer_data_to_tmp + +    !> The purpose of this procedure is to determine if the global coordinates of the bubbles +        !! are present in the current MPI processor (including ghost cells). +        !! @param pos_part Spatial coordinates of the bubble +    function particle_in_domain(pos_part) + +        logical :: particle_in_domain +        real(wp), dimension(3), intent(in) :: pos_part + +        ! 2D +        if (p == 0 .and. cyl_coord .neqv. .true.) then +            ! Defining a virtual z-axis that has the same dimensions as the y-axis +            ! defined in the input file +            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & +                                  (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & +                                  (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) +        else +            ! cyl_coord +            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & +                                  (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) +        end if + +        ! 3D +        if (p > 0) then +            particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & +                                  (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & +                                  (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) +        end if + +        ! For symmetric and wall boundary conditions +        if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then +            particle_in_domain = (particle_in_domain .and. 
(pos_part(1) >= x_cb(-1))) + end if + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) + end if + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then + particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) + end if + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then + particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) + end if + + if (p > 0) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) + end if + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then + particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p))) + end if + end if + + end function particle_in_domain + + !> The purpose of this procedure is to determine if the lagrangian bubble is located in the + !! physical domain. The ghost cells are not part of the physical domain. + !! @param pos_part Spatial coordinates of the bubble + function particle_in_domain_physical(pos_part) + + logical :: particle_in_domain_physical + real(wp), dimension(3), intent(in) :: pos_part + + particle_in_domain_physical = ((pos_part(1) < x_cb(m)) .and. (pos_part(1) >= x_cb(-1)) .and. & + (pos_part(2) < y_cb(n)) .and. (pos_part(2) >= y_cb(-1))) + + if (p > 0) then + particle_in_domain_physical = (particle_in_domain_physical .and. (pos_part(3) < z_cb(p)) .and. (pos_part(3) >= z_cb(-1))) + end if + + end function particle_in_domain_physical + + !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z directions + !! following a second-order central difference considering uneven widths + !! @param q Input scalar field + !! @param dq Output gradient of q + !! @param dir Gradient spatial direction + subroutine s_gradient_dir(q, dq, dir) + + type(scalar_field), intent(inout) :: q + type(scalar_field), intent(inout) :: dq + integer, intent(in) :: dir + + integer :: i, j, k + + if (dir == 1) then + ! Gradient in x dir. + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & + + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & + - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) + end do + end do + end do + else + if (dir == 2) then + ! Gradient in y dir. + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) + end do + end do + end do + else + ! Gradient in z dir. 
+ !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) + end do + end do + end do + end if + end if + + end subroutine s_gradient_dir + + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !! @param q_time Current time + subroutine s_write_lag_particles(qtime) + + real(wp), intent(in) :: qtime + integer :: k + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + + if (.not. file_exist) then + open (11, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (11, *) 'currentTime, particleID, x, y, z, ', & + 'coreVaporMass, coreVaporConcentration, radius, interfaceVelocity, ', & + 'corePressure' + else + open (11, FILE=trim(file_loc), FORM='formatted', position='append') + end if + + ! Cycle through list + do k = 1, nBubs + write (11, '(6X,f12.6,I24.8,8e24.8)') & + qtime, & + lag_id(k, 1), & + mtn_pos(k, 1, 1), & + mtn_pos(k, 2, 1), & + mtn_pos(k, 3, 1), & + gas_mv(k, 1), & + gas_mv(k, 1)/(gas_mv(k, 1) + gas_mg(k)), & + intfc_rad(k, 1), & + intfc_vel(k, 1), & + gas_p(k, 1) + end do + + close (11) + + end subroutine s_write_lag_particles + + !> Subroutine that writes some useful statistics related to the volume fraction + !! of the particles (void fraction) in the computatioational domain + !! on each time step. + !! @param q_time Current time + subroutine s_write_void_evol(qtime) + + real(wp), intent(in) :: qtime + real(wp) :: volcell, voltot + real(wp) :: lag_void_max, lag_void_avg, lag_vol + real(wp) :: void_max_glb, void_avg_glb, vol_glb + + integer :: i, j, k + + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + + if (proc_rank == 0) then + write (file_loc, '(A)') 'voidfraction.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + open (12, FILE=trim(file_loc), FORM='formatted', position='rewind') + !write (12, *) 'currentTime, averageVoidFraction, ', & + ! 'maximumVoidFraction, totalParticlesVolume' + !write (12, *) 'The averageVoidFraction value does ', & + ! 'not reflect the real void fraction in the cloud since the ', & + ! 
'cells which do not have bubbles are not accounted for' +            else +                open (12, FILE=trim(file_loc), FORM='formatted', position='append') +            end if +        end if + +        lag_void_max = 0._wp +        lag_void_avg = 0._wp +        lag_vol = 0._wp +        !$acc parallel loop collapse(3) gang vector default(present) reduction(+:lag_vol,lag_void_avg) & +        !$acc reduction(MAX:lag_void_max) copy(lag_vol, lag_void_avg, lag_void_max) +        do k = 0, p +            do j = 0, n +                do i = 0, m +                    lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) +                    call s_get_char_vol(i, j, k, volcell) +                    if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then +                        lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell +                        lag_vol = lag_vol + volcell +                    end if +                end do +            end do +        end do + +#ifdef MFC_MPI +        if (num_procs > 1) then +            call s_mpi_allreduce_max(lag_void_max, void_max_glb) +            lag_void_max = void_max_glb +            call s_mpi_allreduce_sum(lag_vol, vol_glb) +            lag_vol = vol_glb +            call s_mpi_allreduce_sum(lag_void_avg, void_avg_glb) +            lag_void_avg = void_avg_glb +        end if +#endif +        voltot = lag_void_avg +        ! This voidavg value does not reflect the real void fraction in the cloud, +        ! since cells which do not have bubbles are not accounted for +        if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol + +        if (proc_rank == 0) then +            write (12, '(6X,4e24.8)') & +                qtime, & +                lag_void_avg, & +                lag_void_max, & +                voltot +            close (12) +        end if + +    end subroutine s_write_void_evol + +    !> Subroutine that writes the restart files for the particles in the Lagrangian solver. +        !! @param t_step Current time step +    subroutine s_write_restart_lag_bubbles(t_step) + +        ! Generic string used to store the address of a particular file +        integer, intent(in) :: t_step + +        character(LEN=path_len + 2*name_len) :: file_loc +        logical :: file_exist +        integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn +        integer :: i, k + +#ifdef MFC_MPI +        ! For Parallel I/O +        integer :: ifile, ierr +        integer, dimension(MPI_STATUS_SIZE) :: status +        integer(KIND=MPI_OFFSET_KIND) :: disp +        integer :: view +        integer, dimension(2) :: gsizes, lsizes, start_idx_part +        integer, dimension(num_procs) :: part_order, part_ord_mpi + +        bub_id = 0 +        if (nBubs /= 0) then +            do k = 1, nBubs +                if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then +                    bub_id = bub_id + 1 +                end if +            end do +        end if + +        if (.not. parallel_io) return + +        ! Total number of particles +        call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & +                           MPI_SUM, MPI_COMM_WORLD, ierr) + +        ! Total number of particles written so far +        call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & +                           MPI_SUM, MPI_COMM_WORLD, ierr) + +        lsizes(1) = max(1, bub_id) +        lsizes(2) = 21 + +        ! If the particle number is zero, put 1 since MPI cannot deal with writing +        ! zero particles +        part_order(:) = 1 +        part_order(proc_rank + 1) = max(1, bub_id) + +        call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & +                           MPI_MAX, MPI_COMM_WORLD, ierr) + +        gsizes(1) = sum(part_ord_mpi(1:num_procs)) +        gsizes(2) = 21 + +        start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) +        start_idx_part(2) = 0 + +        write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' +        file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) +        inquire (FILE=trim(file_loc), EXIST=file_exist) +        if (file_exist .and. proc_rank == 0) then +            call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) +        end if + +        ! 
Writing down the total number of particles + if (proc_rank == 0) then + open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') + write (9) gsizes(1), mytime, dt + close (9) + end if + + call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_type_COMMIT(view, ierr) + + allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) + + ! Open the file to write all flow variables + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist .and. proc_rank == 0) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + disp = 0._wp + + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + + ! Cycle through list + i = 1 + + if (bub_id == 0) then + MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp + else + + do k = 1, nBubs + + if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then + + MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) + MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 8:10) = mtn_vel(k, 1:3, 1) + MPI_IO_DATA_lag_bubbles(i, 11) = intfc_rad(k, 1) + MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) + MPI_IO_DATA_lag_bubbles(i, 13) = bub_R0(k) + MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats(k) + MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats(k) + MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) + MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) + MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) + MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) + MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) + MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) + + i = i + 1 + + end if + + end do + + end if + + call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & + mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (MPI_IO_DATA_lag_bubbles) + +#endif + + end subroutine s_write_restart_lag_bubbles + + !> This procedure calculates the maximum and minimum radius of each bubble. + subroutine s_calculate_lag_bubble_stats() + + integer :: k + + !$acc parallel loop gang vector default(present) reduction(MAX:Rmax_glb) & + !$acc reduction(MIN: Rmin_glb) copy(Rmax_glb, Rmin_glb) + do k = 1, nBubs + Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) + Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) + end do + + end subroutine s_calculate_lag_bubble_stats + + !> Subroutine that writes the maximum and minimum radius of each bubble. 
+ subroutine s_write_lag_bubble_stats() + + integer :: k + character(LEN=path_len + 2*name_len) :: file_loc + + write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' + file_loc = trim(case_dir)//'/D/'//trim(file_loc) + + !$acc update host(Rmax_glb, Rmin_glb) + + open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') + write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' + + do k = 1, nBubs + write (13, '(6X,2I24.8,5e24.8)') & + proc_rank, & + lag_id(k, 1), & + mtn_pos(k, 1, 1), & + mtn_pos(k, 2, 1), & + mtn_pos(k, 3, 1), & + Rmax_stats(k), & + Rmin_stats(k) + end do + + close (13) + + end subroutine s_write_lag_bubble_stats + + !> The purpose of this subroutine is to remove one specific particle if dt is too small. + !! @param bub_id Particle id + subroutine s_remove_lag_bubble(bub_id) + + integer, intent(in) :: bub_id + + integer :: i + + !$acc loop seq + do i = bub_id, nBubs - 1 + lag_id(i, 1) = lag_id(i + 1, 1) + bub_R0(i) = bub_R0(i + 1) + Rmax_stats(i) = Rmax_stats(i + 1) + Rmin_stats(i) = Rmin_stats(i + 1) + gas_mg(i) = gas_mg(i + 1) + gas_betaT(i) = gas_betaT(i + 1) + gas_betaC(i) = gas_betaC(i + 1) + bub_dphidt(i) = bub_dphidt(i + 1) + gas_p(i, 1:2) = gas_p(i + 1, 1:2) + gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) + intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) + intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) + mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) + mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) + mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) + mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) + intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) + intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) + gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) + gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) + end do + + nBubs = nBubs - 1 + !$acc update device(nBubs) + + end subroutine s_remove_lag_bubble + + !> The purpose of this subroutine is to deallocate variables + subroutine s_finalize_lagrangian_solver() + + integer :: i + + do i = 1, q_beta_idx + @:DEALLOCATE(q_beta%vf(i)%sf) + end do + @:DEALLOCATE(q_beta%vf) + + !Deallocating space + @:DEALLOCATE(lag_id) + @:DEALLOCATE(bub_R0) + @:DEALLOCATE(Rmax_stats) + @:DEALLOCATE(Rmin_stats) + @:DEALLOCATE(gas_mg) + @:DEALLOCATE(gas_betaT) + @:DEALLOCATE(gas_betaC) + @:DEALLOCATE(bub_dphidt) + @:DEALLOCATE(gas_p) + @:DEALLOCATE(gas_mv) + @:DEALLOCATE(intfc_rad) + @:DEALLOCATE(intfc_vel) + @:DEALLOCATE(mtn_pos) + @:DEALLOCATE(mtn_posPrev) + @:DEALLOCATE(mtn_vel) + @:DEALLOCATE(mtn_s) + @:DEALLOCATE(intfc_draddt) + @:DEALLOCATE(intfc_dveldt) + @:DEALLOCATE(gas_dpdt) + @:DEALLOCATE(gas_dmvdt) + @:DEALLOCATE(mtn_dposdt) + @:DEALLOCATE(mtn_dveldt) + + end subroutine s_finalize_lagrangian_solver + +end module m_bubbles_EL diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index beda4de145..674d07d049 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -11,6 +11,8 @@ module m_bubbles_EL use m_mpi_proxy !< Message passing interface (MPI) module proxy + use m_mpi_common + use m_bubbles_EL_kernels !< Definitions of the kernel functions use m_bubbles !< General bubble dynamics procedures @@ -139,6 +141,8 @@ contains if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol + if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) + ! 
Starting bubbles call s_start_lagrange_inputs() call s_read_input_bubbles(q_cons_vf) @@ -245,10 +249,14 @@ contains call s_restart_bubbles(bub_id, save_count) end if - if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') - print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id + call s_mpi_reduce_int_sum(bub_id) + + if (proc_rank == 0) then + if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') + end if + !$acc update device(bubbles_lagrange, lag_params) !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & @@ -571,8 +579,6 @@ contains ! Obtaining driving pressure call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - !if (k == 14) print*, k, cell(1), cell(2), cell(3) - ! Obtain liquid density and computing speed of sound from pinf !$acc loop seq do i = 1, num_fluids @@ -600,7 +606,6 @@ contains gas_mv(k, 1) = myMass_v else - ! Radial acceleration from bubble models intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & myPb, myPbdot, dmalf, dmntait, dmBtait, & @@ -614,13 +619,13 @@ contains do l = 1, num_dims if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,1), & + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,1), mtn_vel(k,l,2), & + intfc_rad(k,2), mtn_vel(k,l,2), & gas_mg(k), gas_mv(k, 2), & Re(1), myRho, cell, l, q_prim_vf) else @@ -843,12 +848,7 @@ contains if (lag_params%vel_model > 0) then cell = -buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) - !print*, mtn_pos(bub_id, 1, 2), mtn_pos(bub_id, 2, 2) scoord = mtn_s(bub_id, 1:3, 2) - - !if (k == 14) print*, "pinf cell", cell - !if (k == 14) print*, "pinf scoord", scoord - !if (k == 14) print*, "pinf pos", mtn_pos(bub_id,:,2) else scoord = mtn_s(bub_id, 1:3, 2) cell(:) = int(scoord(:)) @@ -1053,6 +1053,7 @@ contains !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) @@ -1077,6 +1078,7 @@ contains !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) @@ -1091,6 +1093,7 @@ contains !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp @@ -1116,6 +1119,7 @@ contains !u{1} = u{n} 
+ dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) @@ -1130,6 +1134,7 @@ contains !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp @@ -1144,6 +1149,7 @@ contains !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) @@ -1175,13 +1181,16 @@ contains !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs - if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) - else if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & + elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then + call s_remove_lag_bubble(k) end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1190,6 +1199,10 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) end if if (p > 0) then @@ -1199,16 +1212,20 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. 
mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then + call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then + call s_remove_lag_bubble(k) end if end if if (ib) then cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - !if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) - if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell + do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & @@ -1216,12 +1233,23 @@ contains end do cell = -buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - if (k == 14) print*, "IB", mtn_pos(k, 1:3, dest), cell end if end if end do + if (num_procs > 1) then + call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) + call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts) + call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & + intfc_draddt, mtn_posPrev, mtn_pos, & + mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & + gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + end if + end subroutine s_enforce_EL_bubbles_boundary_conditions !> This subroutine returns the computational coordinate of the cell for the given position. @@ -1310,20 +1338,26 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & + (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & + (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & + (pos_part(3) < z_cb(p + buff_size - mapCells)) .and. & + (pos_part(3) >= z_cb(-buff_size - 1 + mapCells))) end if ! 
For symmetric and wall boundary condition diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index ea0281ce14..b55b6c7e9c 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -128,7 +128,9 @@ contains s_coord(1:3) = lbk_s(l, 1:3, 2) center(1:2) = lbk_pos(l, 1:2, 2) if (p > 0) center(3) = lbk_pos(l, 3, 2) + cell = -buff_size call s_get_cell(s_coord, cell) + !print*, s_coord call s_compute_stddsv(cell, volpart, stddsv) strength_vol = volpart @@ -250,7 +252,7 @@ contains theta = 0._wp Nr = ceiling(lag_params%charwidth/(y_cb(cellaux(2)) - y_cb(cellaux(2) - 1))) Nr_count = 1._wp - mapCells*1._wp - dzp = y_cb(cellaux(2) + 1) - y_cb(cellaux(2)) + dzp = y_cb(cellaux(2)) - y_cb(cellaux(2) - 1) Lz2 = (center(3) - (dzp*(0.5_wp + Nr_count) - lag_params%charwidth/2._wp))**2._wp distance = sqrt((center(1) - nodecoord(1))**2._wp + (center(2) - nodecoord(2))**2._wp + Lz2) func = dzp/lag_params%charwidth*exp(-0.5_wp*(distance/stddsv)**2._wp)/(sqrt(2._wp*pi)*stddsv)**3._wp @@ -281,21 +283,27 @@ contains celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(1) < -buff_size + mapCells) .or. & + (cellaux(2) < -buff_size + mapCells)) then celloutside = .true. end if if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then celloutside = .true. end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(2) > n + buff_size - mapCells) .or. & + (cellaux(1) > m + buff_size - mapCells)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(3) < -buff_size + mapCells) .or. & + (cellaux(1) < -buff_size + mapCells) .or. & + (cellaux(2) < -buff_size + mapCells)) then celloutside = .true. end if - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(3) > p + buff_size - mapCells) .or. & + (cellaux(2) > n + buff_size - mapCells) .or. & + (cellaux(1) > m + buff_size - mapCells)) then celloutside = .true. end if end if @@ -412,7 +420,7 @@ contains !! real type into integer. !! @param s Computational coordinates of the bubble, real type !! @param get_cell Computational coordinates of the bubble, integer type - subroutine s_get_cell(s_cell, get_cell) + subroutine s_get_cell(s_cell, get_cell) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_cell #else diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 6b3ef62bd1..5960b3a84f 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -205,6 +205,11 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(int_bounds_info), dimension(3) :: nidx !< Indicies for neighboring processors + + integer, allocatable, dimension(:,:,:) :: neighbor_ranks + !! Neighbor ranks for lagrangian particle communication + integer, allocatable, dimension(:) :: start_idx !< !! 
Starting cell-center index of local processor in global grid diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 0a29555e0a..992e3470e1 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -44,12 +44,19 @@ module m_mpi_proxy !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: ierr + integer, private :: request, ierr !> @} integer :: i_halo_size !$acc declare create(i_halo_size) + integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts + integer, dimension(:,:,:,:), allocatable :: p_send_ids + character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff + type(bounds_info), dimension(3) :: comm_coords + integer :: p_buff_size + !$acc declare create(p_send_counts, comm_coords) + contains subroutine s_initialize_mpi_proxy_module() @@ -72,12 +79,40 @@ contains end if !$acc update device(i_halo_size) + @:ALLOCATE(ib_buff_send(0:i_halo_size), ib_buff_recv(0:i_halo_size)) end if #endif end subroutine s_initialize_mpi_proxy_module + subroutine s_initialize_particles_mpi(lag_num_ts) + + integer :: i, j, k + integer :: real_size, int_size, nVar, lag_num_ts + + call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) + call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) + + nVar = 28 + 10*lag_num_ts + int_size + p_buff_size = nVar*lag_params%nBubs_glb*real_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) + + comm_coords(1)%beg = x_cb(buff_size - mapCells - 1) + comm_coords(1)%end = x_cb(m - buff_size + mapCells) + if (n > 0) then + comm_coords(2)%beg = y_cb(buff_size - mapCells - 1) + comm_coords(2)%end = y_cb(m - buff_size + mapCells) + if (p > 0) then + comm_coords(3)%beg = z_cb(buff_size - mapCells - 1) + comm_coords(3)%end = z_cb(p - buff_size + mapCells) + end if + end if + + @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) + + end subroutine s_initialize_particles_mpi + !> Since only the processor with rank 0 reads and verifies !! the consistency of user inputs, these are initially not !! available to the other processors. Then, the purpose of @@ -135,7 +170,7 @@ contains if (bubbles_lagrange) then #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats'] + & 'write_bubbles', 'write_bubbles_stats', 'vel_model', 'drag_model'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -417,6 +452,191 @@ contains end subroutine s_mpi_sendrecv_ib_buffers + subroutine s_add_particles_to_transfer_list(pos, posPrev, nbub) + + real(wp), dimension(:,:) :: pos, posPrev + integer :: bubID, nbub + integer :: i, j, k + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + p_send_counts(i,j,k) = 0 + end do + end do + end do + + do k = 1, nbub + ! Left face + if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then + p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k + p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + + if (n > 0) then + ! Left bottom corner + if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. 
nidx(2)%beg /= 0) then + p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k + p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + + ! Left top corner + elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k + p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end if + + ! Right face + elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then + p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k + p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + + if (n > 0) then + ! Right bottom corner + if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k + p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + + ! Right top corner + elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k + p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end if + + ! Bottom face (corners already accounted for) + elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + + ! Top face (corners already accounted for) + elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end do + + end subroutine s_add_particles_to_transfer_list + + subroutine s_mpi_send_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & + vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts) + + real(wp), dimension(:) :: bub_R0, gas_mg + real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv + real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel + integer, dimension(:,:) :: lag_id + integer :: i, j, k, l, q + integer :: position, bub_id, lag_num_ts + +#ifdef MFC_MPI + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) + if (p_send_counts(i,j,k) > 0) then + print*, "SEND", proc_rank, p_send_counts(i,j,k) + end if + if (p_send_counts(i,j,k) > 0) then + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + do q = 1, 2 + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + + call MPI_Isend(p_send_buff, position, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, request, ierr) + end if + end if + end do + end do + end do +#endif + + end subroutine s_mpi_send_particles + + subroutine s_mpi_recv_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & + vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + + real(wp), dimension(:) :: bub_R0, gas_mg + real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv + real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel + integer, dimension(:, :) :: lag_id + integer :: i, j, k, l, q + integer :: position, bub_id, nbubs, lag_num_ts + +#ifdef MFC_MPI + do 
k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (p_recv_counts(i,j,k) > 0) then + print*, "RECV", proc_rank, p_recv_counts(i,j,k) + end if + if (p_recv_counts(i,j,k) > 0) then + position = 0 + call MPI_recv(p_recv_buff, p_buff_size, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_buff_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_buff_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + end do + call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + lag_id(bub_id, 2) = bub_id + end do + end if + end if + end do + end do + end do +#endif + + end subroutine s_mpi_recv_particles + subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 3fec8127a7..c8299635df 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -904,13 +904,6 @@ contains end if if (bubbles_lagrange) then - ! RHS additions for sub-grid bubbles_lagrange - call nvtxStartRange("RHS-EL-BUBBLES-SRC") - call s_compute_bubbles_EL_source( & - q_cons_qp%vf(1:sys_size), & - q_prim_qp%vf(1:sys_size), & - rhs_vf) - call nvtxEndRange ! Compute bubble dynamics if (.not. adap_dt) then call nvtxStartRange("RHS-EL-BUBBLES-DYN") @@ -922,6 +915,14 @@ contains stage) call nvtxEndRange end if + + ! RHS additions for sub-grid bubbles_lagrange + call nvtxStartRange("RHS-EL-BUBBLES-SRC") + call s_compute_bubbles_EL_source( & + q_cons_qp%vf(1:sys_size), & + q_prim_qp%vf(1:sys_size), & + rhs_vf) + call nvtxEndRange end if if (chemistry .and. 
chem_params%reactions) then diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index de07dfd631..5af09f5ed6 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -1020,7 +1020,7 @@ contains integer :: i, j, k, l call nvtxStartRange("RHS-BODYFORCES") - call s_compute_body_forces_rhs(q_cons_vf, q_prim_vf, rhs_vf) + call s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) !$acc parallel loop collapse(4) gang vector default(present) do i = momxb, E_idx From 08227856d6e0f49d5dc4eb1a99f562aa8f65cece Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 10 Jun 2025 11:58:52 -0400 Subject: [PATCH 31/62] remove file --- "src/simulation/\\" | 1837 ------------------------------------------- 1 file changed, 1837 deletions(-) delete mode 100644 "src/simulation/\\" diff --git "a/src/simulation/\\" "b/src/simulation/\\" deleted file mode 100644 index 2559c0ffff..0000000000 --- "a/src/simulation/\\" +++ /dev/null @@ -1,1837 +0,0 @@ -!> -!! @file m_bubbles_EL.fpp -!! @brief Contains module m_bubbles_EL - -#:include 'macros.fpp' - -!> @brief This module is used to to compute the volume-averaged bubble model -module m_bubbles_EL - - use m_global_parameters !< Definitions of the global parameters - - use m_mpi_proxy !< Message passing interface (MPI) module proxy - - use m_mpi_common - - use m_bubbles_EL_kernels !< Definitions of the kernel functions - - use m_bubbles !< General bubble dynamics procedures - - use m_variables_conversion !< State variables type conversion procedures - - use m_compile_specific - - use m_boundary_common - - use m_sim_helpers - - use m_helper - - use m_ibm - - implicit none - - !(nBub) - integer, allocatable, dimension(:, :) :: lag_id !< Global and local IDs - real(wp), allocatable, dimension(:) :: bub_R0 !< Initial bubble radius - real(wp), allocatable, dimension(:) :: Rmax_stats !< Maximum radius - real(wp), allocatable, dimension(:) :: Rmin_stats !< Minimum radius - real(wp), allocatable, dimension(:) :: gas_mg !< Bubble's gas mass - real(wp), allocatable, dimension(:) :: gas_betaT !< heatflux model (Preston et al., 2007) - real(wp), allocatable, dimension(:) :: gas_betaC !< massflux model (Preston et al., 2007) - real(wp), allocatable, dimension(:) :: bub_dphidt !< subgrid velocity potential (Maeda & Colonius, 2018) - !(nBub, 1 -> actual val or 2 -> temp val) - real(wp), allocatable, dimension(:, :) :: gas_p !< Pressure in the bubble - real(wp), allocatable, dimension(:, :) :: gas_mv !< Vapor mass in the bubble - real(wp), allocatable, dimension(:, :) :: intfc_rad !< Bubble radius - real(wp), allocatable, dimension(:, :) :: intfc_vel !< Velocity of the bubble interface - !(nBub, 1-> x or 2->y or 3 ->z, 1 -> actual or 2 -> temporal val) - real(wp), allocatable, dimension(:, :, :) :: mtn_pos !< Bubble's position - real(wp), allocatable, dimension(:, :, :) :: mtn_posPrev !< Bubble's previous position - real(wp), allocatable, dimension(:, :, :) :: mtn_vel !< Bubble's velocity - real(wp), allocatable, dimension(:, :, :) :: mtn_s !< Bubble's computational cell position in real format - !(nBub, 1-> x or 2->y or 3 ->z, time-stage) - real(wp), allocatable, dimension(:, :) :: intfc_draddt !< Time derivative of bubble's radius - real(wp), allocatable, dimension(:, :) :: intfc_dveldt !< Time derivative of bubble's interface velocity - real(wp), allocatable, dimension(:, :) :: gas_dpdt !< Time derivative of gas pressure - real(wp), allocatable, dimension(:, :) :: 
gas_dmvdt !< Time derivative of the vapor mass in the bubble - real(wp), allocatable, dimension(:, :, :) :: mtn_dposdt !< Time derivative of the bubble's position - real(wp), allocatable, dimension(:, :, :) :: mtn_dveldt !< Time derivative of the bubble's velocity - - !$acc declare create(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt, & - !$acc gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - !$acc gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt) - - integer, private :: lag_num_ts !< Number of time stages in the time-stepping scheme - - !$acc declare create(lag_num_ts) - - integer :: nBubs !< Number of bubbles in the local domain - real(wp) :: Rmax_glb, Rmin_glb !< Maximum and minimum bubbe size in the local domain - type(vector_field) :: q_beta !< Projection of the lagrangian particles in the Eulerian framework - integer :: q_beta_idx !< Size of the q_beta vector field - - !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) - -contains - - !> Initializes the lagrangian subgrid bubble solver - !! @param q_cons_vf Initial conservative variables - subroutine s_initialize_bubbles_EL_module(q_cons_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - - integer :: nBubs_glb, i - - ! Setting number of time-stages for selected time-stepping scheme - lag_num_ts = time_stepper - - ! Allocate space for the Eulerian fields needed to map the effect of the bubbles - if (lag_params%solver_approach == 1) then - ! One-way coupling - q_beta_idx = 3 - elseif (lag_params%solver_approach == 2) then - ! Two-way coupling - q_beta_idx = 4 - if (p == 0) then - !Subgrid noise model for 2D approximation - q_beta_idx = 6 - end if - else - call s_mpi_abort('Please check the lag_params%solver_approach input') - end if - - !$acc update device(lag_num_ts, q_beta_idx) - - @:ALLOCATE(q_beta%vf(1:q_beta_idx)) - - do i = 1, q_beta_idx - @:ALLOCATE(q_beta%vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - end do - - @:ACC_SETUP_VFs(q_beta) - - ! Allocating space for lagrangian variables - nBubs_glb = lag_params%nBubs_glb - - @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) - @:ALLOCATE(bub_R0(1:nBubs_glb)) - @:ALLOCATE(Rmax_stats(1:nBubs_glb)) - @:ALLOCATE(Rmin_stats(1:nBubs_glb)) - @:ALLOCATE(gas_mg(1:nBubs_glb)) - @:ALLOCATE(gas_betaT(1:nBubs_glb)) - @:ALLOCATE(gas_betaC(1:nBubs_glb)) - @:ALLOCATE(bub_dphidt(1:nBubs_glb)) - @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) - @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) - @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) - @:ALLOCATE(intfc_vel(1:nBubs_glb, 1:2)) - @:ALLOCATE(mtn_pos(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(mtn_posPrev(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(mtn_vel(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(mtn_s(1:nBubs_glb, 1:3, 1:2)) - @:ALLOCATE(intfc_draddt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(intfc_dveldt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(gas_dpdt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(gas_dmvdt(1:nBubs_glb, 1:lag_num_ts)) - @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) - @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) - - if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol - - if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) - - ! 
Starting bubbles - call s_start_lagrange_inputs() - call s_read_input_bubbles(q_cons_vf) - - end subroutine s_initialize_bubbles_EL_module - - !> The purpose of this procedure is to start lagrange bubble parameters applying nondimensionalization if needed - subroutine s_start_lagrange_inputs() - - integer :: id_bubbles, id_host - real(wp) :: rho0, c0, T0, x0, p0 - - id_bubbles = num_fluids - id_host = num_fluids - 1 - - !Reference values - rho0 = lag_params%rho0 - c0 = lag_params%c0 - T0 = lag_params%T0 - x0 = lag_params%x0 - p0 = rho0*c0*c0 - - !Update inputs - Tw = lag_params%Thost/T0 - pv = fluid_pp(id_host)%pv/p0 - gamma_v = fluid_pp(id_bubbles)%gamma_v - gamma_n = fluid_pp(id_host)%gamma_v - k_vl = fluid_pp(id_bubbles)%k_v*(T0/(x0*rho0*c0*c0*c0)) - k_nl = fluid_pp(id_host)%k_v*(T0/(x0*rho0*c0*c0*c0)) - cp_v = fluid_pp(id_bubbles)%cp_v*(T0/(c0*c0)) - cp_n = fluid_pp(id_host)%cp_v*(T0/(c0*c0)) - R_v = (R_uni/fluid_pp(id_bubbles)%M_v)*(T0/(c0*c0)) - R_n = (R_uni/fluid_pp(id_host)%M_v)*(T0/(c0*c0)) - lag_params%diffcoefvap = lag_params%diffcoefvap/(x0*c0) - ss = fluid_pp(id_host)%ss/(rho0*x0*c0*c0) - mul0 = fluid_pp(id_host)%mul0/(rho0*x0*c0) - - ! Parameters used in bubble_model - Web = 1._wp/ss - Re_inv = mul0 - - ! Need improvements to accept polytropic gas compression, isothermal and adiabatic thermal models, and - ! the Gilmore and RP bubble models. - polytropic = .false. ! Forcing no polytropic model - thermal = 3 ! Forcing constant transfer coefficient model based on Preston et al., 2007 - ! If Keller-Miksis model is not selected, then no radial motion - - end subroutine s_start_lagrange_inputs - - !> The purpose of this procedure is to obtain the initial bubbles' information - !! @param q_cons_vf Conservative variables - subroutine s_read_input_bubbles(q_cons_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - - real(wp), dimension(8) :: inputBubble - real(wp) :: qtime - integer :: id, bub_id, save_count - integer :: i, ios - logical :: file_exist, indomain - - character(LEN=path_len + 2*name_len) :: path_D_dir !< - - ! Initialize number of particles - bub_id = 0 - id = 0 - - ! Read the input lag_bubble file or restart point - if (cfl_dt) then - save_count = n_start - qtime = n_start*t_save - else - save_count = t_step_start - qtime = t_step_start*dt - end if - - if (save_count == 0) then - if (proc_rank == 0) print *, 'Reading lagrange bubbles input file.' - inquire (file='input/lag_bubbles.dat', exist=file_exist) - if (file_exist) then - open (94, file='input/lag_bubbles.dat', form='formatted', iostat=ios) - do while (ios == 0) - read (94, *, iostat=ios) (inputBubble(i), i=1, 8) - if (ios /= 0) cycle - indomain = particle_in_domain(inputBubble(1:3)) - id = id + 1 - if (id > lag_params%nBubs_glb .and. proc_rank == 0) then - call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") - end if - if (indomain) then - bub_id = bub_id + 1 - call s_add_bubbles(inputBubble, q_cons_vf, bub_id) - lag_id(bub_id, 1) = id !global ID - lag_id(bub_id, 2) = bub_id !local ID - nBubs = bub_id ! 
local number of bubbles - end if - end do - close (94) - else - call s_mpi_abort("Initialize the lagrange bubbles in input/lag_bubbles.dat") - end if - else - if (proc_rank == 0) print *, 'Restarting lagrange bubbles at save_count: ', save_count - call s_restart_bubbles(bub_id, save_count) - end if - - print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - - call s_mpi_reduce_int_sum(bub_id) - - if (proc_rank == 0) then - if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') - end if - - !$acc update device(bubbles_lagrange, lag_params) - - !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & - !$acc bub_dphidt, gas_p, gas_mv, intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs) - - Rmax_glb = min(dflt_real, -dflt_real) - Rmin_glb = max(dflt_real, -dflt_real) - !$acc update device(Rmax_glb, Rmin_glb) - - !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - - !Populate temporal variables - call s_transfer_data_to_tmp() - call s_smear_voidfraction() - - if (lag_params%write_bubbles) call s_write_lag_particles(qtime) - - if (save_count == 0) then - ! Create ./D directory - write (path_D_dir, '(A,I0,A,I0)') trim(case_dir)//'/D' - call my_inquire(path_D_dir, file_exist) - if (.not. file_exist) call s_create_directory(trim(path_D_dir)) - call s_write_restart_lag_bubbles(save_count) ! Needed for post_processing - call s_write_void_evol(qtime) - end if - - end subroutine s_read_input_bubbles - - !> The purpose of this procedure is to obtain the information of the bubbles when starting fresh - !! @param inputBubble Bubble information - !! @param q_cons_vf Conservative variables - !! @param bub_id Local id of the bubble - subroutine s_add_bubbles(inputBubble, q_cons_vf, bub_id) - - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - real(wp), dimension(8), intent(in) :: inputBubble - integer, intent(in) :: bub_id - integer :: i - - real(wp) :: pliq, volparticle, concvap, totalmass, kparticle, cpparticle - real(wp) :: omegaN, PeG, PeT, rhol, pcrit, qv, gamma, pi_inf, dynP - integer, dimension(3) :: cell - real(wp), dimension(2) :: Re - real(wp) :: massflag, heatflag, Re_trans, Im_trans - - massflag = 0._wp - heatflag = 0._wp - if (lag_params%massTransfer_model) massflag = 1._wp - if (lag_params%heatTransfer_model) heatflag = 1._wp - - bub_R0(bub_id) = inputBubble(7) - Rmax_stats(bub_id) = min(dflt_real, -dflt_real) - Rmin_stats(bub_id) = max(dflt_real, -dflt_real) - bub_dphidt(bub_id) = 0._wp - intfc_rad(bub_id, 1) = inputBubble(7) - intfc_vel(bub_id, 1) = inputBubble(8) - mtn_pos(bub_id, 1:3, 1) = inputBubble(1:3) - mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) - mtn_vel(bub_id, 1:3, 1) = inputBubble(4:6) - - if (cyl_coord .and. p == 0) then - mtn_pos(bub_id, 2, 1) = sqrt(mtn_pos(bub_id, 2, 1)**2._wp + & - mtn_pos(bub_id, 3, 1)**2._wp) - !Storing azimuthal angle (-Pi to Pi)) into the third coordinate variable - mtn_pos(bub_id, 3, 1) = atan2(inputBubble(3), inputBubble(2)) - mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) - end if - - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - - ! Check if the bubble is located in the ghost cell of a symmetric boundary - if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. 
& - (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & - (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & - (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - - if (p > 0) then - if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & - (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - end if - - ! If particle is in the ghost cells, find the closest non-ghost cell - cell(1) = min(max(cell(1), 0), m) - cell(2) = min(max(cell(2), 0), n) - if (p > 0) cell(3) = min(max(cell(3), 0), p) - call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & - rhol, gamma, pi_inf, qv, Re) - dynP = 0._wp - do i = 1, num_dims - dynP = dynP + 0.5_wp*q_cons_vf(contxe + i)%sf(cell(1), cell(2), cell(3))**2/rhol - end do - pliq = (q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - dynP - pi_inf)/gamma - if (pliq < 0) print *, "Negative pressure", proc_rank, & - q_cons_vf(E_idx)%sf(cell(1), cell(2), cell(3)), pi_inf, gamma, pliq, cell, dynP - - ! Initial particle pressure - gas_p(bub_id, 1) = pliq + 2._wp*(1._wp/Web)/bub_R0(bub_id) - if ((1._wp/Web) /= 0._wp) then - pcrit = pv - 4._wp*(1._wp/Web)/(3._wp*sqrt(3._wp*gas_p(bub_id, 1)*bub_R0(bub_id)**3._wp/(2._wp*(1._wp/Web)))) - pref = gas_p(bub_id, 1) - else - pcrit = 0._wp - end if - - ! Initial particle mass - volparticle = 4._wp/3._wp*pi*bub_R0(bub_id)**3._wp ! volume - gas_mv(bub_id, 1) = pv*volparticle*(1._wp/(R_v*Tw))*(massflag) ! vapermass - gas_mg(bub_id) = (gas_p(bub_id, 1) - pv*(massflag))*volparticle*(1._wp/(R_n*Tw)) ! gasmass - if (gas_mg(bub_id) <= 0._wp) then - call s_mpi_abort("The initial mass of gas inside the bubble is negative. Check the initial conditions.") - end if - totalmass = gas_mg(bub_id) + gas_mv(bub_id, 1) ! totalmass - - ! Bubble natural frequency - concvap = gas_mv(bub_id, 1)/(gas_mv(bub_id, 1) + gas_mg(bub_id)) - omegaN = (3._wp*(gas_p(bub_id, 1) - pv*(massflag)) + 4._wp*(1._wp/Web)/bub_R0(bub_id))/rhol - if (pv*(massflag) > gas_p(bub_id, 1)) then - call s_mpi_abort("Lagrange bubble initially located in a region with pressure below the vapor pressure.") - end if - omegaN = sqrt(omegaN/bub_R0(bub_id)**2._wp) - - cpparticle = concvap*cp_v + (1._wp - concvap)*cp_n - kparticle = concvap*k_vl + (1._wp - concvap)*k_nl - - ! Mass and heat transfer coefficients (based on Preston 2007) - PeT = totalmass/volparticle*cpparticle*bub_R0(bub_id)**2._wp*omegaN/kparticle - call s_transcoeff(1._wp, PeT, Re_trans, Im_trans) - gas_betaT(bub_id) = Re_trans*(heatflag)*kparticle - - PeG = bub_R0(bub_id)**2._wp*omegaN/lag_params%diffcoefvap - call s_transcoeff(1._wp, PeG, Re_trans, Im_trans) - gas_betaC(bub_id) = Re_trans*(massflag)*lag_params%diffcoefvap - - if (gas_mg(bub_id) <= 0._wp) then - call s_mpi_abort("Negative gas mass in the bubble, check if the bubble is in the domain.") - end if - - end subroutine s_add_bubbles - - !> The purpose of this procedure is to obtain the information of the bubbles from a restart point. - !! @param bub_id Local ID of the particle - !! 
@param save_count File identifier - subroutine s_restart_bubbles(bub_id, save_count) - - integer, intent(inout) :: bub_id, save_count - - character(LEN=path_len + 2*name_len) :: file_loc - -#ifdef MFC_MPI - real(wp), dimension(20) :: inputvals - integer, dimension(MPI_STATUS_SIZE) :: status - integer(kind=MPI_OFFSET_KIND) :: disp - integer :: view - - integer, dimension(3) :: cell - logical :: indomain, particle_file, file_exist - - integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer :: ifile, ierr, tot_data, id - integer :: i - - write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=file_exist) - - if (file_exist) then - if (proc_rank == 0) then - open (9, file=trim(file_loc), form='unformatted', status='unknown') - read (9) tot_data, mytime, dt - close (9) - print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt - end if - else - print '(a)', trim(file_loc)//' is missing. exiting.' - call s_mpi_abort - end if - - call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 - start_idx_part(1) = 0 - start_idx_part(2) = 0 - - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) - - ! Open the file to write all flow variables - write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=particle_file) - - if (particle_file) then - call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) - call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & - mpi_p, status, ierr) - do i = 1, tot_data - id = int(MPI_IO_DATA_lag_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain(inputvals(1:3)) - if (indomain .and. (id > 0)) then - bub_id = bub_id + 1 - nBubs = bub_id ! local number of bubbles - lag_id(bub_id, 1) = id ! global ID - lag_id(bub_id, 2) = bub_id ! local ID - mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) - mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) - mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) - intfc_rad(bub_id, 1) = inputvals(10) - intfc_vel(bub_id, 1) = inputvals(11) - bub_R0(bub_id) = inputvals(12) - Rmax_stats(bub_id) = inputvals(13) - Rmin_stats(bub_id) = inputvals(14) - bub_dphidt(bub_id) = inputvals(15) - gas_p(bub_id, 1) = inputvals(16) - gas_mv(bub_id, 1) = inputvals(17) - gas_mg(bub_id) = inputvals(18) - gas_betaT(bub_id) = inputvals(19) - gas_betaC(bub_id) = inputvals(20) - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - end if - end do - deallocate (MPI_IO_DATA_lag_bubbles) - end if - call MPI_FILE_CLOSE(ifile, ierr) -#endif - - end subroutine s_restart_bubbles - - !> Contains the bubble dynamics subroutines. - !! @param q_cons_vf Conservative variables - !! @param q_prim_vf Primitive variables - !! @param rhs_vf Calculated change of conservative variables - !! @param t_step Current time step - !! 
@param stage Current stage in the time-stepper algorithm - subroutine s_compute_bubble_EL_dynamics(q_cons_vf, q_prim_vf, t_step, rhs_vf, stage) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf - integer, intent(in) :: t_step, stage - - real(wp) :: myVapFlux - real(wp) :: preterm1, term2, paux, pint, Romega, term1_fac - real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v - real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot - real(wp) :: myPinf, aux1, aux2, myCson, myRho - real(wp) :: gamma, pi_inf, qv - real(wp), dimension(contxe) :: myalpha_rho, myalpha - real(wp), dimension(2) :: Re - integer, dimension(3) :: cell - - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached - real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines - - integer :: i, k, l - - call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") - - ! Subgrid p_inf model based on Maeda and Colonius (2018). - if (lag_params%pressure_corrector) then - ! Calculate velocity potentials (valid for one bubble per cell) - !$acc parallel loop gang vector default(present) private(k, cell) - do k = 1, nBubs - call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) - myR0 = bub_R0(k) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myPb = gas_p(k, 2) - pint = f_cpbw_KM(myR0, myR, myV, myPb) - pint = pint + 0.5_wp*myV**2._wp - if (lag_params%cluster_type == 2) then - bub_dphidt(k) = (paux - pint) + term2 - ! Accounting for the potential induced by the bubble averaged over the control volume - ! Note that this is based on the incompressible flow assumption near the bubble. - term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) - bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) - end if - end do - end if - - ! Radial motion model - adap_dt_stop_max = 0 - !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & - !$acc reduction(MAX:adap_dt_stop_max) copy(adap_dt_stop_max) copyin(stage) - do k = 1, nBubs - - ! Current bubble state - myPb = gas_p(k, 2) - myMass_n = gas_mg(k) - myMass_v = gas_mv(k, 2) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myBeta_c = gas_betaC(k) - myBeta_t = gas_betaT(k) - myR0 = bub_R0(k) - - ! Vapor and heat fluxes - myVapFlux = f_vflux(myR, myV, myPb, myMass_v, k, myMass_n, myBeta_c, myR_m, mygamma_m) - myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) - myMvdot = 4._wp*pi*myR**2._wp*myVapFlux - - ! Obtaining driving pressure - call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - - ! Obtain liquid density and computing speed of sound from pinf - !$acc loop seq - do i = 1, num_fluids - myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) - myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) - end do - call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & - myalpha_rho, Re, cell(1), cell(2), cell(3)) - call s_compute_cson_from_pinf(k, q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) - - ! Adaptive time stepping - adap_dt_stop = 0 - - if (adap_dt) then - - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & - dmntait, dmBtait, dm_bub_adv_src, dm_divu, & - k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) - - ! 
Update bubble state - intfc_rad(k, 1) = myR - intfc_vel(k, 1) = myV - gas_p(k, 1) = myPb - gas_mv(k, 1) = myMass_v - - else - - ! Radial acceleration from bubble models - intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & - myPb, myPbdot, dmalf, dmntait, dmBtait, & - dm_bub_adv_src, dm_divu, & - myCson) - print*, proc_rank, intfc_dveldt(k, stage) - intfc_draddt(k, stage) = myV - gas_dmvdt(k, stage) = myMvdot - gas_dpdt(k, stage) = myPbdot - - end if - - do l = 1, num_dims - if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & - cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,2), mtn_vel(k,l,2), & - gas_mg(k), gas_mv(k, 2), & - Re(1), myRho, cell, l, q_prim_vf) - else - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end if - end do - - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) - - end do - - if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") - - call nvtxEndRange - - end subroutine s_compute_bubble_EL_dynamics - - !> The purpose of this subroutine is to obtain the bubble source terms based on Maeda and Colonius (2018) - !! and add them to the RHS scalar field. - !! @param q_cons_vf Conservative variables - !! @param q_prim_vf Conservative variables - !! @param rhs_vf Time derivative of the conservative variables - subroutine s_compute_bubbles_EL_source(q_cons_vf, q_prim_vf, rhs_vf) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf - - integer :: i, j, k, l - - if (.not. 
adap_dt) call s_smear_voidfraction() - - if (lag_params%solver_approach == 2) then - - if (p == 0) then - !$acc parallel loop collapse(4) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & - q_beta%vf(5)%sf(i, j, k)) - - end if - end do - end do - end do - end do - else - !$acc parallel loop collapse(4) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(2)%sf(i, j, k) - end if - end do - end do - end do - end do - end if - - do l = 1, num_dims - - call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(3)%sf(i, j, k) - end if - end do - end do - end do - - !source in energy - !$acc parallel loop collapse(3) gang vector default(present) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) - end do - end do - end do - - call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k) - end if - end do - end do - end do - end do - - end if - - end subroutine s_compute_bubbles_EL_source - - !> This procedure computes the speed of sound from a given driving pressure - !! @param bub_id Bubble id - !! @param q_prim_vf Primitive variables - !! @param pinf Driving pressure - !! @param cell Bubble cell - !! @param rhol Liquid density - !! @param gamma Liquid specific heat ratio - !! @param pi_inf Liquid stiffness - !! 
@param cson Calculated speed of sound - subroutine s_compute_cson_from_pinf(bub_id, q_prim_vf, pinf, cell, rhol, gamma, pi_inf, cson) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_compute_cson_from_pinf -#else - !$acc routine seq -#endif - integer, intent(in) :: bub_id - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - real(wp), intent(in) :: pinf, rhol, gamma, pi_inf - integer, dimension(3), intent(in) :: cell - real(wp), intent(out) :: cson - - real(wp) :: E, H - real(wp), dimension(num_dims) :: vel - integer :: i - - !$acc loop seq - do i = 1, num_dims - vel(i) = q_prim_vf(i + contxe)%sf(cell(1), cell(2), cell(3)) - end do - E = gamma*pinf + pi_inf + 0.5_wp*rhol*dot_product(vel, vel) - H = (E + pinf)/rhol - cson = sqrt((H - 0.5_wp*dot_product(vel, vel))/gamma) - - end subroutine s_compute_cson_from_pinf - - !> The purpose of this subroutine is to smear the effect of the bubbles in the Eulerian framework - subroutine s_smear_voidfraction() - - integer :: i, j, k - - call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, q_beta_idx - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(i)%sf(j, k, l) = 0._wp - end do - end do - end do - end do - - call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & - mtn_s, mtn_pos, q_beta) - - !Store 1-beta - !$acc parallel loop collapse(3) gang vector default(present) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) - ! Limiting void fraction given max value - q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & - 1._wp - lag_params%valmaxvoid) - end do - end do - end do - - call nvtxEndRange - - end subroutine s_smear_voidfraction - - !> The purpose of this procedure is to obtain the bubble driving pressure p_inf - !! @param bub_id Particle identifier - !! @param q_prim_vf Primitive variables - !! @param ptype 1: p at infinity, 2: averaged P at the bubble location - !! @param f_pinfl Driving pressure - !! @param cell Bubble cell - !!
@param Romega Control volume radius - subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) -#ifdef _CRAYFTN - !DIR$ INLINEALWAYS s_get_pinf -#else - !$acc routine seq -#endif - integer, intent(in) :: bub_id, ptype - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - real(wp), intent(out) :: f_pinfl - integer, dimension(3), intent(out) :: cell - real(wp), intent(out), optional :: preterm1, term2, Romega - - real(wp), dimension(3) :: scoord, psi - real(wp) :: dc, vol, aux - real(wp) :: volgas, term1, Rbeq, denom - real(wp) :: charvol, charpres, charvol2, charpres2 - integer, dimension(3) :: cellaux - integer :: i, j, k - integer :: smearGrid, smearGridz - logical :: celloutside - - f_pinfl = 0._wp - - if (lag_params%vel_model > 0) then - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) - scoord = mtn_s(bub_id, 1:3, 2) - else - scoord = mtn_s(bub_id, 1:3, 2) - cell(:) = int(scoord(:)) - end if - - !$acc loop seq - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do - - if ((lag_params%cluster_type == 1)) then - !< Getting p_cell in terms of only the current cell by interpolation - - !< Getting the cell volulme as Omega - if (p > 0) then - vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - else - if (cyl_coord) then - vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - end if - end if - - !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. - psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - if (cell(1) == (m + buff_size)) then - cell(1) = cell(1) - 1 - psi(1) = 1._wp - else if (cell(1) == (-buff_size)) then - psi(1) = 0._wp - else - if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - end if - - psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - if (cell(2) == (n + buff_size)) then - cell(2) = cell(2) - 1 - psi(2) = 1._wp - else if (cell(2) == (-buff_size)) then - psi(2) = 0._wp - else - if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - end if - - if (p > 0) then - psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - if (cell(3) == (p + buff_size)) then - cell(3) = cell(3) - 1 - psi(3) = 1._wp - else if (cell(3) == (-buff_size)) then - psi(3) = 0._wp - else - if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - end if - else - psi(3) = 0._wp - end if - - !< Perform bilinear interpolation - if (p == 0) then !2D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) - else !3D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - 
psi(1))*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) - end if - - !R_Omega - dc = (3._wp*vol/(4._wp*pi))**(1._wp/3._wp) - - else if (lag_params%cluster_type >= 2) then - ! Bubble dynamic closure from Maeda and Colonius (2018) - - ! Include the cell that contains the bubble (mapCells+1+mapCells) - smearGrid = mapCells - (-mapCells) + 1 - smearGridz = smearGrid - if (p == 0) smearGridz = 1 - - charvol = 0._wp - charpres = 0._wp - charvol2 = 0._wp - charpres2 = 0._wp - vol = 0._wp - - !$acc loop seq - do i = 1, smearGrid - !$acc loop seq - do j = 1, smearGrid - !$acc loop seq - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !< check if the current cell is outside the computational domain or not (including ghost cells) - celloutside = .false. - if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then - celloutside = .true. - end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if - else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then - celloutside = .true. - end if - - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then - celloutside = .true. - end if - end if - if (.not. celloutside) then - if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then - celloutside = .true. - end if - end if - - if (.not. celloutside) then - !< Obtaining the cell volulme - if (p > 0) then - vol = dx(cellaux(1))*dy(cellaux(2))*dz(cellaux(3)) - else - if (cyl_coord) then - vol = dx(cellaux(1))*dy(cellaux(2))*y_cc(cellaux(2))*2._wp*pi - else - vol = dx(cellaux(1))*dy(cellaux(2))*lag_params%charwidth - end if - end if - !< Update values - charvol = charvol + vol - charpres = charpres + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3))*vol - charvol2 = charvol2 + vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) - charpres2 = charpres2 + q_prim_vf(E_idx)%sf(cellaux(1), cellaux(2), cellaux(3)) & - *vol*q_beta%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) - end if - - end do - end do - end do - - f_pinfl = charpres2/charvol2 - vol = charvol - dc = (3._wp*abs(vol)/(4._wp*pi))**(1._wp/3._wp) - - end if - - if (lag_params%pressure_corrector) then - - !Valid if only one bubble exists per cell - volgas = intfc_rad(bub_id, 2)**3._wp - denom = intfc_rad(bub_id, 2)**2._wp - term1 = bub_dphidt(bub_id)*intfc_rad(bub_id, 2)**2._wp - term2 = intfc_vel(bub_id, 2)*intfc_rad(bub_id, 2)**2._wp - - Rbeq = volgas**(1._wp/3._wp) !surrogate bubble radius - aux = dc**3._wp - Rbeq**3._wp - term2 = term2/denom - term2 = 3._wp/2._wp*term2**2._wp*Rbeq**3._wp*(1._wp - Rbeq/dc)/aux - preterm1 = 3._wp/2._wp*Rbeq*(dc**2._wp - Rbeq**2._wp)/(aux*denom) - - !Control volume radius - if (ptype == 2) Romega = dc - - ! 
Getting p_inf - if (ptype == 1) then - f_pinfl = f_pinfl + preterm1*term1 + term2 - end if - - end if - - end subroutine s_get_pinf - - !> This subroutine updates the Lagrange variables using the tvd RK time steppers. - !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. - !! @param stage Current tvd RK stage - subroutine s_update_lagrange_tdv_rk(stage) - - integer, intent(in) :: stage - - integer :: k - - if (time_stepper == 1) then ! 1st order TVD RK - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 1) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) - - call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - - if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) - call s_write_lag_particles(mytime) - end if - - elseif (time_stepper == 2) then ! 2nd order TVD RK - if (stage == 1) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) - - elseif (stage == 2) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp - gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp - gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) - - call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) - call s_write_lag_particles(mytime) - end if - - end if - - elseif (time_stepper == 3) then ! 
3rd order TVD RK - if (stage == 1) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) - - elseif (stage == 2) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_posPrev(k, 1:3, 2) = mtn_pos(k, 1:3, 2) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp - gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp - gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) - - elseif (stage == 3) then - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] - intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) - intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) - mtn_posPrev(k, 1:3, 1) = mtn_pos(k, 1:3, 2) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) - gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) - gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) - end do - - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) - - call s_transfer_data_to_tmp() - call s_write_void_evol(mytime) - if (lag_params%write_bubbles_stats) call s_calculate_lag_bubble_stats() - - if (lag_params%write_bubbles) then - !$acc update host(gas_p, gas_mv, intfc_rad, intfc_vel) - call s_write_lag_particles(mytime) - end if - - end if - end if - - end subroutine s_update_lagrange_tdv_rk - - !> This subroutine enforces reflective and wall boundary conditions for EL bubbles - subroutine s_enforce_EL_bubbles_boundary_conditions(dest) - - integer, intent(in) :: dest - integer :: k, i, patch_id - integer, dimension(3) :: cell - - !$acc parallel loop gang vector default(present) private(cell) - do k = 1, nBubs - if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. 
mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) - elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) - elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then - call s_remove_lag_bubble(k) - end if - - if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) - else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) - elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then - call s_remove_lag_bubble(k) - end if - - if (p > 0) then - if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) - else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) - elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then - call s_remove_lag_bubble(k) - end if - end if - - if (ib) then - cell = -buff_size - call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - - if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then - patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) - - do i = 1, num_dims - mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & - levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), patch_id) - end do - cell = -buff_size - call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - end if - end if - - end do - - if (num_procs > 1) then - call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) - call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts) - call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, lag_id, bub_R0, lag_num_ts, nbubs) - end if - - end subroutine s_enforce_EL_bubbles_boundary_conditions - - !> This subroutine returns the computational coordinate of the cell for the given position. - !! @param pos Input coordinates - !! @param cell Computational coordinate of the cell - !! 
@param scoord Calculated particle coordinates - subroutine s_locate_cell(pos, cell, scoord) - - real(wp), dimension(3), intent(in) :: pos - real(wp), dimension(3), intent(out) :: scoord - integer, dimension(3), intent(inout) :: cell - - integer :: i - - do while (pos(1) < x_cb(cell(1) - 1)) - cell(1) = cell(1) - 1 - end do - - do while (pos(1) > x_cb(cell(1))) - cell(1) = cell(1) + 1 - end do - - do while (pos(2) < y_cb(cell(2) - 1)) - cell(2) = cell(2) - 1 - end do - - do while (pos(2) > y_cb(cell(2))) - cell(2) = cell(2) + 1 - end do - - if (p > 0) then - do while (pos(3) < z_cb(cell(3) - 1)) - cell(3) = cell(3) - 1 - end do - do while (pos(3) > z_cb(cell(3))) - cell(3) = cell(3) + 1 - end do - end if - - ! The numbering of the cell of which left boundary is the domain boundary is 0. - ! if comp.coord of the pos is s, the real coordinate of s is - ! (the coordinate of the left boundary of the Floor(s)-th cell) - ! + (s-(int(s))*(cell-width). - ! In other words, the coordinate of the center of the cell is x_cc(cell). - - !coordinates in computational space - scoord(1) = cell(1) + (pos(1) - x_cb(cell(1) - 1))/dx(cell(1)) - scoord(2) = cell(2) + (pos(2) - y_cb(cell(2) - 1))/dy(cell(2)) - scoord(3) = 0._wp - if (p > 0) scoord(3) = cell(3) + (pos(3) - z_cb(cell(3) - 1))/dz(cell(3)) - cell(:) = int(scoord(:)) - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do - - end subroutine s_locate_cell - - !> This subroutine transfer data into the temporal variables. - subroutine s_transfer_data_to_tmp() - - integer :: k - - !$acc parallel loop gang vector default(present) private(k) - do k = 1, nBubs - gas_p(k, 2) = gas_p(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) - intfc_rad(k, 2) = intfc_rad(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) - mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) - end do - - end subroutine s_transfer_data_to_tmp - - !> The purpose of this procedure is to determine if the global coordinates of the bubbles - !! are present in the current MPI processor (including ghost cells). - !! @param pos_part Spatial coordinates of the bubble - function particle_in_domain(pos_part) - - logical :: particle_in_domain - real(wp), dimension(3), intent(in) :: pos_part - - ! 2D - if (p == 0 .and. cyl_coord .neqv. .true.) then - ! Defining a virtual z-axis that has the same dimensions as y-axis - ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) - else - ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) - end if - - ! 3D - if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) - end if - - ! For symmetric and wall boundary condition - if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. 
(pos_part(1) >= x_cb(-1))) - end if - if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) - end if - if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then - particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) - end if - if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then - particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) - end if - - if (p > 0) then - if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) - end if - if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then - particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p))) - end if - end if - - end function particle_in_domain - - !> The purpose of this procedure is to determine if the lagrangian bubble is located in the - !! physical domain. The ghost cells are not part of the physical domain. - !! @param pos_part Spatial coordinates of the bubble - function particle_in_domain_physical(pos_part) - - logical :: particle_in_domain_physical - real(wp), dimension(3), intent(in) :: pos_part - - particle_in_domain_physical = ((pos_part(1) < x_cb(m)) .and. (pos_part(1) >= x_cb(-1)) .and. & - (pos_part(2) < y_cb(n)) .and. (pos_part(2) >= y_cb(-1))) - - if (p > 0) then - particle_in_domain_physical = (particle_in_domain_physical .and. (pos_part(3) < z_cb(p)) .and. (pos_part(3) >= z_cb(-1))) - end if - - end function particle_in_domain_physical - - !> The purpose of this procedure is to calculate the gradient of a scalar field along the x, y and z directions - !! following a second-order central difference considering uneven widths - !! @param q Input scalar field - !! @param dq Output gradient of q - !! @param dir Gradient spatial direction - subroutine s_gradient_dir(q, dq, dir) - - type(scalar_field), intent(inout) :: q - type(scalar_field), intent(inout) :: dq - integer, intent(in) :: dir - - integer :: i, j, k - - if (dir == 1) then - ! Gradient in x dir. - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & - + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & - - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) - end do - end do - end do - else - if (dir == 2) then - ! Gradient in y dir. - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do - end do - end do - else - ! Gradient in z dir. 
- !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do - end do - end do - end if - end if - - end subroutine s_gradient_dir - - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. - !! @param q_time Current time - subroutine s_write_lag_particles(qtime) - - real(wp), intent(in) :: qtime - integer :: k - - character(LEN=path_len + 2*name_len) :: file_loc - logical :: file_exist - - write (file_loc, '(A,I0,A)') 'lag_bubble_evol_', proc_rank, '.dat' - file_loc = trim(case_dir)//'/D/'//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - - if (.not. file_exist) then - open (11, FILE=trim(file_loc), FORM='formatted', position='rewind') - write (11, *) 'currentTime, particleID, x, y, z, ', & - 'coreVaporMass, coreVaporConcentration, radius, interfaceVelocity, ', & - 'corePressure' - else - open (11, FILE=trim(file_loc), FORM='formatted', position='append') - end if - - ! Cycle through list - do k = 1, nBubs - write (11, '(6X,f12.6,I24.8,8e24.8)') & - qtime, & - lag_id(k, 1), & - mtn_pos(k, 1, 1), & - mtn_pos(k, 2, 1), & - mtn_pos(k, 3, 1), & - gas_mv(k, 1), & - gas_mv(k, 1)/(gas_mv(k, 1) + gas_mg(k)), & - intfc_rad(k, 1), & - intfc_vel(k, 1), & - gas_p(k, 1) - end do - - close (11) - - end subroutine s_write_lag_particles - - !> Subroutine that writes some useful statistics related to the volume fraction - !! of the particles (void fraction) in the computatioational domain - !! on each time step. - !! @param q_time Current time - subroutine s_write_void_evol(qtime) - - real(wp), intent(in) :: qtime - real(wp) :: volcell, voltot - real(wp) :: lag_void_max, lag_void_avg, lag_vol - real(wp) :: void_max_glb, void_avg_glb, vol_glb - - integer :: i, j, k - - character(LEN=path_len + 2*name_len) :: file_loc - logical :: file_exist - - if (proc_rank == 0) then - write (file_loc, '(A)') 'voidfraction.dat' - file_loc = trim(case_dir)//'/D/'//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (.not. file_exist) then - open (12, FILE=trim(file_loc), FORM='formatted', position='rewind') - !write (12, *) 'currentTime, averageVoidFraction, ', & - ! 'maximumVoidFraction, totalParticlesVolume' - !write (12, *) 'The averageVoidFraction value does ', & - ! 'not reflect the real void fraction in the cloud since the ', & - ! 
'cells which do not have bubbles are not accounted' - else - open (12, FILE=trim(file_loc), FORM='formatted', position='append') - end if - end if - - lag_void_max = 0._wp - lag_void_avg = 0._wp - lag_vol = 0._wp - !$acc parallel loop collapse(3) gang vector default(present) reduction(+:lag_vol,lag_void_avg) & - !$acc reduction(MAX:lag_void_max) copy(lag_vol, lag_void_avg, lag_void_max) - do k = 0, p - do j = 0, n - do i = 0, m - lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) - call s_get_char_vol(i, j, k, volcell) - if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then - lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell - lag_vol = lag_vol + volcell - end if - end do - end do - end do - -#ifdef MFC_MPI - if (num_procs > 1) then - call s_mpi_allreduce_max(lag_void_max, void_max_glb) - lag_void_max = void_max_glb - call s_mpi_allreduce_sum(lag_vol, vol_glb) - lag_vol = vol_glb - call s_mpi_allreduce_sum(lag_void_avg, void_avg_glb) - lag_void_avg = void_avg_glb - end if -#endif - voltot = lag_void_avg - ! This voidavg value does not reflect the real void fraction in the cloud - ! since the cell which does not have bubbles are not accounted - if (lag_vol > 0._wp) lag_void_avg = lag_void_avg/lag_vol - - if (proc_rank == 0) then - write (12, '(6X,4e24.8)') & - qtime, & - lag_void_avg, & - lag_void_max, & - voltot - close (12) - end if - - end subroutine s_write_void_evol - - !> Subroutine that writes the restarting files for the particles in the lagrangian solver. - !! @param t_step Current time step - subroutine s_write_restart_lag_bubbles(t_step) - - ! Generic string used to store the address of a particular file - integer, intent(in) :: t_step - - character(LEN=path_len + 2*name_len) :: file_loc - logical :: file_exist - integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn - integer :: i, k - -#ifdef MFC_MPI - ! For Parallel I/O - integer :: ifile, ierr - integer, dimension(MPI_STATUS_SIZE) :: status - integer(KIND=MPI_OFFSET_KIND) :: disp - integer :: view - integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer, dimension(num_procs) :: part_order, part_ord_mpi - - bub_id = 0._wp - if (nBubs /= 0) then - do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - bub_id = bub_id + 1 - end if - end do - end if - - if (.not. parallel_io) return - - ! Total number of particles - call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - ! Total number of particles written so far - call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - lsizes(1) = max(1, bub_id) - lsizes(2) = 21 - - ! if the partcle number is zero, put 1 since MPI cannot deal with writing - ! zero particle - part_order(:) = 1 - part_order(proc_rank + 1) = max(1, bub_id) - - call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & - MPI_MAX, MPI_COMM_WORLD, ierr) - - gsizes(1) = sum(part_ord_mpi(1:num_procs)) - gsizes(2) = 21 - - start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) - start_idx_part(2) = 0 - - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - - ! 
Writing down the total number of particles - if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - write (9) gsizes(1), mytime, dt - close (9) - end if - - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) - - allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) - - ! Open the file to write all flow variables - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) - - disp = 0._wp - - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - - ! Cycle through list - i = 1 - - if (bub_id == 0) then - MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp - else - - do k = 1, nBubs - - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) - MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 8:10) = mtn_vel(k, 1:3, 1) - MPI_IO_DATA_lag_bubbles(i, 11) = intfc_rad(k, 1) - MPI_IO_DATA_lag_bubbles(i, 12) = intfc_vel(k, 1) - MPI_IO_DATA_lag_bubbles(i, 13) = bub_R0(k) - MPI_IO_DATA_lag_bubbles(i, 14) = Rmax_stats(k) - MPI_IO_DATA_lag_bubbles(i, 15) = Rmin_stats(k) - MPI_IO_DATA_lag_bubbles(i, 16) = bub_dphidt(k) - MPI_IO_DATA_lag_bubbles(i, 17) = gas_p(k, 1) - MPI_IO_DATA_lag_bubbles(i, 18) = gas_mv(k, 1) - MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) - MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) - MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - - i = i + 1 - - end if - - end do - - end if - - call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & - mpi_p, status, ierr) - - call MPI_FILE_CLOSE(ifile, ierr) - - deallocate (MPI_IO_DATA_lag_bubbles) - -#endif - - end subroutine s_write_restart_lag_bubbles - - !> This procedure calculates the maximum and minimum radius of each bubble. - subroutine s_calculate_lag_bubble_stats() - - integer :: k - - !$acc parallel loop gang vector default(present) reduction(MAX:Rmax_glb) & - !$acc reduction(MIN: Rmin_glb) copy(Rmax_glb, Rmin_glb) - do k = 1, nBubs - Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) - Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) - end do - - end subroutine s_calculate_lag_bubble_stats - - !> Subroutine that writes the maximum and minimum radius of each bubble. 
- subroutine s_write_lag_bubble_stats() - - integer :: k - character(LEN=path_len + 2*name_len) :: file_loc - - write (file_loc, '(A,I0,A)') 'stats_lag_bubbles_', proc_rank, '.dat' - file_loc = trim(case_dir)//'/D/'//trim(file_loc) - - !$acc update host(Rmax_glb, Rmin_glb) - - open (13, FILE=trim(file_loc), FORM='formatted', position='rewind') - write (13, *) 'proc_rank, particleID, x, y, z, Rmax_glb, Rmin_glb' - - do k = 1, nBubs - write (13, '(6X,2I24.8,5e24.8)') & - proc_rank, & - lag_id(k, 1), & - mtn_pos(k, 1, 1), & - mtn_pos(k, 2, 1), & - mtn_pos(k, 3, 1), & - Rmax_stats(k), & - Rmin_stats(k) - end do - - close (13) - - end subroutine s_write_lag_bubble_stats - - !> The purpose of this subroutine is to remove one specific particle if dt is too small. - !! @param bub_id Particle id - subroutine s_remove_lag_bubble(bub_id) - - integer, intent(in) :: bub_id - - integer :: i - - !$acc loop seq - do i = bub_id, nBubs - 1 - lag_id(i, 1) = lag_id(i + 1, 1) - bub_R0(i) = bub_R0(i + 1) - Rmax_stats(i) = Rmax_stats(i + 1) - Rmin_stats(i) = Rmin_stats(i + 1) - gas_mg(i) = gas_mg(i + 1) - gas_betaT(i) = gas_betaT(i + 1) - gas_betaC(i) = gas_betaC(i + 1) - bub_dphidt(i) = bub_dphidt(i + 1) - gas_p(i, 1:2) = gas_p(i + 1, 1:2) - gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) - intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) - intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) - mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) - mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) - mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) - mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) - intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) - intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) - gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) - gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - end do - - nBubs = nBubs - 1 - !$acc update device(nBubs) - - end subroutine s_remove_lag_bubble - - !> The purpose of this subroutine is to deallocate variables - subroutine s_finalize_lagrangian_solver() - - integer :: i - - do i = 1, q_beta_idx - @:DEALLOCATE(q_beta%vf(i)%sf) - end do - @:DEALLOCATE(q_beta%vf) - - !Deallocating space - @:DEALLOCATE(lag_id) - @:DEALLOCATE(bub_R0) - @:DEALLOCATE(Rmax_stats) - @:DEALLOCATE(Rmin_stats) - @:DEALLOCATE(gas_mg) - @:DEALLOCATE(gas_betaT) - @:DEALLOCATE(gas_betaC) - @:DEALLOCATE(bub_dphidt) - @:DEALLOCATE(gas_p) - @:DEALLOCATE(gas_mv) - @:DEALLOCATE(intfc_rad) - @:DEALLOCATE(intfc_vel) - @:DEALLOCATE(mtn_pos) - @:DEALLOCATE(mtn_posPrev) - @:DEALLOCATE(mtn_vel) - @:DEALLOCATE(mtn_s) - @:DEALLOCATE(intfc_draddt) - @:DEALLOCATE(intfc_dveldt) - @:DEALLOCATE(gas_dpdt) - @:DEALLOCATE(gas_dmvdt) - @:DEALLOCATE(mtn_dposdt) - @:DEALLOCATE(mtn_dveldt) - - end subroutine s_finalize_lagrangian_solver - -end module m_bubbles_EL From 9390c88bb8505e555f0bd1ee2480b3979cd19256 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 11 Jun 2025 14:55:40 -0400 Subject: [PATCH 32/62] 3D transfer list --- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 85 +++++++++++++++++++++++++++++---- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 674d07d049..f5691fdc3b 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1189,7 +1189,7 @@ contains mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then call s_remove_lag_bubble(k) - elseif 
(mtn_pos(k, 1, dest) < x_cb(mapCells -buff_size - 1)) then + elseif (mtn_pos(k, 1, dest) < x_cb(mapCells - buff_size - 1)) then call s_remove_lag_bubble(k) end if diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 992e3470e1..ec92e7079d 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -471,7 +471,6 @@ contains if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 - if (n > 0) then ! Left bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then @@ -479,21 +478,40 @@ contains p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - + if (p > 0) then + ! Left bottom back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(-1, -1, -1, p_send_counts(-1, -1, -1)) = k + p_send_counts(-1, -1, -1) = p_send_counts(-1, -1, -1) + 1 + ! Left bottom front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(-1, -1, 1, p_send_counts(-1, -1, 1)) = k + p_send_counts(-1, -1, 1) = p_send_counts(-1, -1, 1) + 1 + end if + end if ! Left top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (p > 0) then + ! Left top back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(-1, 1, 1, p_send_counts(-1, 1, 1)) = k + p_send_counts(-1, 1, 1) = p_send_counts(-1, 1, 1) + 1 + ! Left top front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(-1, 1, -1, p_send_counts(-1, 1, -1)) = k + p_send_counts(-1, 1, -1) = p_send_counts(-1, 1, -1) + 1 + end if + end if end if end if - ! Right face elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 - if (n > 0) then ! Right bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then @@ -501,25 +519,74 @@ contains p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - + if (p > 0) then + ! Right bottom back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(1, -1, -1, p_send_counts(1, -1, -1)) = k + p_send_counts(1, -1, -1) = p_send_counts(1, -1, -1) + 1 + ! Right bottom front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(1, -1, 1, p_send_counts(1, -1, 1)) = k + p_send_counts(1, -1, 1) = p_send_counts(1, -1, 1) + 1 + end if + end if ! Right top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (p > 0) then + ! Right top back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(1, 1, -1, p_send_counts(1, 1, -1)) = k + p_send_counts(1, 1, -1) = p_send_counts(1, 1, -1) + 1 + ! Right top front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(1, 1, 1, p_send_counts(1, 1, 1)) = k + p_send_counts(1, 1, 1) = p_send_counts(1, 1, 1) + 1 + end if + end if end if end if - ! Bottom face (corners already accounted for) elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - + if (p > 0) then + ! Bottom face back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(0, -1, -1, p_send_counts(0, -1, -1)) = k + p_send_counts(0, -1, -1) = p_send_counts(0, -1, -1) + 1 + ! Bottom face front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(0, -1, 1, p_send_counts(0, -1, 1)) = k + p_send_counts(0, -1, 1) = p_send_counts(0, -1, 1) + 1 + end if + endif ! Top face (corners already accounted for) elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (p > 0) then + ! Top face back corner + if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(0, 1, -1, p_send_counts(0, 1, -1)) = k + p_send_counts(0, 1, -1) = p_send_counts(0, 1, -1) + 1 + ! Top face front corner + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then + p_send_ids(0, 1, 1, p_send_counts(0, 1, 1)) = k + p_send_counts(0, 1, 1) = p_send_counts(0, 1, 1) + 1 + end if + endif + ! Back face (everything else accounted for) + elseif (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then + p_send_ids(0, 0, -1, p_send_counts(0, 0, -1)) = k + p_send_counts(0, 0, -1) = p_send_counts(0, 0, -1) + 1 + ! Front face (everything else accounted for) + elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. 
nidx(3)%end /= 0) then + p_send_ids(0, 0, 1, p_send_counts(0, 0, 1)) = k + p_send_counts(0, 0, 1) = p_send_counts(0, 0, 1) + 1 end if end do @@ -542,7 +609,7 @@ contains if (abs(i) + abs(j) + abs(k) > 0) then call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) if (p_send_counts(i,j,k) > 0) then - print*, "SEND", proc_rank, p_send_counts(i,j,k) + print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) end if if (p_send_counts(i,j,k) > 0) then position = 0 @@ -597,7 +664,7 @@ contains if (abs(i) + abs(j) + abs(k) > 0) then call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) if (p_recv_counts(i,j,k) > 0) then - print*, "RECV", proc_rank, p_recv_counts(i,j,k) + print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) end if if (p_recv_counts(i,j,k) > 0) then position = 0 From aa259f127288a59ac015199e056bf3d0454b1633 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 11 Jun 2025 16:13:57 -0400 Subject: [PATCH 33/62] bug fixes --- src/simulation/m_bubbles_EL.fpp | 39 ++++++++++++------------- src/simulation/m_bubbles_EL_kernels.fpp | 22 +++++++------- src/simulation/m_mpi_proxy.fpp | 12 ++++---- 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index f5691fdc3b..35950f3d01 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1180,16 +1180,16 @@ contains integer, dimension(3) :: cell !$acc parallel loop gang vector default(present) private(cell) - do k = 1, nBubs + do k = nBubs, 1, -1 if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) - elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size - mapCells)) then + elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size)) then call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 1, dest) < x_cb(mapCells - buff_size - 1)) then + elseif (mtn_pos(k, 1, dest) < x_cb(buff_size - 1)) then call s_remove_lag_bubble(k) end if @@ -1199,9 +1199,9 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) - elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size - mapCells)) then + elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size)) then call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 2, dest) < y_cb(mapCells - buff_size - 1)) then + elseif (mtn_pos(k, 2, dest) < y_cb(buff_size - 1)) then call s_remove_lag_bubble(k) end if @@ -1212,9 +1212,9 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. 
mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) - elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size - mapCells)) then + elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size)) then call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 3, dest) < z_cb(mapCells - buff_size - 1)) then + elseif (mtn_pos(k, 3, dest) < z_cb(buff_size - 1)) then call s_remove_lag_bubble(k) end if end if @@ -1235,7 +1235,6 @@ contains call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) end if end if - end do if (num_procs > 1) then @@ -1338,26 +1337,26 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & - (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1)) .and. & (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1)) .and. & (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - mapCells)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1 + mapCells)) .and. & - (pos_part(2) < y_cb(n + buff_size - mapCells)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1 + mapCells)) .and. & - (pos_part(3) < z_cb(p + buff_size - mapCells)) .and. & - (pos_part(3) >= z_cb(-buff_size - 1 + mapCells))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & + (pos_part(1) >= x_cb(-buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size)) .and. & + (pos_part(2) >= y_cb(-buff_size - 1)) .and. & + (pos_part(3) < z_cb(p + buff_size)) .and. & + (pos_part(3) >= z_cb(-buff_size - 1))) end if ! For symmetric and wall boundary condition diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index b55b6c7e9c..1e15c772a4 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -283,27 +283,27 @@ contains celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size + mapCells) .or. & - (cellaux(2) < -buff_size + mapCells)) then + if ((cellaux(1) < -buff_size) .or. & + (cellaux(2) < -buff_size)) then celloutside = .true. end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then + if (cyl_coord .and. cellaux(2) < 0) then celloutside = .true. end if - if ((cellaux(2) > n + buff_size - mapCells) .or. & - (cellaux(1) > m + buff_size - mapCells)) then + if ((cellaux(2) > n + buff_size) .or. & + (cellaux(1) > m + buff_size)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size + mapCells) .or. & - (cellaux(1) < -buff_size + mapCells) .or. & - (cellaux(2) < -buff_size + mapCells)) then + if ((cellaux(3) < -buff_size) .or. & + (cellaux(1) < -buff_size) .or. & + (cellaux(2) < -buff_size)) then celloutside = .true. 
end if - if ((cellaux(3) > p + buff_size - mapCells) .or. & - (cellaux(2) > n + buff_size - mapCells) .or. & - (cellaux(1) > m + buff_size - mapCells)) then + if ((cellaux(3) > p + buff_size) .or. & + (cellaux(2) > n + buff_size) .or. & + (cellaux(1) > m + buff_size)) then celloutside = .true. end if end if diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index ec92e7079d..7f78ac2831 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -98,14 +98,14 @@ contains p_buff_size = nVar*lag_params%nBubs_glb*real_size @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) - comm_coords(1)%beg = x_cb(buff_size - mapCells - 1) - comm_coords(1)%end = x_cb(m - buff_size + mapCells) + comm_coords(1)%beg = x_cb(buff_size - 1) + comm_coords(1)%end = x_cb(m - buff_size) if (n > 0) then - comm_coords(2)%beg = y_cb(buff_size - mapCells - 1) - comm_coords(2)%end = y_cb(m - buff_size + mapCells) + comm_coords(2)%beg = y_cb(buff_size) + comm_coords(2)%end = y_cb(n - buff_size) if (p > 0) then - comm_coords(3)%beg = z_cb(buff_size - mapCells - 1) - comm_coords(3)%end = z_cb(p - buff_size + mapCells) + comm_coords(3)%beg = z_cb(buff_size - 1) + comm_coords(3)%end = z_cb(p - buff_size) end if end if From abf82613c1dcd9ca223e516ee3021d04fde8086e Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sat, 14 Jun 2025 12:29:49 -0400 Subject: [PATCH 34/62] Simple cases work --- src/common/m_helper_basic.f90 | 7 +- src/pre_process/m_global_parameters.fpp | 43 +++- src/pre_process/m_mpi_proxy.fpp | 2 +- src/pre_process/m_start_up.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 274 +++++++++++++++--------- src/simulation/m_bubbles_EL_kernels.fpp | 42 ++-- src/simulation/m_global_parameters.fpp | 6 +- src/simulation/m_mpi_proxy.fpp | 256 +++++++++++----------- src/simulation/m_start_up.fpp | 2 +- toolchain/mfc/run/case_dicts.py | 2 + 10 files changed, 388 insertions(+), 248 deletions(-) diff --git a/src/common/m_helper_basic.f90 b/src/common/m_helper_basic.f90 index ae3221a318..44ec039f9e 100644 --- a/src/common/m_helper_basic.f90 +++ b/src/common/m_helper_basic.f90 @@ -76,9 +76,10 @@ logical function f_is_integer(var) result(res) end function f_is_integer subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, & - viscous, bubbles_lagrange, m, n, p, num_dims) + viscous, bubbles_lagrange, m, n, p, & + num_dims, fd_number) - integer, intent(in) :: weno_polyn, m, n, p, num_dims + integer, intent(in) :: weno_polyn, m, n, p, num_dims, fd_number integer, intent(inout) :: buff_size type(int_bounds_info), dimension(3), intent(inout) :: idwint, idwbuff logical, intent(in) :: viscous, bubbles_lagrange @@ -95,7 +96,7 @@ subroutine s_configure_coordinate_bounds(weno_polyn, buff_size, idwint, idwbuff, ! Correction for smearing function in the lagrangian subgrid bubble model if (bubbles_lagrange) then - buff_size = max(buff_size, 6) + buff_size = max(buff_size + fd_number, 6 + fd_number) end if ! Configuring Coordinate Direction Indexes diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index fecb7acc9c..40eada055a 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -123,6 +123,21 @@ module m_global_parameters ! Stands for "InDices With BUFFer". type(int_bounds_info) :: idwbuff(1:3) + integer :: fd_order !< + !! The order of the finite-difference (fd) approximations of the first-order + !! 
derivatives that need to be evaluated when the CoM or flow probe data + !! files are to be written at each time step + + integer :: fd_number !< + !! The finite-difference number is given by MAX(1, fd_order/2). Essentially, + !! it is a measure of the half-size of the finite-difference stencil for the + !! selected order of accuracy. + + !> @name lagrangian subgrid bubble parameters + !> @{! + type(bubbles_lagrange_parameters) :: lag_params !< Lagrange bubbles' parameters + !> @} + type(int_bounds_info) :: bc_x, bc_y, bc_z !< !! Boundary conditions in the x-, y- and z-coordinate directions @@ -386,6 +401,28 @@ contains ! Initial condition parameters num_patches = dflt_int + fd_order = dflt_int + lag_params%cluster_type = dflt_int + lag_params%pressure_corrector = .false. + lag_params%smooth_type = dflt_int + lag_params%heatTransfer_model = .false. + lag_params%massTransfer_model = .false. + lag_params%write_bubbles = .false. + lag_params%write_bubbles_stats = .false. + lag_params%nBubs_glb = dflt_int + lag_params%vel_model = dflt_int + lag_params%drag_model = dflt_int + lag_params%c_d = dflt_real + lag_params%epsilonb = 1._wp + lag_params%charwidth = dflt_real + lag_params%valmaxvoid = dflt_real + lag_params%c0 = dflt_real + lag_params%rho0 = dflt_real + lag_params%T0 = dflt_real + lag_params%Thost = dflt_real + lag_params%x0 = dflt_real + lag_params%diffcoefvap = dflt_real + do i = 1, num_patches_max patch_icpp(i)%geometry = dflt_int patch_icpp(i)%model_scale(:) = 1._wp @@ -858,10 +895,14 @@ contains chemxb = species_idx%beg chemxe = species_idx%end + if (lag_params%vel_model /= 0) then + fd_number = max(1, fd_order/2) + end if + call s_configure_coordinate_bounds(weno_polyn, buff_size, & idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & - num_dims) + num_dims, fd_number) #ifdef MFC_MPI diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index abc47cdb65..b6196618b4 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -47,7 +47,7 @@ contains & 'weno_order', 'precision', 'perturb_flow_fluid', & & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& & 'R0_type', 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & - & 'num_bc_patches' ] + & 'num_bc_patches', 'lag_params%vel_model' ] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 25b42eab52..5823a91451 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,7 +149,7 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & - patch_bc, Bx0, relativity, cont_damage + patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params ! Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 35950f3d01..59ba9377f3 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -116,7 +116,6 @@ contains ! 
Allocating space for lagrangian variables nBubs_glb = lag_params%nBubs_glb - @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(bub_R0(1:nBubs_glb)) @:ALLOCATE(Rmax_stats(1:nBubs_glb)) @:ALLOCATE(Rmin_stats(1:nBubs_glb)) @@ -124,6 +123,7 @@ contains @:ALLOCATE(gas_betaT(1:nBubs_glb)) @:ALLOCATE(gas_betaC(1:nBubs_glb)) @:ALLOCATE(bub_dphidt(1:nBubs_glb)) + @:ALLOCATE(lag_id(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_p(1:nBubs_glb, 1:2)) @:ALLOCATE(gas_mv(1:nBubs_glb, 1:2)) @:ALLOCATE(intfc_rad(1:nBubs_glb, 1:2)) @@ -227,7 +227,7 @@ contains do while (ios == 0) read (94, *, iostat=ios) (inputBubble(i), i=1, 8) if (ios /= 0) cycle - indomain = particle_in_domain(inputBubble(1:3)) + indomain = particle_in_domain_physical(inputBubble(1:3)) id = id + 1 if (id > lag_params%nBubs_glb .and. proc_rank == 0) then call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") @@ -269,6 +269,28 @@ contains !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) + call sleep(proc_rank) + print*, "Processor", proc_rank + do i = 1, nBubs + print*, i, lag_id(i,1), mtn_pos(i, 1:3, 1) + end do + + if (num_procs > 1) then + call s_add_particles_to_transfer_list_IC(mtn_pos(:, :, 1), nbubs) + call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) + call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + !if (proc_rank == 0 .or. proc_rank == 2) print*, "b", proc_rank, mtn_pos + !call sleep(1) + end if + !Populate temporal variables call s_transfer_data_to_tmp() call s_smear_voidfraction() @@ -326,28 +348,29 @@ contains mtn_posPrev(bub_id, 1:3, 1) = mtn_pos(bub_id, 1:3, 1) end if - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - ! Check if the bubble is located in the ghost cell of a symmetric boundary - if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & - (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & - (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & - (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - - if (p > 0) then - if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & - (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - end if - end if + !Check if the bubble is located in the ghost cell of a symmetric boundary + !if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + !(any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. 
& + !(any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + !(any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + !end if + + !if (p > 0) then + !if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + !(any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + !end if + !end if ! If particle is in the ghost cells, find the closest non-ghost cell - cell(1) = min(max(cell(1), 0), m) - cell(2) = min(max(cell(2), 0), n) - if (p > 0) cell(3) = min(max(cell(3), 0), p) + !cell(1) = min(max(cell(1), 0), m) + !cell(2) = min(max(cell(2), 0), n) + !if (p > 0) cell(3) = min(max(cell(3), 0), p) + call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) dynP = 0._wp @@ -472,7 +495,7 @@ contains do i = 1, tot_data id = int(MPI_IO_DATA_lag_bubbles(i, 1)) inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain(inputvals(1:3)) + indomain = particle_in_domain_physical(inputvals(1:3)) if (indomain .and. (id > 0)) then bub_id = bub_id + 1 nBubs = bub_id ! local number of bubbles @@ -492,7 +515,7 @@ contains gas_mg(bub_id) = inputvals(18) gas_betaT(bub_id) = inputvals(19) gas_betaC(bub_id) = inputvals(20) - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) end if end do @@ -555,6 +578,12 @@ contains end do end if + !call sleep(proc_rank) + !print*, proc_rank + !do k = 1, nBubs + !print*, k, lag_id(k, 1), mtn_pos(k,:,:) + !end do + ! Radial motion model adap_dt_stop_max = 0 !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & @@ -593,7 +622,6 @@ contains adap_dt_stop = 0 if (adap_dt) then - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & dmntait, dmBtait, dm_bub_adv_src, dm_divu, & k, myMass_v, myMass_n, myBeta_c, & @@ -627,13 +655,20 @@ contains mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & intfc_rad(k,2), mtn_vel(k,l,2), & gas_mg(k), gas_mv(k, 2), & - Re(1), myRho, cell, l, q_prim_vf) + Re(1), myRho, cell, l, lag_id(k,1), q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp end if end do + !if (lag_id(k,1) == 2) then + !print*, "as", proc_rank, stage, k, mtn_dposdt(k,:,stage), mtn_dveldt(k,:,stage) + !end if + + !if (proc_rank == 0 .or. 
proc_rank == 2) print*, "a", proc_rank, mtn_dposdt(:,:,stage), mtn_dveldt(:,:,stage) + !call sleep(1) + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do @@ -846,18 +881,19 @@ contains f_pinfl = 0._wp if (lag_params%vel_model > 0) then - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 2), cell, mtn_s(bub_id, 1:3, 2)) scoord = mtn_s(bub_id, 1:3, 2) else scoord = mtn_s(bub_id, 1:3, 2) cell(:) = int(scoord(:)) + !$acc loop seq + do i = 1, num_dims + if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 + end do end if - - !$acc loop seq - do i = 1, num_dims - if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 - end do + !print*, "h", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) + !call sleep(1) if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation @@ -875,42 +911,45 @@ contains !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - if (cell(1) == (m + buff_size)) then - cell(1) = cell(1) - 1 - psi(1) = 1._wp - else if (cell(1) == (-buff_size)) then - psi(1) = 0._wp - else - if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 + !if (cell(1) == (m + buff_size - fd_number)) then + !cell(1) = cell(1) - 1 + !psi(1) = 1._wp + !else if (cell(1) == (fd_number - buff_size)) then + !psi(1) = 0._wp + !else + !if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - end if + !end if psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - if (cell(2) == (n + buff_size)) then - cell(2) = cell(2) - 1 - psi(2) = 1._wp - else if (cell(2) == (-buff_size)) then - psi(2) = 0._wp - else - if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 + !if (cell(2) == (n + buff_size - fd_number)) then + !cell(2) = cell(2) - 1 + !psi(2) = 1._wp + !else if (cell(2) == (fd_number - buff_size)) then + !psi(2) = 0._wp + !else + !if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - end if + !end if if (p > 0) then psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - if (cell(3) == (p + buff_size)) then - cell(3) = cell(3) - 1 - psi(3) = 1._wp - else if (cell(3) == (-buff_size)) then - psi(3) = 0._wp - else - if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 + !if (cell(3) == (p + buff_size - fd_number)) then + !cell(3) = cell(3) - 1 + !psi(3) = 1._wp + !else if (cell(3) == (fd_number - buff_size)) then + !psi(3) = 0._wp + !else + !if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - end if + !end if else psi(3) = 0._wp end if + !print*, "i", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) + !call sleep(1) + !< Perform bilinear interpolation if (p == 0) then !2D f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) @@ -959,26 +998,32 @@ contains !< check if the current cell is outside the computational domain or not (including ghost cells) celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if - if (cyl_coord .and. y_cc(cellaux(2)) < 0._wp) then + if (cyl_coord .and. cellaux(2) < 0) then celloutside = .true. 
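[Editorial aside, not part of the original patch] The window checks in this hunk shrink the admissible cell range by fd_number on each side. The field buffers appear to span cell indices -buff_size .. m + buff_size (and analogously in y and z), so a cell accepted by these tests can host a finite-difference stencil of half-width fd_number without reading outside allocated memory. A self-contained sketch of the equivalent one-direction test, with all names illustrative rather than taken from MFC:

    pure function f_cell_in_window(c, lo_cell, hi_cell, half_stencil) result(in_window)
       ! c is a cell index; lo_cell/hi_cell are the first/last allocated cells
       ! (-buff_size and m + buff_size here); half_stencil plays the role of fd_number
       integer, intent(in) :: c, lo_cell, hi_cell, half_stencil
       logical :: in_window
       in_window = (c >= lo_cell + half_stencil) .and. (c <= hi_cell - half_stencil)
    end function f_cell_in_window

A cell that fails this test in a given direction corresponds to the ones these checks flag as celloutside (the cylindrical-coordinate axis check is a separate condition).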
end if - if ((cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size) .or. (cellaux(1) < -buff_size) .or. (cellaux(2) < -buff_size)) then + if ((cellaux(3) < fd_number - buff_size) .or. & + (cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if - if ((cellaux(3) > p + buff_size) .or. (cellaux(2) > n + buff_size) .or. (cellaux(1) > m + buff_size)) then + if ((cellaux(3) > p + buff_size - fd_number) .or. & + (cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if end if if (.not. celloutside) then - if (cyl_coord .and. (p == 0) .and. (y_cc(cellaux(2)) < 0._wp)) then + if (cyl_coord .and. (p == 0) .and. (cellaux(2) < 0)) then celloutside = .true. end if end if @@ -1083,6 +1128,10 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + !if (lag_id(k,1) == 2) then + !print*, "g11", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) + !print*, "g12", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) + !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) @@ -1098,6 +1147,10 @@ contains mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + !if (lag_id(k,1) == 2) then + !print*, "g21", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) + !print*, "g22", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) + !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) @@ -1178,19 +1231,22 @@ contains integer, intent(in) :: dest integer :: k, i, patch_id integer, dimension(3) :: cell + logical, dimension(1:nBubs) :: remove_bubble !$acc parallel loop gang vector default(present) private(cell) - do k = nBubs, 1, -1 + do k = 1, nBubs + remove_bubble(k) = .false. + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) - elseif (mtn_pos(k, 1, dest) > x_cb(m + buff_size)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 1, dest) < x_cb(buff_size - 1)) then - call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then + remove_bubble(k) = .true. + elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then + remove_bubble(k) = .true. end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1199,10 +1255,10 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. 
mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) - elseif (mtn_pos(k, 2, dest) > y_cb(n + buff_size)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 2, dest) < y_cb(buff_size - 1)) then - call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then + remove_bubble(k) = .true. + elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then + remove_bubble(k) = .true. end if if (p > 0) then @@ -1212,15 +1268,15 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) - elseif (mtn_pos(k, 3, dest) > z_cb(p + buff_size)) then - call s_remove_lag_bubble(k) - elseif (mtn_pos(k, 3, dest) < z_cb(buff_size - 1)) then - call s_remove_lag_bubble(k) + elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then + remove_bubble(k) = .true. + elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then + remove_bubble(k) = .true. end if end if if (ib) then - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then @@ -1231,22 +1287,35 @@ contains levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & * levelset%sf(cell(1), cell(2), cell(3), patch_id) end do - cell = -buff_size + cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) end if end if end do + !$acc loop seq + do k = nBubs, 1, -1 + if (remove_bubble(k)) call s_remove_lag_bubble(k) + end do + if (num_procs > 1) then call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) - call s_mpi_send_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts) - call s_mpi_recv_particles(intfc_rad, intfc_draddt, intfc_vel, & - intfc_draddt, mtn_posPrev, mtn_pos, & - mtn_dposdt, mtn_vel, mtn_dveldt, gas_p, & - gas_dpdt, gas_mv, gas_dmvdt, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) + call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + !do k = 1, nBubs + !if (lag_id(k,1) == 2) then + !print*, "c1", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,1), mtn_dposdt(k, :, 1) + !print*, "c2", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,2), mtn_dposdt(k, :, 2) + !endif + !end do end if end subroutine s_enforce_EL_bubbles_boundary_conditions @@ -1267,7 +1336,7 @@ contains cell(1) = cell(1) - 1 end do - do while (pos(1) > x_cb(cell(1))) + do while (pos(1) >= x_cb(cell(1))) cell(1) = cell(1) + 1 end do @@ -1275,7 +1344,7 @@ contains cell(2) = cell(2) - 1 end do - do while (pos(2) > y_cb(cell(2))) + do while (pos(2) >= y_cb(cell(2))) cell(2) = cell(2) + 1 end do @@ -1283,7 +1352,7 @@ contains do while (pos(3) < z_cb(cell(3) - 1)) cell(3) = cell(3) - 1 end 
do - do while (pos(3) > z_cb(cell(3))) + do while (pos(3) >= z_cb(cell(3))) cell(3) = cell(3) + 1 end do end if @@ -1337,26 +1406,27 @@ contains if (p == 0 .and. cyl_coord .neqv. .true.) then ! Defining a virtual z-axis that has the same dimensions as y-axis ! defined in the input file - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) >= -lag_params%charwidth/2._wp)) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & + (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. & + (pos_part(3) < lag_params%charwidth/2._wp) .and. (pos_part(3) > -lag_params%charwidth/2._wp)) else ! cyl_coord - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (abs(pos_part(2)) < y_cb(n + buff_size)) .and. (abs(pos_part(2)) >= max(y_cb(-buff_size - 1), 0._wp))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (abs(pos_part(2)) < y_cb(n + buff_size - fd_number)) .and. & + (abs(pos_part(2)) >= max(y_cb(fd_number - buff_size - 1), 0._wp))) end if ! 3D if (p > 0) then - particle_in_domain = ((pos_part(1) < x_cb(m + buff_size)) .and. & - (pos_part(1) >= x_cb(-buff_size - 1)) .and. & - (pos_part(2) < y_cb(n + buff_size)) .and. & - (pos_part(2) >= y_cb(-buff_size - 1)) .and. & - (pos_part(3) < z_cb(p + buff_size)) .and. & - (pos_part(3) >= z_cb(-buff_size - 1))) + particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & + (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & + (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & + (pos_part(2) >= y_cb(fd_number - buff_size - 1)) .and. & + (pos_part(3) < z_cb(p + buff_size - fd_number)) .and. & + (pos_part(3) >= z_cb(fd_number - buff_size - 1))) end if ! 
For symmetric and wall boundary condition @@ -1774,7 +1844,6 @@ contains !$acc loop seq do i = bub_id, nBubs - 1 - lag_id(i, 1) = lag_id(i + 1, 1) bub_R0(i) = bub_R0(i + 1) Rmax_stats(i) = Rmax_stats(i + 1) Rmin_stats(i) = Rmin_stats(i + 1) @@ -1782,6 +1851,7 @@ contains gas_betaT(i) = gas_betaT(i + 1) gas_betaC(i) = gas_betaC(i + 1) bub_dphidt(i) = bub_dphidt(i + 1) + lag_id(i, 1) = lag_id(i + 1, 1) gas_p(i, 1:2) = gas_p(i + 1, 1:2) gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) @@ -1794,6 +1864,8 @@ contains intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) + mtn_dposdt(i,1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) + mtn_dveldt(i,1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) end do nBubs = nBubs - 1 diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 1e15c772a4..9fb8002f7d 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -128,7 +128,7 @@ contains s_coord(1:3) = lbk_s(l, 1:3, 2) center(1:2) = lbk_pos(l, 1:2, 2) if (p > 0) center(3) = lbk_pos(l, 3, 2) - cell = -buff_size + cell = fd_number - buff_size call s_get_cell(s_coord, cell) !print*, s_coord call s_compute_stddsv(cell, volpart, stddsv) @@ -283,27 +283,27 @@ contains celloutside = .false. if (num_dims == 2) then - if ((cellaux(1) < -buff_size) .or. & - (cellaux(2) < -buff_size)) then + if ((cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if if (cyl_coord .and. cellaux(2) < 0) then celloutside = .true. end if - if ((cellaux(2) > n + buff_size) .or. & - (cellaux(1) > m + buff_size)) then + if ((cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if else - if ((cellaux(3) < -buff_size) .or. & - (cellaux(1) < -buff_size) .or. & - (cellaux(2) < -buff_size)) then + if ((cellaux(3) < fd_number - buff_size) .or. & + (cellaux(1) < fd_number - buff_size) .or. & + (cellaux(2) < fd_number - buff_size)) then celloutside = .true. end if - if ((cellaux(3) > p + buff_size) .or. & - (cellaux(2) > n + buff_size) .or. & - (cellaux(1) > m + buff_size)) then + if ((cellaux(3) > p + buff_size - fd_number) .or. & + (cellaux(2) > n + buff_size - fd_number) .or. & + (cellaux(1) > m + buff_size - fd_number)) then celloutside = .true. end if end if @@ -478,10 +478,10 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) + function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,id,q_prim_vf) result(a) !$acc routine seq integer, dimension(3) :: cell - integer :: i + integer :: i, id type(scalar_field), dimension(sys_size) :: q_prim_vf real(wp) :: a, vol, mass, force, vel, area real(wp) :: pos, rad, dp, v_rel, mg, mv, Re, rho, c_d @@ -500,6 +500,15 @@ contains (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) end if + !if (i == 1 .and. id == 2) then + !print*, "c", proc_rank, dp, & + !q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)), & + !q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)), & + !q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)), & + !pos, x_cc(cell(1) + 1), x_cc(cell(1) - 1), cell + !call sleep(1) + !end if + vol = (4._wp/3._wp) * pi * rad**3._wp force = -1._wp * vol * dp @@ -517,6 +526,11 @@ contains a = force / (mg + mv) + !if (i == 1 .and. 
id == 2) then + !print*, "d", proc_rank, vol, force, v_rel, vel, v_rel - vel, a, mg, mv + !call sleep(1) + !end if + end function f_get_acceleration function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) @@ -528,7 +542,7 @@ contains Re_b = max(2._wp * rho * abs(v_rel) * rad * mu_inv, Re_b_min) C_d = 24._wp * (1 + 0.15_wp * Re_b ** (0.687_wp)) / Re_b - !C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) + C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) end function f_get_clift_gauvin_drag_coeff diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 5960b3a84f..7e2385608b 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -1185,10 +1185,14 @@ contains fd_number = max(1, fd_order/2) end if + if (bubbles_lagrange) then + fd_number = max(1, fd_order/2) + end if + call s_configure_coordinate_bounds(weno_polyn, buff_size, & idwint, idwbuff, viscous, & bubbles_lagrange, m, n, p, & - num_dims) + num_dims, fd_number) !$acc update device(idwint, idwbuff) ! Configuring Coordinate Direction Indexes diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 7f78ac2831..a8215bc8c3 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -89,23 +89,23 @@ contains subroutine s_initialize_particles_mpi(lag_num_ts) integer :: i, j, k - integer :: real_size, int_size, nVar, lag_num_ts + integer :: real_size, int_size, nReal, lag_num_ts call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) - nVar = 28 + 10*lag_num_ts + int_size - p_buff_size = nVar*lag_params%nBubs_glb*real_size + nReal = 7 + 16*2 + 10*lag_num_ts + p_buff_size = lag_params%nBubs_glb * (nReal * real_size + int_size) @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) - comm_coords(1)%beg = x_cb(buff_size - 1) - comm_coords(1)%end = x_cb(m - buff_size) + comm_coords(1)%beg = x_cb(buff_size - fd_number - 1) + comm_coords(1)%end = x_cb(m - buff_size + fd_number) if (n > 0) then - comm_coords(2)%beg = y_cb(buff_size) - comm_coords(2)%end = y_cb(n - buff_size) + comm_coords(2)%beg = y_cb(buff_size - fd_number - 1) + comm_coords(2)%end = y_cb(n - buff_size + fd_number) if (p > 0) then - comm_coords(3)%beg = z_cb(buff_size - 1) - comm_coords(3)%end = z_cb(p - buff_size) + comm_coords(3)%beg = z_cb(buff_size - fd_number - 1) + comm_coords(3)%end = z_cb(p - buff_size + fd_number) end if end if @@ -138,7 +138,7 @@ contains & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches'] + & 'num_bc_patches', 'fd_order', 'lag_params%vel_model'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -469,8 +469,8 @@ contains do k = 1, nbub ! Left face if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then - p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k - p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k + !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 if (n > 0) then ! Left bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. 
nidx(2)%beg /= 0) then @@ -478,40 +478,18 @@ contains p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - if (p > 0) then - ! Left bottom back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(-1, -1, -1, p_send_counts(-1, -1, -1)) = k - p_send_counts(-1, -1, -1) = p_send_counts(-1, -1, -1) + 1 - ! Left bottom front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(-1, -1, 1, p_send_counts(-1, -1, 1)) = k - p_send_counts(-1, -1, 1) = p_send_counts(-1, -1, 1) + 1 - end if - end if ! Left top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 - if (p > 0) then - ! Left top back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(-1, 1, 1, p_send_counts(-1, 1, 1)) = k - p_send_counts(-1, 1, 1) = p_send_counts(-1, 1, 1) + 1 - ! Left top front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(-1, 1, -1, p_send_counts(-1, 1, -1)) = k - p_send_counts(-1, 1, -1) = p_send_counts(-1, 1, -1) + 1 - end if - end if end if end if ! Right face elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then - p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k - p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k + !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 if (n > 0) then ! Right bottom corner if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then @@ -519,124 +497,144 @@ contains p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - if (p > 0) then - ! Right bottom back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(1, -1, -1, p_send_counts(1, -1, -1)) = k - p_send_counts(1, -1, -1) = p_send_counts(1, -1, -1) + 1 - ! Right bottom front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(1, -1, 1, p_send_counts(1, -1, 1)) = k - p_send_counts(1, -1, 1) = p_send_counts(1, -1, 1) + 1 - end if - end if ! Right top corner elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 - if (p > 0) then - ! Right top back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(1, 1, -1, p_send_counts(1, 1, -1)) = k - p_send_counts(1, 1, -1) = p_send_counts(1, 1, -1) + 1 - ! Right top front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. 
nidx(3)%end /= 0) then - p_send_ids(1, 1, 1, p_send_counts(1, 1, 1)) = k - p_send_counts(1, 1, 1) = p_send_counts(1, 1, 1) + 1 - end if - end if end if end if ! Bottom face (corners already accounted for) elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - if (p > 0) then - ! Bottom face back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(0, -1, -1, p_send_counts(0, -1, -1)) = k - p_send_counts(0, -1, -1) = p_send_counts(0, -1, -1) + 1 - ! Bottom face front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(0, -1, 1, p_send_counts(0, -1, 1)) = k - p_send_counts(0, -1, 1) = p_send_counts(0, -1, 1) + 1 - end if - endif ! Top face (corners already accounted for) elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 - if (p > 0) then - ! Top face back corner - if (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(0, 1, -1, p_send_counts(0, 1, -1)) = k - p_send_counts(0, 1, -1) = p_send_counts(0, 1, -1) + 1 - ! Top face front corner - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(0, 1, 1, p_send_counts(0, 1, 1)) = k - p_send_counts(0, 1, 1) = p_send_counts(0, 1, 1) + 1 - end if - endif - ! Back face (everything else accounted for) - elseif (posPrev(k, 3) > comm_coords(3)%beg .and. pos(k,3) < comm_coords(3)%beg .and. nidx(3)%beg /= 0) then - p_send_ids(0, 0, -1, p_send_counts(0, 0, -1)) = k - p_send_counts(0, 0, -1) = p_send_counts(0, 0, -1) + 1 - ! Front face (everything else accounted for) - elseif (posPrev(k, 3) < comm_coords(3)%end .and. pos(k,3) > comm_coords(3)%end .and. nidx(3)%end /= 0) then - p_send_ids(0, 0, 1, p_send_counts(0, 0, 1)) = k - p_send_counts(0, 0, 1) = p_send_counts(0, 0, 1) + 1 end if end do end subroutine s_add_particles_to_transfer_list - subroutine s_mpi_send_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & - vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts) + subroutine s_add_particles_to_transfer_list_IC(pos, nbub) + + real(wp), dimension(:,:) :: pos + integer :: bubID, nbub + integer :: i, j, k + + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + p_send_counts(i,j,k) = 0 + end do + end do + end do + + do k = 1, nbub + ! Left face + if (pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then + !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k + !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + ! Left bottom corner + if (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k + p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + ! Left top corner + elseif (pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then + p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k + p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + ! Right face + elseif (pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then + !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k + !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + ! Right bottom corner + if (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k + p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + ! Right top corner + elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k + p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + ! Bottom face (corners already accounted for) + elseif (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then + p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k + p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 + ! Top face (corners already accounted for) + elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then + p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k + p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + end if + end do + + end subroutine s_add_particles_to_transfer_list_IC + + + subroutine s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts) - real(wp), dimension(:) :: bub_R0, gas_mg - real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv - real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel + real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt integer, dimension(:,:) :: lag_id - integer :: i, j, k, l, q + real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv + real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel integer :: position, bub_id, lag_num_ts + integer :: i, j, k, l, q + + #ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) - if (p_send_counts(i,j,k) > 0) then - print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) - end if + !if (p_send_counts(i,j,k) > 0) then + !print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) + !end if if (p_send_counts(i,j,k) > 0) then position = 0 do l = 0, p_send_counts(i,j,k) - 1 bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, 
p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) end do do q = 1, lag_num_ts call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) end do - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) end do - call MPI_Isend(p_send_buff, position, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, request, ierr) end if end if @@ -647,15 +645,18 @@ contains end subroutine s_mpi_send_particles - subroutine s_mpi_recv_particles(rad, drad, rvel, drvel, posPrev, pos, dpos, & - vel, dvel, gas_p, dgasp, gas_mv, dgasmv, gas_mg, lag_id, bub_R0, lag_num_ts, nbubs) + subroutine s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts, nbubs) + + real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt + integer, dimension(:,:) :: lag_id + real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv + real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel + integer :: position, bub_id, lag_num_ts, nbubs - real(wp), dimension(:) :: bub_R0, 
gas_mg - real(wp), dimension(:, :) :: rad, drad, rvel, drvel, gas_p, dgasp, gas_mv, dgasmv - real(wp), dimension(:, :, :) :: pos, dpos, posPrev, vel, dvel - integer, dimension(:, :) :: lag_id integer :: i, j, k, l, q - integer :: position, bub_id, nbubs, lag_num_ts #ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end @@ -663,36 +664,41 @@ contains do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - if (p_recv_counts(i,j,k) > 0) then - print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) - end if + !if (p_recv_counts(i,j,k) > 0) then + !print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) + !end if if (p_recv_counts(i,j,k) > 0) then position = 0 call MPI_recv(p_recv_buff, p_buff_size, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do l = 0, p_recv_counts(i,j,k) - 1 nbubs = nbubs + 1 bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do do q = 1, lag_num_ts call MPI_Unpack(p_recv_buff, p_buff_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasp(bub_id, q), 1, mpi_p, 
MPI_COMM_WORLD, ierr) call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do - call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) lag_id(bub_id, 2) = bub_id end do end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0668d7f9a3..3fbae2d4d7 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1338,8 +1338,8 @@ contains call s_initialize_cbc_module() call s_initialize_derived_variables() - if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf) + if (bubbles_lagrange) call s_initialize_bubbles_EL_module(q_cons_ts(1)%vf) if (hypoelasticity) call s_initialize_hypoelastic_module() if (hyperelasticity) call s_initialize_hyperelastic_module() diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index ad35b62ae1..c28857c8d4 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -100,6 +100,8 @@ def analytic(self): 'elliptic_smoothing_iters': ParamType.INT, 'viscous': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, + 'lag_params%vel_model': ParamType.INT, + 'fd_order': ParamType.INT, }) for ib_id in range(1, 10+1): From c06231042691eb864ad536a7e90abf87147b4f24 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sat, 14 Jun 2025 12:58:06 -0400 Subject: [PATCH 35/62] fix build --- src/common/m_mpi_common.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 9e0280eb23..f513067d34 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1077,7 +1077,7 @@ contains !! Remaining number of cells, in a particular coordinate direction, !! after the majority is divided up among the available processors - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators integer, dimension(1:num_dims) :: neighbor_coords diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 9179863314..0c0752dd7a 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -854,7 +854,7 @@ contains !! @param f_pinfl Driving pressure !! @param cell Bubble cell !! @param Romega Control volume radius - pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) + impure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_get_pinf #else @@ -1321,7 +1321,7 @@ contains !! @param pos Input coordinates !! @param cell Computational coordinate of the cell !! 
@param scoord Calculated particle coordinates - pure subroutine s_locate_cell(pos, cell, scoord) + impure subroutine s_locate_cell(pos, cell, scoord) real(wp), dimension(3), intent(in) :: pos real(wp), dimension(3), intent(out) :: scoord From e1e01043198052ac4f7715b74f6dc96ed1b6231b Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 16 Jun 2025 18:39:26 -0400 Subject: [PATCH 36/62] remove comments, fix communication, and refactor transfer lists --- src/post_process/m_start_up.f90 | 2 +- src/simulation/m_bubbles_EL.fpp | 125 ++------- src/simulation/m_bubbles_EL_kernels.fpp | 14 - src/simulation/m_mpi_proxy.fpp | 358 ++++++++++-------------- 4 files changed, 181 insertions(+), 318 deletions(-) diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 20db2682e6..db1697ebd2 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -680,7 +680,7 @@ impure subroutine s_initialize_modules ! Computation of parameters, allocation procedures, and/or any other tasks ! needed to properly setup the modules call s_initialize_global_parameters_module() - if (bubbles_euler .and. nb > 1) call s_simpson + if (bubbles_euler .and. nb > 1) call s_simpson(weight, R0) if (bubbles_euler .and. .not. polytropic) then call s_initialize_nonpoly() end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 0c0752dd7a..5313c27575 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -269,26 +269,13 @@ contains !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - call sleep(proc_rank) - print*, "Processor", proc_rank - do i = 1, nBubs - print*, i, lag_id(i,1), mtn_pos(i, 1:3, 1) - end do - if (num_procs > 1) then - call s_add_particles_to_transfer_list_IC(mtn_pos(:, :, 1), nbubs) - call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) - call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) - !if (proc_rank == 0 .or. proc_rank == 2) print*, "b", proc_rank, mtn_pos - !call sleep(1) end if !Populate temporal variables @@ -351,25 +338,20 @@ contains cell = fd_number - buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - !Check if the bubble is located in the ghost cell of a symmetric boundary - !if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & - !(any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & - !(any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & - !(any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. 
cell(2) > n)) then - !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - !end if - - !if (p > 0) then - !if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & - !(any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then - !call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") - !end if - !end if - - ! If particle is in the ghost cells, find the closest non-ghost cell - !cell(1) = min(max(cell(1), 0), m) - !cell(2) = min(max(cell(2), 0), n) - !if (p > 0) cell(3) = min(max(cell(3), 0), p) + ! Check if the bubble is located in the ghost cell of a symmetric boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + + if (p > 0) then + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") + end if + end if call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) @@ -576,12 +558,6 @@ contains end do end if - !call sleep(proc_rank) - !print*, proc_rank - !do k = 1, nBubs - !print*, k, lag_id(k, 1), mtn_pos(k,:,:) - !end do - ! Radial motion model adap_dt_stop_max = 0 !$acc parallel loop gang vector default(present) private(k, myalpha_rho, myalpha, Re, cell) & @@ -660,13 +636,6 @@ contains end if end do - !if (lag_id(k,1) == 2) then - !print*, "as", proc_rank, stage, k, mtn_dposdt(k,:,stage), mtn_dveldt(k,:,stage) - !end if - - !if (proc_rank == 0 .or. proc_rank == 2) print*, "a", proc_rank, mtn_dposdt(:,:,stage), mtn_dveldt(:,:,stage) - !call sleep(1) - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do @@ -889,8 +858,6 @@ contains if (scoord(i) < 0._wp) cell(i) = cell(i) - 1 end do end if - !print*, "h", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) - !call sleep(1) if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation @@ -908,45 +875,18 @@ contains !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. 
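            !< Sketch of the weighting used below (2D case, illustrative only): with psi(1:2)
            !! the normalized offsets from the owning cell centers, the interpolated driving
            !! pressure is
            !!   p_inf = p(i  ,j  )*(1-psi(1))*(1-psi(2)) + p(i+1,j  )*psi(1)*(1-psi(2))
            !!         + p(i  ,j+1)*(1-psi(1))*psi(2)     + p(i+1,j+1)*psi(1)*psi(2)
            !! The four (eight in 3D) coefficients sum to one, so the stencil recovers the
            !! cell-center value when the bubble sits exactly on a cell center.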
psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - !if (cell(1) == (m + buff_size - fd_number)) then - !cell(1) = cell(1) - 1 - !psi(1) = 1._wp - !else if (cell(1) == (fd_number - buff_size)) then - !psi(1) = 0._wp - !else - !if (psi(1) < x_cc(cell(1))) cell(1) = cell(1) - 1 - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - !end if + psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - !if (cell(2) == (n + buff_size - fd_number)) then - !cell(2) = cell(2) - 1 - !psi(2) = 1._wp - !else if (cell(2) == (fd_number - buff_size)) then - !psi(2) = 0._wp - !else - !if (psi(2) < y_cc(cell(2))) cell(2) = cell(2) - 1 - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - !end if + psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) if (p > 0) then psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - !if (cell(3) == (p + buff_size - fd_number)) then - !cell(3) = cell(3) - 1 - !psi(3) = 1._wp - !else if (cell(3) == (fd_number - buff_size)) then - !psi(3) = 0._wp - !else - !if (psi(3) < z_cc(cell(3))) cell(3) = cell(3) - 1 - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - !end if + psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) else psi(3) = 0._wp end if - !print*, "i", proc_rank, cell, mtn_pos(bub_id, 1:3, 2), mtn_s(bub_id, 1:3, 2) - !call sleep(1) - !< Perform bilinear interpolation if (p == 0) then !2D f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) @@ -1125,10 +1065,6 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - !if (lag_id(k,1) == 2) then - !print*, "g11", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) - !print*, "g12", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) - !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) @@ -1144,10 +1080,6 @@ contains mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - !if (lag_id(k,1) == 2) then - !print*, "g21", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,1), mtn_dveldt(k,:,1) - !print*, "g22", proc_rank, k, lag_id(k,1), mtn_dposdt(k,:,2), mtn_dveldt(k,:,2) - !end if end do if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) @@ -1296,23 +1228,12 @@ contains end do if (num_procs > 1) then - call s_add_particles_to_transfer_list(mtn_pos(:, :, dest), mtn_posPrev(:, :, dest), nbubs) - call s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts) - call s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, dest), mtn_posPrev(:, :, dest)) + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, 
intfc_dveldt, gas_dpdt, & gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) - !do k = 1, nBubs - !if (lag_id(k,1) == 2) then - !print*, "c1", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,1), mtn_dposdt(k, :, 1) - !print*, "c2", proc_rank, lag_id(k, 1), mtn_dveldt(k,:,2), mtn_dposdt(k, :, 2) - !endif - !end do end if end subroutine s_enforce_EL_bubbles_boundary_conditions @@ -1330,18 +1251,22 @@ contains integer :: i do while (pos(1) < x_cb(cell(1) - 1)) + if (cell(1) == fd_number - buff_size) print*, "xb error", proc_rank, pos cell(1) = cell(1) - 1 end do do while (pos(1) >= x_cb(cell(1))) + if (cell(1) == m + buff_size - fd_number) print*, "xe error", proc_rank, pos cell(1) = cell(1) + 1 end do do while (pos(2) < y_cb(cell(2) - 1)) + if (cell(2) == fd_number - buff_size) print*, "yb error", proc_rank, pos cell(2) = cell(2) - 1 end do do while (pos(2) >= y_cb(cell(2))) + if (cell(2) == n + buff_size - fd_number) print*, "ye error", proc_rank, pos cell(2) = cell(2) + 1 end do diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index cb9279575d..98cbad4f66 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -500,15 +500,6 @@ contains (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) end if - !if (i == 1 .and. id == 2) then - !print*, "c", proc_rank, dp, & - !q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)), & - !q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)), & - !q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)), & - !pos, x_cc(cell(1) + 1), x_cc(cell(1) - 1), cell - !call sleep(1) - !end if - vol = (4._wp/3._wp) * pi * rad**3._wp force = -1._wp * vol * dp @@ -526,11 +517,6 @@ contains a = force / (mg + mv) - !if (i == 1 .and. id == 2) then - !print*, "d", proc_rank, vol, force, v_rel, vel, v_rel - vel, a, mg, mv - !call sleep(1) - !end if - end function f_get_acceleration function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index aa345d4ab7..f79eb26995 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -32,6 +32,8 @@ module m_mpi_proxy implicit none + integer, parameter :: n_neighbor = 26 + integer, private, allocatable, dimension(:) :: ib_buff_send !< !! This variable is utilized to pack and send the buffer of the immersed !! 
boundary markers, for a single computational domain boundary at the @@ -54,7 +56,7 @@ module m_mpi_proxy integer, dimension(:,:,:,:), allocatable :: p_send_ids character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff type(bounds_info), dimension(3) :: comm_coords - integer :: p_buff_size + integer :: p_buff_size, p_var_size !$acc declare create(p_send_counts, comm_coords) contains @@ -94,7 +96,9 @@ contains call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) nReal = 7 + 16*2 + 10*lag_num_ts - p_buff_size = lag_params%nBubs_glb * (nReal * real_size + int_size) + p_var_size = (nReal * real_size + int_size) + p_buff_size = lag_params%nBubs_glb * p_var_size + @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) comm_coords(1)%beg = x_cb(buff_size - fd_number - 1) @@ -422,9 +426,10 @@ contains end subroutine s_mpi_sendrecv_ib_buffers - subroutine s_add_particles_to_transfer_list(pos, posPrev, nbub) + subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) - real(wp), dimension(:,:) :: pos, posPrev + real(wp), dimension(:,:) :: pos + real(wp), dimension(:,:), optional :: posPrev integer :: bubID, nbub integer :: i, j, k @@ -437,185 +442,82 @@ contains end do do k = 1, nbub - ! Left face - if (posPrev(k,1) > comm_coords(1)%beg .and. pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then - !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k - !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 + if (f_crosses_boundary(k, 1, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 0, 0) if (n > 0) then - ! Left bottom corner - if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k - p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Left top corner - elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k - p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) end if end if - ! Right face - elseif (posPrev(k,1) < comm_coords(1)%end .and. pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then - !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k - !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 + elseif (f_crosses_boundary(k, 1, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 0, 0) if (n > 0) then - ! Right bottom corner - if (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k - p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Right top corner - elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. 
nidx(2)%end /= 0) then - p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k - p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, -1, 0) + call s_add_particle_to_direction(k, 0, -1, 0) + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 1, 0) + call s_add_particle_to_direction(k, 0, 1, 0) end if end if - ! Bottom face (corners already accounted for) - elseif (posPrev(k,2) > comm_coords(2)%beg .and. pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Top face (corners already accounted for) - elseif (posPrev(k,2) < comm_coords(2)%end .and. pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + elseif (f_crosses_boundary(k, 2, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, -1, 0) + elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 1, 0) end if end do - end subroutine s_add_particles_to_transfer_list - - subroutine s_add_particles_to_transfer_list_IC(pos, nbub) + contains - real(wp), dimension(:,:) :: pos - integer :: bubID, nbub - integer :: i, j, k + logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev) + integer, intent(in) :: particle_id, dir, loc + real(wp), dimension(:,:), intent(in) :: pos + real(wp), dimension(:,:), optional, intent(in) :: posPrev - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - p_send_counts(i,j,k) = 0 - end do - end do - end do + if (loc == -1) then ! Beginning of the domain + if (nidx(dir)%beg == 0) then + f_crosses_boundary = .false. + return + end if - do k = 1, nbub - ! Left face - if (pos(k,1) < comm_coords(1)%beg .and. nidx(1)%beg /= 0) then - !p_send_ids(-1, 0, 0, p_send_counts(-1, 0, 0)) = k - !p_send_counts(-1, 0, 0) = p_send_counts(-1, 0, 0) + 1 - ! Left bottom corner - if (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(-1, -1, 0, p_send_counts(-1, -1, 0)) = k - p_send_counts(-1, -1, 0) = p_send_counts(-1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Left top corner - elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(-1, 1, 0, p_send_counts(-1, 1, 0)) = k - p_send_counts(-1, 1, 0) = p_send_counts(-1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + if (present(posPrev)) then + f_crosses_boundary = (posPrev(particle_id,dir) > comm_coords(dir)%beg .and. & + pos(particle_id,dir) < comm_coords(dir)%beg) + else + f_crosses_boundary = (pos(particle_id,dir) < comm_coords(dir)%beg) end if - ! Right face - elseif (pos(k,1) > comm_coords(1)%end .and. nidx(1)%end /= 0) then - !p_send_ids(1, 0, 0, p_send_counts(1, 0, 0)) = k - !p_send_counts(1, 0, 0) = p_send_counts(1, 0, 0) + 1 - ! Right bottom corner - if (pos(k,2) < comm_coords(2)%beg .and. 
nidx(2)%beg /= 0) then - p_send_ids(1, -1, 0, p_send_counts(1, -1, 0)) = k - p_send_counts(1, -1, 0) = p_send_counts(1, -1, 0) + 1 - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Right top corner - elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(1, 1, 0, p_send_counts(1, 1, 0)) = k - p_send_counts(1, 1, 0) = p_send_counts(1, 1, 0) + 1 - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 + elseif (loc == 1) then ! End of the domain + if (nidx(dir)%end == 0) then + f_crosses_boundary = .false. + return + end if + + if (present(posPrev)) then + f_crosses_boundary = (posPrev(particle_id,dir) < comm_coords(dir)%end .and. & + pos(particle_id,dir) > comm_coords(dir)%end) + else + f_crosses_boundary = (pos(particle_id,dir) > comm_coords(dir)%end) end if - ! Bottom face (corners already accounted for) - elseif (pos(k,2) < comm_coords(2)%beg .and. nidx(2)%beg /= 0) then - p_send_ids(0, -1, 0, p_send_counts(0, -1, 0)) = k - p_send_counts(0, -1, 0) = p_send_counts(0, -1, 0) + 1 - ! Top face (corners already accounted for) - elseif (pos(k,2) > comm_coords(2)%end .and. nidx(2)%end /= 0) then - p_send_ids(0, 1, 0, p_send_counts(0, 1, 0)) = k - p_send_counts(0, 1, 0) = p_send_counts(0, 1, 0) + 1 end if - end do - end subroutine s_add_particles_to_transfer_list_IC + end function f_crosses_boundary + subroutine s_add_particle_to_direction(particle_id, dir_x, dir_y, dir_z) - subroutine s_mpi_send_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & - rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & - dgasmv, dpos, dvel, lag_num_ts) + integer, intent(in) :: particle_id, dir_x, dir_y, dir_z - real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt - integer, dimension(:,:) :: lag_id - real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv - real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel - integer :: position, bub_id, lag_num_ts + p_send_ids(dir_x, dir_y, dir_z, p_send_counts(dir_x, dir_y, dir_z)) = particle_id + p_send_counts(dir_x, dir_y, dir_z) = p_send_counts(dir_x, dir_y, dir_z) + 1 - integer :: i, j, k, l, q + end subroutine s_add_particle_to_direction + end subroutine s_add_particles_to_transfer_list -#ifdef MFC_MPI - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - call MPI_Isend(p_send_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, request, ierr) - !if (p_send_counts(i,j,k) > 0) then - !print*, "SEND", proc_rank, neighbor_ranks(i,j,k), p_send_counts(i,j,k) - !end if - if (p_send_counts(i,j,k) > 0) then - position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, 
MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do - call MPI_Isend(p_send_buff, position, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, request, ierr) - end if - end if - end do - end do - end do -#endif - - end subroutine s_mpi_send_particles - - subroutine s_mpi_recv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & dgasmv, dpos, dvel, lag_num_ts, nbubs) @@ -624,61 +526,111 @@ contains integer, dimension(:,:) :: lag_id real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel - integer :: position, bub_id, lag_num_ts, nbubs + integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size integer :: i, j, k, l, q -#ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then - call MPI_recv(p_recv_counts(i,j,k), 1, MPI_INTEGER, neighbor_ranks(i,j,k), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - !if (p_recv_counts(i,j,k) > 0) then - !print*, "RECV", neighbor_ranks(i,j,k), proc_rank, p_recv_counts(i,j,k) - !end if - if (p_recv_counts(i,j,k) > 0) then - position = 0 - call MPI_recv(p_recv_buff, p_buff_size, MPI_PACKED, neighbor_ranks(i,j,k), 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do l = 0, p_recv_counts(i,j,k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_buff_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, 
position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_buff_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_buff_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id + + partner = neighbor_ranks(i,j,k) + + send_tag = neighbor_tag(i, j, k) + recv_tag = neighbor_tag(-i, -j, -k) + + call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + p_recv_size = p_recv_counts(i,j,k) * p_var_size + + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, 
mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + end do + + send_tag = send_tag + max(num_procs, n_neighbor) + recv_tag = recv_tag + max(num_procs, n_neighbor) + + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + position = 0 + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, 
ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do - end if + lag_id(bub_id, 2) = bub_id + + end do end if end do end do end do -#endif - end subroutine s_mpi_recv_particles + end subroutine s_mpi_sendrecv_particles + + integer function neighbor_tag(i, j, k) result(tag) + + integer, intent(in) :: i, j, k + + tag = (k + 1)*9 + (j + 1)*3 + (i + 1) + + end function neighbor_tag impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq From 7df40fd0d0ca1dcfde3a675e861f0079b4b5ab01 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Sun, 22 Jun 2025 16:30:30 -0400 Subject: [PATCH 37/62] comments and GPU MPI --- src/simulation/m_bubbles_EL.fpp | 37 ++-- src/simulation/m_bubbles_EL_kernels.fpp | 63 +++--- src/simulation/m_mpi_proxy.fpp | 249 +++++++++++++++--------- 3 files changed, 213 insertions(+), 136 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5313c27575..31d83c12ea 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -617,24 +617,24 @@ contains gas_dmvdt(k, stage) = myMvdot gas_dpdt(k, stage) = myPbdot - end if + do l = 1, num_dims + if (lag_params%vel_model == 1) then + mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & + cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 0._wp + elseif (lag_params%vel_model == 2) then + mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) + mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & + intfc_rad(k,2), mtn_vel(k,l,2), & + gas_mg(k), gas_mv(k, 2), & + Re(1), myRho, cell, l, q_prim_vf) + else + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp + end if + end do - do l = 1, num_dims - if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & - cell, l, q_prim_vf) - mtn_dveldt(k, l, stage) = 0._wp - elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,2), mtn_vel(k,l,2), & - gas_mg(k), gas_mv(k, 2), & - Re(1), myRho, cell, l, lag_id(k,1), q_prim_vf) - else - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end if - end do + end if adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) @@ -1155,7 +1155,8 @@ contains end subroutine s_update_lagrange_tdv_rk !> This subroutine enforces reflective and wall boundary conditions for EL bubbles - subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + !! 
@param dest Destination for the bubble position update + impure subroutine s_enforce_EL_bubbles_boundary_conditions(dest) integer, intent(in) :: dest integer :: k, i, patch_id diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 98cbad4f66..6f076f36d9 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -437,14 +437,23 @@ contains end subroutine s_get_cell - function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) + !! This function interpolates the velocity of Eulerian field at the position + !! of the bubble. + !! @param pos Position of the bubble in directiion i + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return v Interpolated velocity at the position of the bubble + pure function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) !$acc routine seq - integer, dimension(3) :: cell - integer :: i - type(scalar_field), dimension(sys_size) :: q_prim_vf + real(wp), intent(in) :: pos + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp) :: v real(wp) :: L1, L2, L3 - real(wp) :: pos, x1, x2, x3 + real(wp) :: x1, x2, x3 real(wp) :: y1, y2, y3 if (i == 1) then @@ -478,13 +487,28 @@ contains end function f_interpolate_velocity - function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,id,q_prim_vf) result(a) + !! This function calculates the acceleration of the bubble + !! based on the pressure gradient, velocity, and drag model. + !! @param pos Position of the bubble in direction i + !! @param rad Radius of the bubble + !! @param vel Velocity of the bubble + !! @param mg Mass of the gas in the bubble + !! @param mv Mass of the liquid in the bubble + !! @param Re Reynolds number + !! @param rho Density of the fluid + !! @param cell Computational coordinates of the bubble + !! @param i Direction of the velocity (1: x, 2: y, 3: z) + !! @param q_prim_vf Eulerian field with primitive variables + !! @return a Acceleration of the bubble in direction i + pure function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) !$acc routine seq - integer, dimension(3) :: cell - integer :: i, id - type(scalar_field), dimension(sys_size) :: q_prim_vf - real(wp) :: a, vol, mass, force, vel, area - real(wp) :: pos, rad, dp, v_rel, mg, mv, Re, rho, c_d + real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho + integer, dimension(3), intent(in) :: cell + integer, intent(in) :: i + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + + real(wp) :: a + real(wp) :: dp, vol, force, v_rel if (i == 1) then dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & @@ -509,27 +533,10 @@ contains force = force - (4._wp * pi * rad * v_rel) / Re else if (lag_params%drag_model == 2) then ! No slip Stokes drag force = force - (6._wp * pi * rad * v_rel) / Re - elseif (lag_params%drag_model == 3) then ! Clift and Gauvin (1971) Cd fit - area = pi * rad**2._wp - c_d = f_get_clift_gauvin_drag_coeff(v_rel, Re, rho, rad) - force = force - 0.5_wp * sign(1._wp, v_rel) * rho * (v_rel ** 2._wp) * c_d * area end if a = force / (mg + mv) end function f_get_acceleration - function f_get_clift_gauvin_drag_coeff(v_rel, mu_inv, rho, rad) result(c_d) -!$acc routine seq - real(wp) :: v_rel, mu_inv, rho, rad - real(wp) :: c_d - real(wp) :: Re_b ! 
Bubble Reynolds number - - Re_b = max(2._wp * rho * abs(v_rel) * rad * mu_inv, Re_b_min) - - C_d = 24._wp * (1 + 0.15_wp * Re_b ** (0.687_wp)) / Re_b - C_d = C_d + 0.42_wp / (1 + (42500 / (Re_b ** 1.16_wp))) - - end function f_get_clift_gauvin_drag_coeff - end module m_bubbles_EL_kernels diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index f79eb26995..1cb8273c8a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -87,6 +87,9 @@ contains end subroutine s_initialize_mpi_proxy_module + !! This subroutine initializes the MPI buffers and variables + !! required for the particle communication. + !! @param lag_num_ts Number of stages in time-stepping scheme subroutine s_initialize_particles_mpi(lag_num_ts) integer :: i, j, k @@ -426,7 +429,13 @@ contains end subroutine s_mpi_sendrecv_ib_buffers - subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) + !> This subroutine adds particles to the transfer list for the MPI + !! communication. + !! @param nBub Current LOCAL number of bubbles + !! @param pos Current position of each bubble + !! @param posPrev Previous position of each bubble (optional, not used + !! for communication of initial condition) + impure subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) real(wp), dimension(:,:) :: pos real(wp), dimension(:,:), optional :: posPrev @@ -517,7 +526,33 @@ contains end subroutine s_add_particles_to_transfer_list - subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !> This subroutine performs the MPI communication for lagrangian particles/ + !! bubbles. + !! @param bub_R0 Initial radius of each bubble + !! @param Rmax_stats Maximum radius of each bubble + !! @param Rmin_stats Minimum radius of each bubble + !! @param gas_mg Mass of gas in each bubble + !! @param gas_betaT Heat flux model coefficient for each bubble + !! @param gas_betaC mass flux model coefficient for each bubble + !! @param bub_dphidt Subgrid velocity potential for each bubble + !! @param lag_id Global and local ID of each bubble + !! @param gas_p Pressure of the gas in each bubble + !! @param gas_mv Mass of vapor in each bubble + !! @param rad Radius of each bubble + !! @param rvel Radial velocity of each bubble + !! @param pos Position of each bubble + !! @param posPrev Previous position of each bubble + !! @param vel Velocity of each bubble + !! @param scoord Cell index in real format of each bubble + !! @param drad Radial velocity of each bubble + !! @param drvel Radial acceleration of each bubble + !! @param dgasp Time derivative of gas pressure in each bubble + !! @param dgasmv Time derivative of vapor mass in each bubble + !! @param dpos Time derivative of position of each bubble + !! @param dvel Time derivative of velocity of each bubble + !! @param lag_num_ts Number of stages in time-stepping scheme + !! 
@param nBubs Local number of bubbles + impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & dgasmv, dpos, dvel, lag_num_ts, nbubs) @@ -530,100 +565,134 @@ contains integer :: i, j, k, l, q - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - - partner = neighbor_ranks(i,j,k) - - send_tag = neighbor_tag(i, j, k) - recv_tag = neighbor_tag(-i, -j, -k) - - call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - p_recv_size = p_recv_counts(i,j,k) * p_var_size - - position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do - - send_tag = send_tag + max(num_procs, n_neighbor) - recv_tag = recv_tag + max(num_procs, n_neighbor) - - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & - p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - position = 0 - 
do l = 0, p_recv_counts(i,j,k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc host_data use_device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & + !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) + call nvtxStartRange("LAG-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("LAG-COMM-DEV2HOST") + !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & + !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) + call nvtxEndRange + call nvtxStartRange("LAG-COMM-SENDRECV-NO-RMDA") + #:endif + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + + partner = neighbor_ranks(i,j,k) + + send_tag = neighbor_tag(i, j, k) + recv_tag = neighbor_tag(-i, -j, -k) + + call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + p_recv_size = p_recv_counts(i,j,k) * p_var_size + + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, 
ierr) + end do + end do + + send_tag = send_tag + max(num_procs, n_neighbor) + recv_tag = recv_tag + max(num_procs, n_neighbor) + + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + position = 0 + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id + + end do + end if end do - end if + end do end do - end do - end do + + #:if rdma_mpi + !$acc end host_data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & + !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) + call nvtxEndRange + #:endif + end if + #:endfor end subroutine s_mpi_sendrecv_particles + + !! This function returns a unique tag for each neighbor based on its position + !! relative to the current process. + !! @param i, j, k Indices of the neighbor in the range [-1, 1] + !! 
@return tag Unique integer tag for the neighbor integer function neighbor_tag(i, j, k) result(tag) integer, intent(in) :: i, j, k From e0d73987306bc9e086c1494eac485e7a7036ef15 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:30:28 -0400 Subject: [PATCH 38/62] 3D Transfer list --- src/simulation/m_mpi_proxy.fpp | 96 ++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 1cb8273c8a..788f5297d9 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -457,9 +457,45 @@ contains if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then call s_add_particle_to_direction(k, -1, -1, 0) call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then call s_add_particle_to_direction(k, -1, 1, 0) call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, -1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if end if end if elseif (f_crosses_boundary(k, 1, 1, pos, posPrev)) then @@ -468,15 +504,75 @@ contains if (f_crosses_boundary(k, 2, -1, pos, posPrev)) then call s_add_particle_to_direction(k, 1, -1, 0) call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, -1, -1) + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, -1, 1) + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then call s_add_particle_to_direction(k, 1, 1, 0) call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 1, -1) + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, 1, 0, -1) + call 
s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 1, 1) + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + else + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 0, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 1, 0, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if end if end if elseif (f_crosses_boundary(k, 2, -1, pos, posPrev)) then call s_add_particle_to_direction(k, 0, -1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, -1, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, -1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if elseif (f_crosses_boundary(k, 2, 1, pos, posPrev)) then call s_add_particle_to_direction(k, 0, 1, 0) + if (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 1, -1) + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 1, 1) + call s_add_particle_to_direction(k, 0, 0, 1) + end if + end if + elseif (p > 0) then + if (f_crosses_boundary(k, 3, -1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 0, -1) + elseif (f_crosses_boundary(k, 3, 1, pos, posPrev)) then + call s_add_particle_to_direction(k, 0, 0, 1) + end if end if end do From fc94cfaea6e7e55b5355f6e98f6061bc6b4d5247 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 24 Jun 2025 16:41:18 -0400 Subject: [PATCH 39/62] GPU particle MPI --- src/simulation/m_bubbles_EL.fpp | 108 +++++++++++++---- src/simulation/m_mpi_proxy.fpp | 206 ++++++++++++++------------------ 2 files changed, 172 insertions(+), 142 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 31d83c12ea..76e06266cc 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -73,6 +73,11 @@ module m_bubbles_EL !$acc declare create(nBubs, Rmax_glb, Rmin_glb, q_beta, q_beta_idx) + + integer, allocatable, dimension(:) :: keep_bubble, prefix_sum + integer :: active_bubs + !$acc declare create(keep_bubble, prefix_sum, active_bubs) + contains !> Initializes the lagrangian subgrid bubble solver @@ -139,6 +144,8 @@ contains @:ALLOCATE(mtn_dposdt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) + @:ALLOCATE(keep_bubble(1:nBubs_glb), prefix_sum(1:nBubs_glb)) + if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol if (num_procs > 1) call s_initialize_particles_mpi(lag_num_ts) @@ -257,6 +264,15 @@ contains if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. 
Check input/lag_bubbles.dat') end if + if (num_procs > 1) then + call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) + call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + end if + !$acc update device(bubbles_lagrange, lag_params) !$acc update device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, & @@ -269,15 +285,6 @@ contains !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - if (num_procs > 1) then - call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) - call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) - end if - !Populate temporal variables call s_transfer_data_to_tmp() call s_smear_voidfraction() @@ -1159,13 +1166,12 @@ contains impure subroutine s_enforce_EL_bubbles_boundary_conditions(dest) integer, intent(in) :: dest - integer :: k, i, patch_id + integer :: k, i, patch_id, offset integer, dimension(3) :: cell - logical, dimension(1:nBubs) :: remove_bubble !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs - remove_bubble(k) = .false. + keep_bubble(k) = 1 if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then @@ -1174,9 +1180,9 @@ contains .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & @@ -1186,9 +1192,9 @@ contains .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 end if if (p > 0) then @@ -1199,9 +1205,9 @@ contains .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then - remove_bubble(k) = .true. + keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then - remove_bubble(k) = .true. 
+ keep_bubble(k) = 0 end if end if @@ -1223,20 +1229,76 @@ contains end if end do - !$acc loop seq - do k = nBubs, 1, -1 - if (remove_bubble(k)) call s_remove_lag_bubble(k) + call nvtxStartRange("LAG-BC-DEV2HOST") + !$acc update host(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, & + !$acc keep_bubble, nBubs) + call nvtxEndRange + + do k = 1, nBubs + if (k == 1) then + prefix_sum(k) = keep_bubble(k) + else + prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) + end if + if (k == nBubs) active_bubs = prefix_sum(k) + end do + + do k = 1, nBubs + if (keep_bubble(k) == 1) then + if (prefix_sum(k) /= k) then + bub_R0(prefix_sum(k)) = bub_R0(k) + Rmax_stats(prefix_sum(k)) = Rmax_stats(k) + Rmin_stats(prefix_sum(k)) = Rmin_stats(k) + gas_mg(prefix_sum(k)) = gas_mg(k) + gas_betaT(prefix_sum(k)) = gas_betaT(k) + gas_betaC(prefix_sum(k)) = gas_betaC(k) + bub_dphidt(prefix_sum(k)) = bub_dphidt(k) + lag_id(prefix_sum(k), 1) = lag_id(k, 1) + gas_p(prefix_sum(k), 1:2) = gas_p(k, 1:2) + gas_mv(prefix_sum(k), 1:2) = gas_mv(k, 1:2) + intfc_rad(prefix_sum(k), 1:2) = intfc_rad(k, 1:2) + intfc_vel(prefix_sum(k), 1:2) = intfc_vel(k, 1:2) + mtn_pos(prefix_sum(k), 1:3, 1:2) = mtn_pos(k, 1:3, 1:2) + mtn_posPrev(prefix_sum(k), 1:3, 1:2) = mtn_posPrev(k, 1:3, 1:2) + mtn_vel(prefix_sum(k), 1:3, 1:2) = mtn_vel(k, 1:3, 1:2) + mtn_s(prefix_sum(k), 1:3, 1:2) = mtn_s(k, 1:3, 1:2) + intfc_draddt(prefix_sum(k), 1:lag_num_ts) = intfc_draddt(k, 1:lag_num_ts) + intfc_dveldt(prefix_sum(k), 1:lag_num_ts) = intfc_dveldt(k, 1:lag_num_ts) + gas_dpdt(prefix_sum(k), 1:lag_num_ts) = gas_dpdt(k, 1:lag_num_ts) + gas_dmvdt(prefix_sum(k), 1:lag_num_ts) = gas_dmvdt(k, 1:lag_num_ts) + mtn_dposdt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dposdt(k, 1:3, 1:lag_num_ts) + mtn_dveldt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dveldt(k, 1:3, 1:lag_num_ts) + end if + end if + if (k == nBubs) nBubs = active_bubs end do if (num_procs > 1) then + call nvtxStartRange("LAG-BC-TRANSFER-LIST") call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, dest), mtn_posPrev(:, :, dest)) + call nvtxEndRange + + call nvtxStartRange("LAG-BC-SENDRECV") call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + call nvtxEndRange end if + call nvtxStartRange("LAG-BC-HOST2DEV") + !$acc update device(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + call nvtxEndRange + end subroutine s_enforce_EL_bubbles_boundary_conditions !> This subroutine returns the computational coordinate of the cell for the given position. 
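(The keep_bubble/prefix_sum logic in the hunk above is a serial stream compaction: each bubble that survives the boundary check gets a flag of 1, an inclusive prefix sum of the flags gives every survivor its new slot, and a single forward pass then copies survivors down in place. The pass is safe because prefix_sum(k) <= k for every k, so a surviving entry is never overwritten before it has been moved. A minimal standalone sketch of the same pattern follows; the program and its array names (radius, keep, new_idx) are illustrative placeholders, not MFC code.)

program compaction_sketch
    ! Illustrative only: mark / prefix-sum / compact pattern used when
    ! removing out-of-domain particles. Not part of the patch series.
    implicit none
    integer, parameter :: wp = selected_real_kind(15, 307)
    integer  :: n, k, n_active
    real(wp) :: radius(6)
    integer  :: keep(6), new_idx(6)

    n = 6
    radius = [1._wp, 2._wp, 3._wp, 4._wp, 5._wp, 6._wp]
    keep   = [1, 0, 1, 1, 0, 1]   ! 1 = particle stays, 0 = particle removed

    ! Inclusive prefix sum of the flags gives each survivor its new slot
    new_idx(1) = keep(1)
    do k = 2, n
        new_idx(k) = new_idx(k - 1) + keep(k)
    end do
    n_active = new_idx(n)

    ! Forward pass: new_idx(k) <= k, so copying in increasing k is safe
    do k = 1, n
        if (keep(k) == 1 .and. new_idx(k) /= k) radius(new_idx(k)) = radius(k)
    end do
    n = n_active

    print *, 'active particles:', n
    print *, 'compacted radii :', radius(1:n)
end program compaction_sketch

(With any Fortran compiler this prints 4 active particles and the radii 1, 3, 4, 6, mirroring how nBubs is shrunk to active_bubs after the in-place copies above.)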
@@ -1252,22 +1314,18 @@ contains integer :: i do while (pos(1) < x_cb(cell(1) - 1)) - if (cell(1) == fd_number - buff_size) print*, "xb error", proc_rank, pos cell(1) = cell(1) - 1 end do do while (pos(1) >= x_cb(cell(1))) - if (cell(1) == m + buff_size - fd_number) print*, "xe error", proc_rank, pos cell(1) = cell(1) + 1 end do do while (pos(2) < y_cb(cell(2) - 1)) - if (cell(2) == fd_number - buff_size) print*, "yb error", proc_rank, pos cell(2) = cell(2) - 1 end do do while (pos(2) >= y_cb(cell(2))) - if (cell(2) == n + buff_size - fd_number) print*, "ye error", proc_rank, pos cell(2) = cell(2) + 1 end do diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 788f5297d9..6428eab9f1 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -579,6 +579,7 @@ contains contains logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev) + integer, intent(in) :: particle_id, dir, loc real(wp), dimension(:,:), intent(in) :: pos real(wp), dimension(:,:), optional, intent(in) :: posPrev @@ -661,126 +662,97 @@ contains integer :: i, j, k, l, q - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - !$acc host_data use_device(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & - !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) - call nvtxStartRange("LAG-COMM-SENDRECV-RDMA") - #:else - call nvtxStartRange("LAG-COMM-DEV2HOST") - !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & - !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) - call nvtxEndRange - call nvtxStartRange("LAG-COMM-SENDRECV-NO-RMDA") - #:endif + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) > 0) then + + partner = neighbor_ranks(i,j,k) + + send_tag = neighbor_tag(i, j, k) + recv_tag = neighbor_tag(-i, -j, -k) + + call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + p_recv_size = p_recv_counts(i,j,k) * p_var_size + + position = 0 + do l = 0, p_send_counts(i,j,k) - 1 + bub_id = p_send_ids(i,j,k,l) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, 
position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + end do + + send_tag = send_tag + max(num_procs, n_neighbor) + recv_tag = recv_tag + max(num_procs, n_neighbor) + + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + position = 0 + do l = 0, p_recv_counts(i,j,k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do q = 1, 2 + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do q = 1, lag_num_ts + call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call 
MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - - partner = neighbor_ranks(i,j,k) - - send_tag = neighbor_tag(i, j, k) - recv_tag = neighbor_tag(-i, -j, -k) - - call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - p_recv_size = p_recv_counts(i,j,k) * p_var_size - - position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do - - send_tag = send_tag + max(num_procs, n_neighbor) - recv_tag = recv_tag + max(num_procs, n_neighbor) - - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & - 
p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - position = 0 - do l = 0, p_recv_counts(i,j,k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id - - end do - end if end do - end do + end if end do - - #:if rdma_mpi - !$acc end host_data - !$acc wait - #:else - call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update host(lag_id, bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, gas_p, gas_mv, rad, rvel, pos, posPrev, vel, scoord, & - !$acc drad, drvel, dgasp, dgasmv, dpos, dvel) - call nvtxEndRange - #:endif - end if - #:endfor + end do + end do end subroutine s_mpi_sendrecv_particles From 2e89526218b9ff092066393c0971decb95602c83 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:59:24 -0400 Subject: [PATCH 40/62] improve EL I/O and silo output --- src/common/m_constants.fpp | 1 + src/post_process/m_data_output.fpp | 460 ++++++++++++++++++++--- src/post_process/m_global_parameters.fpp | 36 ++ src/post_process/m_mpi_proxy.fpp | 7 +- src/post_process/m_start_up.f90 | 9 +- 
src/simulation/m_bubbles_EL.fpp | 326 ++++++++++------ src/simulation/m_start_up.fpp | 1 - toolchain/mfc/run/case_dicts.py | 18 + 8 files changed, 670 insertions(+), 188 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 0c17e9789f..8e6a9a1c34 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -50,6 +50,7 @@ module m_constants ! Lagrange bubbles constants integer, parameter :: mapCells = 3 !< Number of cells around the bubble where the smoothening function will have effect real(wp), parameter :: R_uni = 8314._wp ! Universal gas constant - J/kmol/K + integer, parameter :: lag_io_vars = 21 ! Number of variables per particle for MPI_IO ! Strang Splitting constants real(wp), parameter :: dflt_adap_dt_tol = 1e-4_wp !< Default tolerance for adaptive step size diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 382f8387ae..c1dde087f5 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -30,7 +30,8 @@ module m_data_output s_open_energy_data_file, & s_write_grid_to_formatted_database_file, & s_write_variable_to_formatted_database_file, & - s_write_lag_bubbles_results, & + s_write_lag_bubbles_results_to_text, & + s_write_lag_bubbles_to_formatted_database_file, & s_write_intf_data_file, & s_write_energy_data_file, & s_close_formatted_database_file, & @@ -276,12 +277,14 @@ contains end if if (bubbles_lagrange) then !Lagrangian solver - dbdir = trim(case_dir)//'/lag_bubbles_post_process' - file_loc = trim(dbdir)//'/.' - call my_inquire(file_loc, dir_check) + if (lag_txt_wrt) then + dbdir = trim(case_dir)//'/lag_bubbles_post_process' + file_loc = trim(dbdir)//'/.' + call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(dbdir)) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(dbdir)) + end if end if end if @@ -1088,7 +1091,7 @@ contains !> Subroutine that writes the post processed results in the folder 'lag_bubbles_data' !! @param t_step Current time step - impure subroutine s_write_lag_bubbles_results(t_step) + impure subroutine s_write_lag_bubbles_results_to_text(t_step) integer, intent(in) :: t_step @@ -1106,31 +1109,65 @@ contains logical :: lg_bub_file, file_exist integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer :: ifile, ierr, tot_data + integer :: ifile, ierr + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part integer :: i - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then - if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - read (9) tot_data, time_real - close (9) - end if - else - print '(A)', trim(file_loc)//' is missing. Exiting.' - call s_mpi_abort + ! Check if file exists + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if - call MPI_BCAST(tot_data, 1, MPI_integer, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(time_real, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate(proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + lsizes(1) = file_tot_part + lsizes(2) = lag_io_vars start_idx_part(1) = 0 start_idx_part(2) = 0 @@ -1138,59 +1175,362 @@ contains MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + + allocate (MPI_IO_DATA_lg_bubbles(file_tot_part, 1:lag_io_vars)) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, lag_io_vars*file_tot_part, & + mpi_p, status, ierr) + + write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' + file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + + if (proc_rank == 0) then + open (unit=29, file=file_loc, form='formatted', position='rewind') + + if (lag_header) then + write(29, '(A)', advance='no') + if (lag_id_wrt) write(29, '(A8)', advance='no') 'id, ' + if (lag_pos_wrt) write(29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' + if (lag_pos_prev_wrt) write(29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' + if (lag_vel_wrt) write(29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' + if (lag_rad_wrt) write(29, '(A17)', advance='no') 'radius, ' + if (lag_rvel_wrt) write(29, '(A17)', advance='no') 'rvel, ' + if (lag_r0_wrt) write(29, '(A17)', advance='no') 'r0, ' + if (lag_rmax_wrt) write(29, '(A17)', advance='no') 'rmax, ' + if (lag_rmin_wrt) write(29, '(A17)', advance='no') 'rmin, ' + if (lag_dphidt_wrt) write(29, '(A17)', advance='no') 'dphidt, ' + if (lag_pres_wrt) write(29, '(A17)', advance='no') 'pressure, ' + if (lag_mv_wrt) write(29, '(A17)', advance='no') 'mv, ' + if (lag_mg_wrt) write(29, '(A17)', advance='no') 'mg, ' + if (lag_betaT_wrt) write(29, '(A17)', advance='no') 'betaT, ' + if (lag_betaC_wrt) write(29, '(A17)', advance='no') 'betaC, ' + write(29, 
'(A15)') 'time' + end if + + do i = 1, file_tot_part + id = int(MPI_IO_DATA_lg_bubbles(i, 1)) + inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) + if (id > 0) then + write(29, '(100(A))', advance='no') '' + + if (lag_id_wrt) write(29, '(I6, A)', advance='no') id, ', ' + if (lag_pos_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' + if (lag_pos_prev_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' + if (lag_vel_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' + if (lag_rad_wrt) write(29, '(E15.7, A)', advance='no') inputvals(10), ', ' + if (lag_rvel_wrt) write(29, '(E15.7, A)', advance='no') inputvals(11), ', ' + if (lag_r0_wrt) write(29, '(E15.7, A)', advance='no') inputvals(12), ', ' + if (lag_rmax_wrt) write(29, '(E15.7, A)', advance='no') inputvals(13), ', ' + if (lag_rmin_wrt) write(29, '(E15.7, A)', advance='no') inputvals(14), ', ' + if (lag_dphidt_wrt) write(29, '(E15.7, A)', advance='no') inputvals(15), ', ' + if (lag_pres_wrt) write(29, '(E15.7, A)', advance='no') inputvals(16), ', ' + if (lag_mv_wrt) write(29, '(E15.7, A)', advance='no') inputvals(17), ', ' + if (lag_mg_wrt) write(29, '(E15.7, A)', advance='no') inputvals(18), ', ' + if (lag_betaT_wrt) write(29, '(E15.7, A)', advance='no') inputvals(19), ', ' + if (lag_betaC_wrt) write(29, '(E15.7, A)', advance='no') inputvals(20), ', ' + write(29, '(E15.7)') time_real + end if + end do + close (29) + end if + + deallocate (MPI_IO_DATA_lg_bubbles) + + call s_mpi_barrier() + + call MPI_FILE_CLOSE(ifile, ierr) +#endif + + end subroutine s_write_lag_bubbles_results_to_text + + impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) + + integer, intent(in) :: t_step + + character(len=len_trim(case_dir) + 3*name_len) :: file_loc + + integer :: id + +#ifdef MFC_MPI + real(wp), dimension(20) :: inputvals + real(wp) :: time_real + integer, dimension(MPI_STATUS_SIZE) :: status + integer(KIND=MPI_OFFSET_KIND) :: disp + integer :: view + + logical :: lg_bub_file, file_exist + + integer, dimension(2) :: gsizes, lsizes, start_idx_part + integer :: ifile, ierr, tot_data, valid_data, nBub + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + character(LEN=4*name_len), dimension(num_procs) :: meshnames + integer, dimension(num_procs) :: meshtypes + + integer :: i, j + + real(wp), dimension(:), allocatable :: bub_id + real(wp), dimension(:), allocatable :: px, py, pz, ppx, ppy, ppz, vx, vy, vz + real(wp), dimension(:), allocatable :: radius, rvel, rnot, rmax, rmin, dphidt + real(wp), dimension(:), allocatable :: pressure, mv, mg, betaT, betaC + + dummy = 0._wp + + ! Construct file path + write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=lg_bub_file) - if (lg_bub_file) then + ! Check if file exists + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') + end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate(proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate (MPI_IO_DATA_lg_bubbles(tot_data, 1:21)) + ! Set time variables from file - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, 21*tot_data, & - mpi_p, status, ierr) + nBub = proc_bubble_counts(proc_rank + 1) - write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' - file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + start_idx_part(2) = 0 + lsizes(1) = nBub + lsizes(2) = lag_io_vars + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + + if (nBub > 0) then + + allocate(bub_id(nBub)) + allocate(px(nBub)) + allocate(py(nBub)) + allocate(pz(nBub)) + allocate(ppx(nBub)) + allocate(ppy(nBub)) + allocate(ppz(nBub)) + allocate(vx(nBub)) + allocate(vy(nBub)) + allocate(vz(nBub)) + allocate(radius(nBub)) + allocate(rvel(nBub)) + allocate(rnot(nBub)) + allocate(rmax(nBub)) + allocate(rmin(nBub)) + allocate(dphidt(nBub)) + allocate(pressure(nBub)) + allocate(mv(nBub)) + allocate(mg(nBub)) + allocate(betaT(nBub)) + allocate(betaC(nBub)) + allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, & + lag_io_vars * nBub, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + ! Extract data from MPI_IO_DATA_lg_bubbles array + ! 
Adjust these indices based on your actual data layout + bub_id(:) = MPI_IO_DATA_lg_bubbles(:, 1) + px(:) = MPI_IO_DATA_lg_bubbles(:, 2) + py(:) = MPI_IO_DATA_lg_bubbles(:, 3) + pz(:) = MPI_IO_DATA_lg_bubbles(:, 4) + ppx(:) = MPI_IO_DATA_lg_bubbles(:, 5) + ppy(:) = MPI_IO_DATA_lg_bubbles(:, 6) + ppz(:) = MPI_IO_DATA_lg_bubbles(:, 7) + vx(:) = MPI_IO_DATA_lg_bubbles(:, 8) + vy(:) = MPI_IO_DATA_lg_bubbles(:, 9) + vz(:) = MPI_IO_DATA_lg_bubbles(:, 10) + radius(:) = MPI_IO_DATA_lg_bubbles(:, 11) + rvel(:) = MPI_IO_DATA_lg_bubbles(:, 12) + rnot(:) = MPI_IO_DATA_lg_bubbles(:, 13) + rmax(:) = MPI_IO_DATA_lg_bubbles(:, 14) + rmin(:) = MPI_IO_DATA_lg_bubbles(:, 15) + dphidt(:) = MPI_IO_DATA_lg_bubbles(:, 16) + pressure(:) = MPI_IO_DATA_lg_bubbles(:, 17) + mv(:) = MPI_IO_DATA_lg_bubbles(:, 18) + mg(:) = MPI_IO_DATA_lg_bubbles(:, 19) + betaT(:) = MPI_IO_DATA_lg_bubbles(:, 20) + betaC(:) = MPI_IO_DATA_lg_bubbles(:, 21) + + ! Next, the root processor proceeds to record all of the spatial + ! extents in the formatted database master file. In addition, it + ! also records a sub-domain connectivity map so that the entire + ! grid may be reassembled by looking at the master file. if (proc_rank == 0) then - open (unit=29, file=file_loc, form='formatted', position='rewind') - !write(29,*) 'lg_bubID, x, y, z, xPrev, yPrev, zPrev, xVel, yVel, ', & - ! 'zVel, radius, interfaceVelocity, equilibriumRadius', & - ! 'Rmax, Rmin, dphidt, pressure, mv, mg, betaT, betaC, time' - do i = 1, tot_data - id = int(MPI_IO_DATA_lg_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) - if (id > 0) then - write (29, 6) int(id), inputvals(1), inputvals(2), & - inputvals(3), inputvals(4), inputvals(5), inputvals(6), inputvals(7), & - inputvals(8), inputvals(9), inputvals(10), inputvals(11), & - inputvals(12), inputvals(13), inputvals(14), inputvals(15), & - inputvals(16), inputvals(17), inputvals(18), inputvals(19), & - inputvals(20), time_real -6 format(I6, 21(1x, E15.7)) - end if + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH end do - close (29) + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) end if - deallocate (MPI_IO_DATA_lg_bubbles) + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + px, py, pz, nBub, & + DB_DOUBLE, DB_F77NULL, ierr) - end if + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', bub_id, nBub, t_step) + end if - call s_mpi_barrier() + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', vx, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel2', vy, nBub, t_step) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', vz, nBub, t_step) + end if + end if - call MPI_FILE_CLOSE(ifile, ierr) + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', radius, nBub, t_step) + end if + + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', rvel, nBub, t_step) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', rnot, nBub, t_step) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', rmax, nBub, t_step) + end if + + if (lag_rmin_wrt) then + call 
s_write_lag_variable_to_formatted_database_file('part_rmin', rmin, nBub, t_step) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', dphidt, nBub, t_step) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', pressure, nBub, t_step) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', mv, nBub, t_step) + end if + + if (lag_mg_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mg', mg, nBub, t_step) + end if + + if (lag_betaT_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaT', betaT, nBub, t_step) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', betaC, nBub, t_step) + end if + + deallocate(bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & + rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & + betaT, betaC) + end if + + deallocate (MPI_IO_DATA_lg_bubbles) #endif - end subroutine s_write_lag_bubbles_results + end subroutine s_write_lag_bubbles_to_formatted_database_file + + subroutine s_write_lag_variable_to_formatted_database_file(varname, data, nBubs, t_step) + + character(len=*), intent(in) :: varname + real(wp), dimension(1:nBubs), intent(in) :: data + integer, intent(in) :: nBubs, t_step + + character(len=64), dimension(num_procs) :: var_names + integer, dimension(num_procs) :: var_types + integer :: i + + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + + end subroutine s_write_lag_variable_to_formatted_database_file + impure subroutine s_write_intf_data_file(q_prim_vf) type(scalar_field), dimension(sys_size), intent(IN) :: q_prim_vf diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index e667c7eeac..4d0dad06ac 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -256,6 +256,24 @@ module m_global_parameters logical :: ib logical :: chem_wrt_Y(1:num_species) logical :: chem_wrt_T + logical :: lag_header + logical :: lag_txt_wrt + logical :: lag_db_wrt + logical :: lag_id_wrt + logical :: lag_pos_wrt + logical :: lag_pos_prev_wrt + logical :: lag_vel_wrt + logical :: lag_rad_wrt + logical :: lag_rvel_wrt + logical :: lag_r0_wrt + logical :: lag_rmax_wrt + logical :: lag_rmin_wrt + logical :: lag_dphidt_wrt + logical :: lag_pres_wrt + logical :: lag_mv_wrt + logical :: lag_mg_wrt + logical :: lag_betaT_wrt + logical :: lag_betaC_wrt !> @} real(wp), dimension(num_fluids_max) :: schlieren_alpha !< @@ -428,6 +446,24 @@ contains schlieren_wrt = .false. sim_data = .false. cf_wrt = .false. + lag_txt_wrt = .true. + lag_header = .true. + lag_db_wrt = .false. + lag_id_wrt = .true. + lag_pos_wrt = .true. + lag_pos_prev_wrt = .false. + lag_vel_wrt = .true. + lag_rad_wrt = .true. + lag_rvel_wrt = .false. + lag_r0_wrt = .false. + lag_rmax_wrt = .false. + lag_rmin_wrt = .false. + lag_dphidt_wrt = .false. + lag_pres_wrt = .false. + lag_mv_wrt = .false. + lag_mg_wrt = .false. 
+ lag_betaT_wrt = .false. + lag_betaC_wrt = .false. ib = .false. schlieren_alpha = dflt_real diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index effe0b9bb3..3917d18b74 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -107,7 +107,12 @@ contains & 'file_per_process', 'relax', 'cf_wrt', & & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & - & 'output_partial_domain', 'relativity', 'cont_damage' ] + & 'output_partial_domain', 'relativity', 'cont_damage', & + & 'lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & + & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & + & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & + & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & + & 'lag_betaT_wrt', 'lag_betaC_wrt' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index db1697ebd2..7a0e6d276b 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -89,7 +89,11 @@ impure subroutine s_read_input_file cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, & sim_data, hyperelasticity, Bx0, relativity, cont_damage, & - num_bc_patches + num_bc_patches, lag_header, lag_txt_wrt, lag_db_wrt, & + lag_id_wrt, lag_pos_wrt, lag_pos_prev_wrt, lag_vel_wrt, & + lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, & + lag_rmin_wrt, lag_dphidt_wrt, lag_pres_wrt, lag_mv_wrt, & + lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -663,7 +667,8 @@ impure subroutine s_save_data(t_step, varname, pres, c, H) call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' - call s_write_lag_bubbles_results(t_step) !! Individual bubble evolution + if (lag_txt_wrt) call s_write_lag_bubbles_results_to_text(t_step) !! Individual bubble evolution + if (lag_db_wrt) call s_write_lag_bubbles_to_formatted_database_file(t_step) end if if (sim_data .and. proc_rank == 0) then diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 31d83c12ea..f91b2def3c 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -415,6 +415,8 @@ contains integer, intent(inout) :: bub_id, save_count character(LEN=path_len + 2*name_len) :: file_loc + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part #ifdef MFC_MPI real(wp), dimension(20) :: inputvals @@ -429,81 +431,146 @@ contains integer :: ifile, ierr, tot_data, id integer :: i - write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write(file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=file_exist) - - if (file_exist) then - if (proc_rank == 0) then - open (9, file=trim(file_loc), form='unformatted', status='unknown') - read (9) tot_data, mytime, dt - close (9) - print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt - end if - else - print '(a)', trim(file_loc)//' is missing. exiting.' - call s_mpi_abort + + ! 
Check if file exists + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if - call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + if (.not. parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate(proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! Set time variables from file + mytime = file_time + dt = file_dt + + bub_id = proc_bubble_counts(proc_rank + 1) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + start_idx_part(2) = 0 + lsizes(1) = bub_id + lsizes(2) = lag_io_vars - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars - ! Open the file to write all flow variables - write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=particle_file) + if (bub_id > 0) then + + allocate (MPI_IO_DATA_lag_bubbles(bub_id, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - if (particle_file) then - call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) - call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & - mpi_p, status, ierr) - do i = 1, tot_data - id = int(MPI_IO_DATA_lag_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain_physical(inputvals(1:3)) - if (indomain .and. (id > 0)) then - bub_id = bub_id + 1 - nBubs = bub_id ! local number of bubbles - lag_id(bub_id, 1) = id ! global ID - lag_id(bub_id, 2) = bub_id ! 
local ID - mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) - mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) - mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) - intfc_rad(bub_id, 1) = inputvals(10) - intfc_vel(bub_id, 1) = inputvals(11) - bub_R0(bub_id) = inputvals(12) - Rmax_stats(bub_id) = inputvals(13) - Rmin_stats(bub_id) = inputvals(14) - bub_dphidt(bub_id) = inputvals(15) - gas_p(bub_id, 1) = inputvals(16) - gas_mv(bub_id, 1) = inputvals(17) - gas_mg(bub_id) = inputvals(18) - gas_betaT(bub_id) = inputvals(19) - gas_betaC(bub_id) = inputvals(20) - cell = fd_number - buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - end if + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars * bub_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + nBubs = bub_id + + do i = 1, bub_id + lag_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) + mtn_pos(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 2:4) + mtn_posPrev(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 5:7) + mtn_vel(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 8:10) + intfc_rad(i, 1) = MPI_IO_DATA_lag_bubbles(i, 11) + intfc_vel(i, 1) = MPI_IO_DATA_lag_bubbles(i, 12) + bub_R0(i) = MPI_IO_DATA_lag_bubbles(i, 13) + Rmax_stats(i) = MPI_IO_DATA_lag_bubbles(i, 14) + Rmin_stats(i) = MPI_IO_DATA_lag_bubbles(i, 15) + bub_dphidt(i) = MPI_IO_DATA_lag_bubbles(i, 16) + gas_p(i, 1) = MPI_IO_DATA_lag_bubbles(i, 17) + gas_mv(i, 1) = MPI_IO_DATA_lag_bubbles(i, 18) + gas_mg(i) = MPI_IO_DATA_lag_bubbles(i, 19) + gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20) + gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21) end do + deallocate (MPI_IO_DATA_lag_bubbles) + + else + nBubs = 0 + + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + end if + + print*, proc_rank, lag_id + + if (proc_rank == 0) then + write(*,'(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write(*,'(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt end if - call MPI_FILE_CLOSE(ifile, ierr) + + deallocate(proc_bubble_counts) #endif end subroutine s_restart_bubbles @@ -1582,7 +1649,7 @@ contains character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist - integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn + integer :: bub_id, tot_part integer :: i, k #ifdef MFC_MPI @@ -1593,6 +1660,9 @@ contains integer :: view integer, dimension(2) :: gsizes, lsizes, start_idx_part integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp bub_id = 0._wp if (nBubs /= 0) then @@ -1605,78 +1675,60 @@ contains if (.not. parallel_io) return + lsizes(1) = bub_id + lsizes(2) = lag_io_vars + ! 
Total number of particles call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & MPI_SUM, MPI_COMM_WORLD, ierr) - ! Total number of particles written so far - call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - lsizes(1) = max(1, bub_id) - lsizes(2) = 21 + call MPI_ALLGATHER(bub_id, 1, MPI_INTEGER, proc_bubble_counts, 1, MPI_INTEGER, & + MPI_COMM_WORLD, ierr) - ! if the partcle number is zero, put 1 since MPI cannot deal with writing - ! zero particle - part_order(:) = 1 - part_order(proc_rank + 1) = max(1, bub_id) - - call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & - MPI_MAX, MPI_COMM_WORLD, ierr) - - gsizes(1) = sum(part_ord_mpi(1:num_procs)) - gsizes(2) = 21 - - start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) + ! Calculate starting index for this processor's particles + call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (proc_rank == 0) start_idx_part(1) = 0 start_idx_part(2) = 0 - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + gsizes(1) = tot_part + gsizes(2) = lag_io_vars + + write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - ! Writing down the total number of particles + ! Clean up existing file if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - write (9) gsizes(1), mytime, dt - close (9) + inquire(FILE=trim(file_loc), EXIST=file_exist) + if (file_exist) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if end if - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) - - ! Open the file to write all flow variables - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Write header using MPI I/O for consistency + call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, mytime, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, dt, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, num_procs, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, proc_bubble_counts, num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) - - disp = 0._wp + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - - ! 
Cycle through list - i = 1 - - if (bub_id == 0) then - MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp - else + if (bub_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(max(1, bub_id), 1:lag_io_vars)) + i = 1 do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) @@ -1692,21 +1744,47 @@ contains MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - i = i + 1 - end if - end do - end if + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & - mpi_p, status, ierr) + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars * bub_id, mpi_p, status, ierr) - deallocate (MPI_IO_DATA_lag_bubbles) + call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (MPI_IO_DATA_lag_bubbles) + + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if #endif diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index dfa071ea67..8d9aa29a36 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1261,7 +1261,6 @@ contains call acc_present_dump() #endif - call s_initialize_mpi_common_module() call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index c28857c8d4..512c6fc0b6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -423,6 +423,24 @@ def analytic(self): 'pres_inf_wrt': ParamType.LOG, 'cons_vars_wrt': ParamType.LOG, 'prim_vars_wrt': ParamType.LOG, + 'lag_header': ParamType.LOG, + 'lag_txt_wrt': ParamType.LOG, + 'lag_db_wrt': ParamType.LOG, + 'lag_id_wrt': ParamType.LOG, + 'lag_pos_wrt': ParamType.LOG, + 'lag_pos_prev_wrt': ParamType.LOG, + 'lag_vel_wrt': ParamType.LOG, + 'lag_rad_wrt': ParamType.LOG, + 'lag_rvel_wrt': ParamType.LOG, + 'lag_r0_wrt': ParamType.LOG, + 'lag_rmax_wrt': ParamType.LOG, + 'lag_rmin_wrt': ParamType.LOG, + 'lag_dphidt_wrt': ParamType.LOG, + 'lag_pres_wrt': ParamType.LOG, + 'lag_mv_wrt': ParamType.LOG, + 'lag_mg_wrt': ParamType.LOG, + 'lag_betaT_wrt': ParamType.LOG, + 'lag_betaC_wrt': ParamType.LOG, 'c_wrt': ParamType.LOG, 'omega_wrt': ParamType.LOG, 'qbmm': ParamType.LOG, From 40009ab37721eaaf335c64a7cbcdc0a1f1c6a699 Mon Sep 17 00:00:00 2001 From: Ben 
Wilfong Date: Sun, 29 Jun 2025 14:43:01 -0400 Subject: [PATCH 41/62] empty array writing and perlin noise ICs --- src/post_process/m_data_output.fpp | 186 ++++++++++++++++++---- src/post_process/m_global_parameters.fpp | 2 +- src/post_process/m_mpi_proxy.fpp | 2 +- src/pre_process/include/3dHardcodedIC.fpp | 48 ++++++ src/pre_process/m_global_parameters.fpp | 8 + src/pre_process/m_mpi_proxy.fpp | 3 +- src/pre_process/m_start_up.fpp | 3 +- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 53 +++--- src/simulation/m_time_steppers.fpp | 12 +- toolchain/mfc/run/case_dicts.py | 5 + 11 files changed, 261 insertions(+), 63 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index c1dde087f5..eaecab573c 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1275,6 +1275,8 @@ contains real(wp), dimension(1:1, 1:lag_io_vars) :: dummy character(LEN=4*name_len), dimension(num_procs) :: meshnames integer, dimension(num_procs) :: meshtypes + real(wp) :: dummy_data + integer :: i, j @@ -1284,6 +1286,7 @@ contains real(wp), dimension(:), allocatable :: pressure, mv, mg, betaT, betaC dummy = 0._wp + dummy_data = 0._wp ! Construct file path write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' @@ -1437,97 +1440,212 @@ contains DB_DOUBLE, DB_F77NULL, ierr) if (lag_id_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_id', bub_id, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_id', t_step, bub_id, nBub) end if if (lag_vel_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_vel1', vx, nBub, t_step) - call s_write_lag_variable_to_formatted_database_file('part_vel2', vy, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step, vx, nBub) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step, vy, nBub) if (p > 0) then - call s_write_lag_variable_to_formatted_database_file('part_vel3', vz, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step, vz, nBub) end if end if if (lag_rad_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_radius', radius, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step, radius, nBub) end if if (lag_rvel_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_rdot', rvel, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step, rvel, nBub) end if if (lag_r0_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_r0', rnot, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step, rnot, nBub) end if if (lag_rmax_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_rmax', rmax, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step, rmax, nBub) end if if (lag_rmin_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_rmin', rmin, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step, rmin, nBub) end if if (lag_dphidt_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_dphidt', dphidt, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step, dphidt, nBub) end if if (lag_pres_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_pressure', pressure, nBub, t_step) + call 
s_write_lag_variable_to_formatted_database_file('part_pressure', t_step, pressure, nBub) end if if (lag_mv_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_mv', mv, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step, mv, nBub) end if if (lag_mg_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_mg', mg, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_mg', t_step, mg, nBub) end if if (lag_betaT_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_betaT', betaT, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step, betaT, nBub) end if if (lag_betaC_wrt) then - call s_write_lag_variable_to_formatted_database_file('part_betaC', betaC, nBub, t_step) + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step, betaC, nBub) end if deallocate(bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & betaT, betaC) - end if + deallocate (MPI_IO_DATA_lg_bubbles) + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - deallocate (MPI_IO_DATA_lg_bubbles) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + if (proc_rank == 0) then + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH + end do + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + dummy_data, dummy_data, dummy_data, 0, & + DB_DOUBLE, DB_F77NULL, ierr) + + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', t_step) + end if + + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step) + end if + end if + + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step) + end if + + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step) + end if + + if (lag_rmin_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', t_step) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step) + end if + + if (lag_mg_wrt) then + call 
s_write_lag_variable_to_formatted_database_file('part_mg', t_step) + end if + + if (lag_betaT_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step) + end if + + end if #endif end subroutine s_write_lag_bubbles_to_formatted_database_file - subroutine s_write_lag_variable_to_formatted_database_file(varname, data, nBubs, t_step) + subroutine s_write_lag_variable_to_formatted_database_file(varname, t_step, data, nBubs) character(len=*), intent(in) :: varname - real(wp), dimension(1:nBubs), intent(in) :: data - integer, intent(in) :: nBubs, t_step + integer, intent(in) :: t_step + real(wp), dimension(1:nBubs), intent(in), optional :: data + integer, intent(in), optional :: nBubs character(len=64), dimension(num_procs) :: var_names integer, dimension(num_procs) :: var_types + real(wp) :: dummy_data integer :: i - if (proc_rank == 0) then - do i = 1, num_procs - write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & - '/', t_step, '.silo:'//trim(varname) - var_types(i) = DB_POINTVAR - end do - err = DBSET2DSTRLEN(len(var_names(1))) - err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & - num_procs, var_names, & - len_trim(var_names), & - var_types, DB_F77NULL, ierr) - end if + dummy_data = 0._wp - err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & - 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + if (present(nBubs) .and. present(data)) then + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + else + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBSETEMPTYOK(1) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) + end if end subroutine s_write_lag_variable_to_formatted_database_file diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 4d0dad06ac..22d5739bc9 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -446,7 +446,7 @@ contains schlieren_wrt = .false. sim_data = .false. cf_wrt = .false. - lag_txt_wrt = .true. + lag_txt_wrt = .false. lag_header = .true. lag_db_wrt = .false. lag_id_wrt = .true. 
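The per-variable switches above are plain post_process namelist logicals, so enabling the new Silo particle output only requires toggling them from the case file. Below is a minimal sketch, assuming the usual MFC Python case dictionary and its 'T'/'F' logical convention; only the Lagrangian-output keys are shown and the rest of the case is omitted.

    # Hedged sketch (not part of the patch): select Silo database output for the
    # Lagrangian bubbles and a few per-particle variables; the keys match the
    # case_dicts.py entries added in this series.
    lag_output = {
        'bubbles_lagrange': 'T',
        'lag_db_wrt'      : 'T',   # write bubbles to the formatted (Silo) database
        'lag_txt_wrt'     : 'F',   # per-bubble text output now defaults to off
        'lag_id_wrt'      : 'T',   # particle IDs
        'lag_vel_wrt'     : 'T',   # particle velocities
        'lag_rad_wrt'     : 'T',   # radii
        'lag_pres_wrt'    : 'T',   # internal bubble pressure
    }

Ranks that hold no bubbles still pass through the writer: DBSETEMPTYOK(1) together with a zero-length DBPUTPM/DBPUTPV1 call registers an empty point mesh or variable, so the multimesh and multivar entries in the root file stay consistent across processors.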
diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 3917d18b74..cfe43ed80f 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -112,7 +112,7 @@ contains & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & - & 'lag_betaT_wrt', 'lag_betaC_wrt' ] + & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index e0018598a7..1dc1969392 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -5,7 +5,38 @@ real(wp) :: eps + real(wp) :: pres + real(wp), dimension(0:m_glb, 0:p_glb) :: ih + integer :: i, j, pos, start, end + character(len=10000) :: line + character(len=25) :: value + + if (interface_file /= '.') then + open(unit=10, file=trim(interface_file), status="old", action="read") + do i = 0, m_glb + read(10, '(A)') line ! Read a full line as a string + start = 1 + + do j = 0, p_glb + end = index(line(start:), ',') ! Find the next comma + if (end == 0) then + value = trim(adjustl(line(start:))) ! Last value in the line + else + value = trim(adjustl(line(start:start+end-2))) ! Extract substring + start = start + end ! Move to next value + end if + read(value, *) ih(i, j) ! Convert string to numeric value + if (.not. f_is_default(normMag)) ih(i,j )= ih(i,j) * normMag + if (.not. f_is_default(normFac)) ih(i,j) = ih(i,j) / normFac + end do + end do + close(10) + + print*, "Interface file "//trim(interface_file)//" read" + end if + eps = 1e-9_wp + #:enddef #:def Hardcoded3D() @@ -56,6 +87,23 @@ q_prim_vf(advxe)%sf(i, j, k) = patch_icpp(1)%alpha(2) end if + case (302) ! (3D Perlin Noise Interface) + + alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & + tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*100._wp)) + + q_prim_vf(advxb)%sf(i,j,k) = alph + q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph + + q_prim_vf(contxb)%sf(i,j,k) = q_prim_vf(advxb)%sf(i,j,k) * 1._wp + q_prim_vf(contxe)%sf(i,j,k) = q_prim_vf(advxe)%sf(i,j,k) * (1._wp / 950._wp) + + q_prim_vf(E_idx)%sf(i,j,k) = p0 + & + (q_prim_vf(contxb)%sf(i,j,k) + q_prim_vf(contxe)%sf(i,j,k)) * g0 * & + (ih(start_idx(1) + i, start_idx(3) + k) - y_cc(j)) + + if (surface_tension) q_prim_vf(c_idx)%sf(i,j,k) = alph + ! Put your variable assignments here case default call s_int_to_str(patch_id, iStr) diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 5c34086d5a..a8a5192be2 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -28,6 +28,9 @@ module m_global_parameters logical :: old_ic, non_axis_sym !< Use existing IC data integer :: t_step_old, t_step_start !< Existing IC/grid folder + character(LEN=path_len) :: interface_file + real(wp) :: normFac, normMag, g0, p0 + logical :: cfl_adap_dt, cfl_const_dt, cfl_dt integer :: n_start, n_start_old @@ -314,6 +317,11 @@ contains old_ic = .false. t_step_old = dflt_int t_step_start = dflt_int + interface_file = '.' + normFac = dflt_real + normMag = dflt_real + g0 = dflt_real + p0 = dflt_real cfl_adap_dt = .false. cfl_const_dt = .false. 
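For the new hardcoded IC, interface_file is parsed one row per global x-index (i = 0..m_glb), each row holding p_glb + 1 comma-separated heights ih(i,j); normMag and normFac rescale the heights, and g0/p0 feed the hydrostatic pressure of patch 302. A minimal generator sketch follows, assuming NumPy and using smoothed random noise purely as a stand-in for the Perlin noise named in the commit message; the file name and grid sizes are placeholders.

    # Hedged sketch (not part of the patch): write an interface-height file in the
    # comma-separated layout read by the case-302 hardcoded IC
    # (rows i = 0..m_glb, columns j = 0..p_glb).
    import numpy as np

    m_glb, p_glb = 199, 199                      # placeholder global cell counts
    rng = np.random.default_rng(0)
    ih = rng.standard_normal((m_glb + 1, p_glb + 1))

    # Crude neighbour averaging as a stand-in for Perlin noise.
    for _ in range(10):
        ih = 0.25*(np.roll(ih, 1, axis=0) + np.roll(ih, -1, axis=0)
                   + np.roll(ih, 1, axis=1) + np.roll(ih, -1, axis=1))

    # One line per i, p_glb + 1 comma-separated values per line, short enough
    # for the 10000-character line buffer and 25-character value field used
    # by the reader above.
    np.savetxt('interface.dat', ih, fmt='%.10e', delimiter=',')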
diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index c7c7a23add..92f9e0f78a 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -41,6 +41,7 @@ contains ! Logistics call MPI_BCAST(case_dir, len(case_dir), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(interface_file, len(interface_file), MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr) #:for VAR in ['t_step_old', 't_step_start', 'm', 'n', 'p', 'm_glb', 'n_glb', 'p_glb', & & 'loops_x', 'loops_y', 'loops_z', 'model_eqns', 'num_fluids', & @@ -70,7 +71,7 @@ contains & 'perturb_flow_mag', 'pref', 'rhoref', 'poly_sigma', 'R0ref', & & 'Web', 'Ca', 'Re_inv', 'sigR', 'sigV', 'rhoRV', 'palpha_eps', & & 'ptgalpha_eps', 'sigma', 'pi_fac', 'mixlayer_vel_coef', & - & 'mixlayer_domain', 'Bx0' ] + & 'mixlayer_domain', 'Bx0', 'normMag', 'normFac', 'g0', 'p0' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 30c38cdfdf..8ee3867cbc 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,7 +149,8 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & - patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params + patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params, & + interface_file, normFac, normMag, g0, p0 ! Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ef74cbcee..fe84ec68d4 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -98,7 +98,7 @@ contains !! so the system can be advanced in time !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables - subroutine s_compute_body_forces_rhs(q_cons_vf, q_prim_vf, rhs_vf) + subroutine s_compute_body_forces_rhs(q_prim_vf, q_cons_vf, rhs_vf) type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index ee45c77e6e..2ca4e2da7c 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1097,8 +1097,9 @@ contains !> This subroutine updates the Lagrange variables using the tvd RK time steppers. !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. !! 
@param stage Current tvd RK stage - impure subroutine s_update_lagrange_tdv_rk(stage) + impure subroutine s_update_lagrange_tdv_rk(q_cons_vf, stage) + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: stage integer :: k @@ -1116,7 +1117,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1141,7 +1142,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) @@ -1156,7 +1157,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1182,7 +1183,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) elseif (stage == 2) then !$acc parallel loop gang vector default(present) private(k) @@ -1197,7 +1198,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) elseif (stage == 3) then !$acc parallel loop gang vector default(present) private(k) @@ -1212,7 +1213,8 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) + call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1230,16 +1232,20 @@ contains !> This subroutine enforces reflective and wall boundary conditions for EL bubbles !! @param dest Destination for the bubble position update - impure subroutine s_enforce_EL_bubbles_boundary_conditions(dest) + impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest) + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: dest integer :: k, i, patch_id, offset integer, dimension(3) :: cell + real(wp), dimension(3) :: scoord !$acc parallel loop gang vector default(present) private(cell) do k = 1, nBubs keep_bubble(k) = 1 + ! Relocate bubbles at solid boundaries and delete bubbles that leave + ! buffer regions if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) @@ -1278,20 +1284,31 @@ contains end if end if - if (ib) then - cell = fd_number - buff_size - call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + if (keep_bubble(k) == 1) then + ! 
Remove bubbles that are no longer in a liquid + !cell = fd_number - buff_size + !call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then - patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + !if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + !keep_bubble(k) = 0 + !end if - do i = 1, num_dims - mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & - levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), patch_id) - end do + ! Move bubbles back to surface of IB + if (ib) then cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + + if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then + patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + + do i = 1, num_dims + mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & + levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & + * levelset%sf(cell(1), cell(2), cell(3), patch_id) + end do + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + end if end if end if end do diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index b1c338b5c9..b10e66badf 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -375,7 +375,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -477,7 +477,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -549,7 +549,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -659,7 +659,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -731,7 +731,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size @@ -804,7 +804,7 @@ contains ! Stage 3 of 3 call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=3) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=3) !$acc parallel loop collapse(4) gang vector default(present) do i = 1, sys_size diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 512c6fc0b6..1e18dcf69b 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -102,6 +102,11 @@ def analytic(self): 'bubbles_lagrange': ParamType.LOG, 'lag_params%vel_model': ParamType.INT, 'fd_order': ParamType.INT, + 'normFac': ParamType.REAL, + 'interface_file': ParamType.STR, + 'normMag': ParamType.REAL, + 'g0': ParamType.REAL, + 'p0': ParamType.REAL, }) for ib_id in range(1, 10+1): From 50e977247071c7a2bc1caefbfa8dab063f4b8946 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 8 Jul 2025 17:50:36 -0400 Subject: [PATCH 42/62] gpu macros and minro fixes --- src/post_process/m_data_output.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 39 ++++++++++++------------- src/simulation/m_bubbles_EL_kernels.fpp | 4 +-- src/simulation/m_mpi_proxy.fpp | 3 +- src/simulation/m_surface_tension.fpp | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 639e376512..8a85f47cc3 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1601,7 +1601,7 @@ contains character(len=*), intent(in) :: varname integer, intent(in) :: t_step - real(wp), dimension(1:nBubs), intent(in), optional :: data + real(wp), dimension(1:), intent(in), optional :: data integer, intent(in), optional :: nBubs character(len=64), dimension(num_procs) :: var_names diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 382d08b279..3720a6a195 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -81,7 +81,7 @@ module m_bubbles_EL integer, allocatable, dimension(:) :: keep_bubble, prefix_sum integer :: active_bubs - !$acc declare create(keep_bubble, prefix_sum, active_bubs) + $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') contains @@ -903,7 +903,7 @@ contains !! @param f_pinfl Driving pressure !! @param cell Bubble cell !! @param Romega Control volume radius - pure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) + impure subroutine s_get_pinf(bub_id, q_prim_vf, ptype, f_pinfl, cell, preterm1, term2, Romega) $:GPU_ROUTINE(function_name='s_get_pinf',parallelism='[seq]', & & cray_inline=True) @@ -1242,9 +1242,8 @@ contains integer, intent(in) :: dest integer :: k, i, patch_id, offset integer, dimension(3) :: cell - real(wp), dimension(3) :: scoord - !$acc parallel loop gang vector default(present) private(cell) + $:GPU_PARALLEL_LOOP(private='[cell]') do k = 1, nBubs keep_bubble(k) = 1 @@ -1290,12 +1289,12 @@ contains if (keep_bubble(k) == 1) then ! Remove bubbles that are no longer in a liquid - !cell = fd_number - buff_size - !call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) + cell = fd_number - buff_size + call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - !if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then - !keep_bubble(k) = 0 - !end if + if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + keep_bubble(k) = 0 + end if ! 
Move bubbles back to surface of IB if (ib) then @@ -1305,11 +1304,13 @@ contains if (ib_markers%sf(cell(1), cell(2), cell(3)) /= 0) then patch_id = ib_markers%sf(cell(1), cell(2), cell(3)) + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & * levelset%sf(cell(1), cell(2), cell(3), patch_id) end do + cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) end if @@ -1318,12 +1319,11 @@ contains end do call nvtxStartRange("LAG-BC-DEV2HOST") - !$acc update host(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, & - !$acc keep_bubble, nBubs) + $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, keep_bubble, & + & nBubs]') call nvtxEndRange do k = 1, nBubs @@ -1380,11 +1380,10 @@ contains end if call nvtxStartRange("LAG-BC-HOST2DEV") - !$acc update device(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - !$acc gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - !$acc intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - !$acc mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - !$acc gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & + & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & + & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts]') call nvtxEndRange end subroutine s_enforce_EL_bubbles_boundary_conditions diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 9083012bd3..cbc0ca5d4b 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -433,7 +433,7 @@ contains !! @param q_prim_vf Eulerian field with primitive variables !! @return v Interpolated velocity at the position of the bubble pure function f_interpolate_velocity(pos, cell, i, q_prim_vf) result(v) -!$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pos integer, dimension(3), intent(in) :: cell integer, intent(in) :: i @@ -489,7 +489,7 @@ contains !! @param q_prim_vf Eulerian field with primitive variables !! 
@return a Acceleration of the bubble in direction i pure function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) -!$acc routine seq + $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho integer, dimension(3), intent(in) :: cell integer, intent(in) :: i diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 8aad16be9e..0c9488ed57 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -57,7 +57,8 @@ module m_mpi_proxy character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff type(bounds_info), dimension(3) :: comm_coords integer :: p_buff_size, p_var_size - !$acc declare create(p_send_counts, comm_coords) + + $:GPU_DECLARE(create='[p_send_counts, comm_coords]') contains diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 30b6fcec2b..67c31cf74e 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -280,7 +280,7 @@ contains do k = 0, n do j = 0, m c_divs(num_dims + 1)%sf(j, k, l) = 0._wp - !s$acc loop seq + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims c_divs(num_dims + 1)%sf(j, k, l) = & c_divs(num_dims + 1)%sf(j, k, l) + & From 35f6b01120851c499c1abc8187bd60fa98d0a689 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Fri, 11 Jul 2025 13:54:30 -0400 Subject: [PATCH 43/62] adap dt moving bubbles and 4th order differences --- src/pre_process/m_mpi_proxy.fpp | 3 +- src/simulation/m_bubbles.fpp | 28 ++- src/simulation/m_bubbles_EL.fpp | 237 ++++++++++++++++++------ src/simulation/m_bubbles_EL_kernels.fpp | 186 ++++++++++++++----- src/simulation/m_mpi_proxy.fpp | 3 +- src/simulation/m_time_steppers.fpp | 12 +- 6 files changed, 354 insertions(+), 115 deletions(-) diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 3e175c768c..5901f253eb 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -48,7 +48,8 @@ contains & 'weno_order', 'precision', 'perturb_flow_fluid', & & 'perturb_sph_fluid', 'num_patches', 'thermal', 'nb', 'dist_type',& & 'R0_type', 'relax_model', 'num_ibs', 'n_start', 'elliptic_smoothing_iters', & - & 'num_bc_patches', 'lag_params%vel_model', 'mixlayer_perturb_nk' ] + & 'num_bc_patches', 'lag_params%vel_model', 'mixlayer_perturb_nk', & + & 'fd_order' ] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index db04cef7d7..190bab2033 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -17,6 +17,8 @@ module m_bubbles use m_helper_basic !< Functions to compare floating point numbers + use m_bubbles_EL_kernels + implicit none real(wp) :: chi_vw !< Bubble wall properties (Ando 2010) @@ -467,7 +469,8 @@ contains pure subroutine s_advance_step(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, & bub_id, fmass_v, fmass_n, fbeta_c, & - fbeta_t, fCson, adap_dt_stop) + fbeta_t, fCson, adap_dt_stop, fRe, fPos, & + fVel, cell, q_prim_vf) $:GPU_ROUTINE(function_name='s_advance_step',parallelism='[seq]', & & cray_inline=True) @@ -477,14 +480,18 @@ contains integer, intent(in) :: bub_id real(wp), intent(in) :: fmass_n, fbeta_c, fbeta_t, fCson integer, intent(inout) :: adap_dt_stop + real(wp), intent(inout), dimension(3), optional :: fPos, fVel + real(wp), intent(in), optional :: fRe + integer, intent(in), dimension(3), optional :: cell + type(scalar_field), 
intent(in), dimension(sys_size), optional :: q_prim_vf real(wp), dimension(5) :: err !< Error estimates for adaptive time stepping real(wp) :: t_new !< Updated time step size real(wp) :: h !< Time step size real(wp), dimension(4) :: myR_tmp1, myV_tmp1, myR_tmp2, myV_tmp2 !< Bubble radius, radial velocity, and radial acceleration for the inner loop real(wp), dimension(4) :: myPb_tmp1, myMv_tmp1, myPb_tmp2, myMv_tmp2 !< Gas pressure and vapor mass for the inner loop (EL) - real(wp) :: fR2, fV2, fpb2, fmass_v2 - integer :: iter_count + real(wp) :: fR2, fV2, fpb2, fmass_v2, vTemp, aTemp + integer :: l, iter_count call s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & fntait, fBtait, f_bub_adv_src, f_divu, fCson, h) @@ -555,6 +562,21 @@ contains ! Update pb and mass_v fpb = myPb_tmp1(4) fmass_v = myMv_tmp1(4) + + if (lag_params%vel_model == 1) then + do l = 1, num_dims + vTemp = f_interpolate_velocity(fR, cell, l, q_prim_vf) + fPos(l) = fPos(l) + h * vTemp + fVel(l) = vTemp + end do + elseif (lag_params%vel_model == 2) then + do l = 1, num_dims + aTemp = f_get_acceleration(fPos(l), fR, fVel(l), fmass_n, fmass_v, & + fRe, fRho, cell, l, q_prim_vf) + fPos(l) = fPos(l) + h * fVel(l) + fVel(l) = fVel(l) + h * aTemp + end do + end if end if ! Update step size for the next sub-step diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 3720a6a195..d92a70b239 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -605,15 +605,15 @@ contains real(wp) :: myR_m, mygamma_m, myPb, myMass_n, myMass_v real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho + real(wp), dimension(3) :: myPos, myVel real(wp) :: gamma, pi_inf, qv real(wp), dimension(contxe) :: myalpha_rho, myalpha real(wp), dimension(2) :: Re integer, dimension(3) :: cell - integer :: adap_dt_stop_max, adap_dt_stop !< Fail-safe exit if max iteration count reached real(wp) :: dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu !< Dummy variables for unified subgrid bubble subroutines - integer :: i, k, l + integer :: i, j, k, l call nvtxStartRange("LAGRANGE-BUBBLE-DYNAMICS") @@ -641,7 +641,7 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell,myPos,myVel]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs @@ -655,6 +655,8 @@ contains myBeta_c = gas_betaC(k) myBeta_t = gas_betaT(k) myR0 = bub_R0(k) + myPos = mtn_pos(k,:,2) + myVel = mtn_vel(k,:,2) ! Vapor and heat fluxes call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) @@ -681,7 +683,8 @@ contains call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & dmntait, dmBtait, dm_bub_adv_src, dm_divu, & k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) + myBeta_t, myCson, adap_dt_stop, Re(1), & + myPos, myVel, cell, q_prim_vf) ! Update bubble state intfc_rad(k, 1) = myR @@ -689,6 +692,16 @@ contains gas_p(k, 1) = myPb gas_mv(k, 1) = myMass_v + if (lag_params%vel_model == 1) then + mtn_posPrev(k,:,1) = mtn_pos(k,:,2) + mtn_pos(k,:,1) = myPos + mtn_vel(k,:,1) = myVel + elseif (lag_params%vel_model == 2) then + mtn_posPrev(k,:,1) = mtn_pos(k,:,2) + mtn_pos(k,:,1) = myPos + mtn_vel(k,:,1) = myVel + end if + else ! 
Radial acceleration from bubble models intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & @@ -701,21 +714,20 @@ contains do l = 1, num_dims if (lag_params%vel_model == 1) then - mtn_dposdt(k, l, stage) = f_interpolate_velocity(mtn_pos(k,l,2), & + mtn_dposdt(k, l, stage) = f_interpolate_velocity(myPos(l), & cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then - mtn_dposdt(k, l, stage) = mtn_vel(k,l,2) - mtn_dveldt(k, l, stage) = f_get_acceleration(mtn_pos(k,l,2), & - intfc_rad(k,2), mtn_vel(k,l,2), & - gas_mg(k), gas_mv(k, 2), & + mtn_dposdt(k, l, stage) = myVel(l) + mtn_dveldt(k, l, stage) = f_get_acceleration(myPos(l), & + myR, myVel(l), & + myMass_n, myMass_v, & Re(1), myRho, cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp end if end do - end if adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) @@ -724,6 +736,10 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") + if (adap_dt .and. lag_params%vel_model > 0) then + call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) + end if + call nvtxEndRange end subroutine s_compute_bubble_EL_dynamics @@ -913,7 +929,8 @@ contains integer, dimension(3), intent(out) :: cell real(wp), intent(out), optional :: preterm1, term2, Romega - real(wp), dimension(3) :: scoord, psi + real(wp), dimension(3) :: scoord, psi_pos, psi_x, psi_y, psi_z + real(wp), xi, eta, zeta real(wp) :: dc, vol, aux real(wp) :: volgas, term1, Rbeq, denom real(wp) :: charvol, charpres, charvol2, charpres2 @@ -940,46 +957,149 @@ contains if ((lag_params%cluster_type == 1)) then !< Getting p_cell in terms of only the current cell by interpolation - !< Getting the cell volulme as Omega - if (p > 0) then - vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - else - if (cyl_coord) then - vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + if (fd_order == 2) then ! Bilinear interpolation + + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) else - vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if end if - end if - !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. - psi(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) - psi(1) = abs((psi(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) + !< Obtain bilinear interpolation coefficients, based on the current location of the bubble. + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = abs((psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1)))) - psi(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) - psi(2) = abs((psi(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = abs((psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2)))) - if (p > 0) then - psi(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) - psi(3) = abs((psi(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) - else - psi(3) = 0._wp - end if + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = abs((psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3)))) + else + psi_pos(3) = 0._wp + end if + + ! Calculate bilinear basis functions for each direction + ! 
For normalized coordinate xi in [0, 1], the two basis functions are: + ! phi_0(xi) = 1 - xi, phi_1(xi) = xi + + ! X-direction basis functions + psi_x(1) = 1._wp - psi_pos(1) ! Left basis function + psi_x(2) = psi_pos(1) ! Right basis function + + ! Y-direction basis functions + psi_y(1) = 1._wp - psi_pos(2) ! Left basis function + psi_y(2) = psi_pos(2) ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + psi_z(1) = 1._wp - psi_pos(3) ! Left basis function + psi_z(2) = psi_pos(3) ! Right basis function + else + psi_z(1) = 1._wp + psi_z(2) = 0._wp + end if + + !< Perform bilinear interpolation + f_pinfl = 0._wp + + if (p == 0) then !2D - 4 point interpolation (2x2) + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3)) * & + psi_x(i) * psi_y(j) + end do + end do + else !3D - 8 point interpolation (2x2x2) + do k = 1, 2 + do j = 1, 2 + do i = 1, 2 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1) * & + psi_x(i) * psi_y(j) * psi_z(k) + end do + end do + end do + end if + + elseif (fd_order == 4) then ! Biquadratic interpolation - !< Perform bilinear interpolation - if (p == 0) then !2D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2) - else !3D - f_pinfl = q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3))*(1._wp - psi(1))*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3))*psi(1)*(1._wp - psi(2))*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3))*psi(1)*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3))*(1._wp - psi(1))*psi(2)*(1._wp - psi(3)) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1)*(1._wp - psi(1))*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3) + 1)*psi(1)*(1._wp - psi(2))*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2) + 1, cell(3) + 1)*psi(1)*psi(2)*psi(3) - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3) + 1)*(1._wp - psi(1))*psi(2)*psi(3) + if (p > 0) then + vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + else + if (cyl_coord) then + vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + end if + end if + + !< Obtain biquadratic interpolation coefficients, based on the current location of the bubble. + ! For biquadratic interpolation, we need coefficients for 3 points in each direction + psi_pos(1) = (scoord(1) - real(cell(1)))*dx(cell(1)) + x_cb(cell(1) - 1) + psi_pos(1) = (psi_pos(1) - x_cc(cell(1)))/(x_cc(cell(1) + 1) - x_cc(cell(1))) + + psi_pos(2) = (scoord(2) - real(cell(2)))*dy(cell(2)) + y_cb(cell(2) - 1) + psi_pos(2) = (psi_pos(2) - y_cc(cell(2)))/(y_cc(cell(2) + 1) - y_cc(cell(2))) + + if (p > 0) then + psi_pos(3) = (scoord(3) - real(cell(3)))*dz(cell(3)) + z_cb(cell(3) - 1) + psi_pos(3) = (psi_pos(3) - z_cc(cell(3)))/(z_cc(cell(3) + 1) - z_cc(cell(3))) + else + psi_pos(3) = 0._wp + end if + + ! Calculate biquadratic basis functions for each direction + ! 
For normalized coordinate xi in [-1, 1], the three basis functions are: + ! phi_0(xi) = xi*(xi-1)/2, phi_1(xi) = (1-xi)*(1+xi), phi_2(xi) = xi*(xi+1)/2 + + ! X-direction basis functions + xi = 2._wp*psi_pos(1) - 1._wp ! Convert to [-1, 1] range + psi_x(1) = xi*(xi - 1._wp)/2._wp ! Left basis function + psi_x(2) = (1._wp - xi)*(1._wp + xi) ! Center basis function + psi_x(3) = xi*(xi + 1._wp)/2._wp ! Right basis function + + ! Y-direction basis functions + eta = 2._wp*psi_pos(2) - 1._wp ! Convert to [-1, 1] range + psi_y(1) = eta*(eta - 1._wp)/2._wp ! Left basis function + psi_y(2) = (1._wp - eta)*(1._wp + eta) ! Center basis function + psi_y(3) = eta*(eta + 1._wp)/2._wp ! Right basis function + + if (p > 0) then + ! Z-direction basis functions + zeta = 2._wp*psi_pos(3) - 1._wp ! Convert to [-1, 1] range + psi_z(1) = zeta*(zeta - 1._wp)/2._wp ! Left basis function + psi_z(2) = (1._wp - zeta)*(1._wp + zeta) ! Center basis function + psi_z(3) = zeta*(zeta + 1._wp)/2._wp ! Right basis function + else + psi_z(1) = 0._wp + psi_z(2) = 1._wp + psi_z(3) = 0._wp + end if + + !< Perform biquadratic interpolation + f_pinfl = 0._wp + + if (p == 0) then !2D - 9 point interpolation (3x3) + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3)) * & + psi_x(i) * psi_y(j) + end do + end do + else !3D - 27 point interpolation (3x3x3) + do k = 1, 3 + do j = 1, 3 + do i = 1, 3 + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2) * & + psi_x(i) * psi_y(j) * psi_z(k) + end do + end do + end do + end if end if !R_Omega @@ -1101,9 +1221,9 @@ contains !> This subroutine updates the Lagrange variables using the tvd RK time steppers. !! The time derivative of the bubble variables must be stored at every stage to avoid precision errors. !! 
@param stage Current tvd RK stage - impure subroutine s_update_lagrange_tdv_rk(q_cons_vf, stage) + impure subroutine s_update_lagrange_tdv_rk(q_prim_vf, stage) - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf integer, intent(in) :: stage integer :: k @@ -1121,7 +1241,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1146,7 +1266,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=2) elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1161,7 +1281,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1187,7 +1307,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=2) elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1202,7 +1322,7 @@ contains gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=2) + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=2) elseif (stage == 3) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1217,8 +1337,7 @@ contains gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do - if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest=1) - + if (lag_params%vel_model > 0) call s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest=1) call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1236,9 +1355,9 @@ contains !> This subroutine enforces reflective and wall boundary conditions for EL bubbles !! 
@param dest Destination for the bubble position update - impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_cons_vf, dest) + impure subroutine s_enforce_EL_bubbles_boundary_conditions(q_prim_vf, dest) - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf integer, intent(in) :: dest integer :: k, i, patch_id, offset integer, dimension(3) :: cell @@ -1292,7 +1411,7 @@ contains cell = fd_number - buff_size call s_locate_cell(mtn_pos(k, 1:3, dest), cell, mtn_s(k, 1:3, dest)) - if (q_cons_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then + if (q_prim_vf(advxb)%sf(cell(1), cell(2), cell(3)) < (1._wp - lag_params%valmaxvoid)) then keep_bubble(k) = 0 end if @@ -1322,7 +1441,7 @@ contains $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, keep_bubble, & + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, & & nBubs]') call nvtxEndRange @@ -1383,7 +1502,7 @@ contains $:GPU_UPDATE(device='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts]') + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs]') call nvtxEndRange end subroutine s_enforce_EL_bubbles_boundary_conditions diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index cbc0ca5d4b..3424e4081f 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -440,38 +440,86 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp) :: v - real(wp) :: L1, L2, L3 - real(wp) :: x1, x2, x3 - real(wp) :: y1, y2, y3 - - if (i == 1) then - x1 = x_cc(cell(1) - 1) - y1 = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) - x2 = x_cc(cell(1)) - y2 = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) - x3 = x_cc(cell(1) + 1) - y3 = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) - elseif (i == 2) then - x1 = y_cc(cell(2) - 1) - y1 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) - x2 = y_cc(cell(2)) - y2 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) - x3 = y_cc(cell(2) + 1) - y3 = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) - elseif (i == 3) then - x1 = z_cc(cell(3) - 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) - x2 = z_cc(cell(3)) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) - x3 = z_cc(cell(3) + 1) - y1 = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - endif - - L1 = ((pos - x2)*(pos - x3)) / ((x1 - x2)*(x1 - x3)) - L2 = ((pos - x1)*(pos - x3)) / ((x2 - x1)*(x2 - x3)) - L3 = ((pos - x1)*(pos - x2)) / ((x3 - x1)*(x3 - x2)) - - v = L1*y1 + L2*y2 + L3*y3 + real(wp), dimension(fd_order + 1) :: xi, eta, L + + if (fd_order == 2) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 1) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(2) = x_cc(cell(1)) + eta(2) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + xi(3) = x_cc(cell(1) + 1) + eta(3) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 1) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(2) = y_cc(cell(2)) 
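+                ! Descriptive note (added for clarity, not part of the original patch):
+                ! the stencil coordinates xi(:) and velocity samples eta(:) gathered in
+                ! this branch feed the three-point Lagrange interpolant evaluated at pos
+                ! below, i.e. v = sum_k L_k(pos)*eta(k) with
+                ! L_k(pos) = prod_{m /= k} (pos - xi(m))/(xi(k) - xi(m)).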
+ eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + xi(3) = y_cc(cell(2) + 1) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 1) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(2) = z_cc(cell(3)) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + xi(3) = z_cc(cell(3) + 1) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + endif + + L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + elseif (fd_order == 4) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 2) + eta(1) = q_prim_vf(momxb)%sf(cell(1) - 2, cell(2), cell(3)) + xi(2) = x_cc(cell(1) - 1) + eta(2) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) + xi(3) = x_cc(cell(1)) + eta(3) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + xi(4) = x_cc(cell(1) + 1) + eta(4) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) + xi(5) = x_cc(cell(1) + 2) + eta(5) = q_prim_vf(momxb)%sf(cell(1) + 2, cell(2), cell(3)) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 2) + eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 2, cell(3)) + xi(2) = y_cc(cell(2) - 1) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) + xi(3) = y_cc(cell(2)) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + xi(4) = y_cc(cell(2) + 1) + eta(4) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) + xi(5) = y_cc(cell(2) + 2) + eta(5) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 2, cell(3)) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 2) + eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 2) + xi(2) = z_cc(cell(3) - 1) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) + xi(3) = z_cc(cell(3)) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + xi(4) = z_cc(cell(3) + 1) + eta(4) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) + xi(5) = z_cc(cell(3) + 2) + eta(5) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 2) + endif + + L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + ((xi(1) - xi(2))*(xi(1) - xi(3))*(xi(1) - xi(3))*(xi(2) - xi(5))) + L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + ((xi(2) - xi(1))*(xi(2) - xi(3))*(xi(2) - xi(3))*(xi(2) - xi(5))) + L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5))) / & + ((xi(3) - xi(1))*(xi(3) - xi(2))*(xi(3) - xi(4))*(xi(3) - xi(5))) + L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + ((xi(4) - xi(1))*(xi(4) - xi(2))*(xi(4) - xi(3))*(xi(4) - xi(5))) + L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + ((xi(5) - xi(1))*(xi(5) - xi(2))*(xi(5) - xi(3))*(xi(5) - xi(4))) + + v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + L(4)*eta(4) + L(5)*eta(5) + end if end function f_interpolate_velocity @@ -497,19 +545,69 @@ contains real(wp) :: a real(wp) :: dp, vol, force, v_rel + real(wp), dimension(fd_order - 1) :: xi, eta, L + + if (fd_order == 2) then + if (i == 1) then + dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + elseif (i == 2) then + dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + elseif (i == 3) then + dp = 
(q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + end if + elseif (fd_order == 4) then + if (i == 1) then + xi(1) = x_cc(cell(1) - 1) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 2,cell(2),cell(3))) / & + (x_cc(cell(1)) - x_cc(cell(1) - 2)) + xi(2) = x_cc(cell(1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + xi(3) = x_cc(cell(1) + 1) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1) + 2,cell(2),cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3))) / & + (x_cc(cell(1) + 2) - x_cc(cell(1))) + elseif (i == 2) then + xi(1) = y_cc(cell(2) - 1) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 2,cell(3))) / & + (y_cc(cell(2)) - y_cc(cell(2) - 2)) + xi(2) = y_cc(cell(2)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + xi(3) = y_cc(cell(2) + 1) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 2,cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3))) / & + (y_cc(cell(2) + 2) - y_cc(cell(2))) + elseif (i == 3) then + xi(1) = z_cc(cell(3) - 1) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) ) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 2)) / & + (z_cc(cell(3)) - z_cc(cell(3) - 2)) + xi(2) = y_cc(cell(3)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + xi(3) = y_cc(cell(3) + 1) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 2) - & + q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) )) / & + (z_cc(cell(3) + 2) - z_cc(cell(3))) + end if + + L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) - if (i == 1) then - dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & - (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) - elseif (i == 2) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) - elseif (i == 3) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & - (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + dp = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) end if vol = (4._wp/3._wp) * pi * rad**3._wp diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 0c9488ed57..56c19034ff 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -145,7 +145,7 @@ contains & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches', 'fd_order', 'lag_params%vel_model'] + & 'num_bc_patches', 'lag_params%vel_model'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -755,7 +755,6 @@ contains end subroutine s_mpi_sendrecv_particles - !! This function returns a unique tag for each neighbor based on its position !! 
relative to the current process. !! @param i, j, k Indices of the neighbor in the range [-1, 1] diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index e6ade24be1..66dd1cf978 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -375,7 +375,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=1) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -477,7 +477,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=1) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -549,7 +549,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=2) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -659,7 +659,7 @@ contains if (t_step == t_step_stop) return end if - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(1)%vf, stage=1) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=1) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -731,7 +731,7 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 2) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=2) + if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=2) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -804,7 +804,7 @@ contains ! Stage 3 of 3 call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg, 3) - if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(q_cons_ts(2)%vf, stage=3) + if (bubbles_lagrange .and. .not. 
adap_dt) call s_update_lagrange_tdv_rk(q_prim_vf, stage=3) $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size From b4e027233b02e02ec00f1725ba82c43d3bd7907b Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 11 Jul 2025 13:59:32 -0400 Subject: [PATCH 44/62] bug fix --- src/simulation/m_bubbles_EL.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index d92a70b239..ac2e166a93 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -930,7 +930,7 @@ contains real(wp), intent(out), optional :: preterm1, term2, Romega real(wp), dimension(3) :: scoord, psi_pos, psi_x, psi_y, psi_z - real(wp), xi, eta, zeta + real(wp) :: xi, eta, zeta real(wp) :: dc, vol, aux real(wp) :: volgas, term1, Rbeq, denom real(wp) :: charvol, charpres, charvol2, charpres2 From 679926aef6882d39299cc8e15cc7f754f3c638af Mon Sep 17 00:00:00 2001 From: Benjamin Wilfong Date: Sun, 27 Jul 2025 12:36:38 -0500 Subject: [PATCH 45/62] finish merge and test --- src/common/m_mpi_common.fpp | 4 ++-- src/post_process/m_data_output.fpp | 2 ++ src/post_process/m_mpi_proxy.fpp | 5 ----- src/pre_process/m_start_up.fpp | 6 +----- src/simulation/m_mpi_proxy.fpp | 10 ++++------ 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index a7cfb97995..6650b01047 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -413,6 +413,7 @@ contains integer, intent(inout) :: var_loc #ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors ! Temporary storage variable that holds the reduced sum value integer :: var_glb @@ -423,7 +424,6 @@ contains MPI_SUM, 0, MPI_COMM_WORLD, ierr) var_loc = var_glb - #endif end subroutine s_mpi_reduce_int_sum @@ -1102,7 +1102,7 @@ contains integer :: recon_order !< reconstruction order - integer :: i, j !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators integer :: ierr !< Generic flag used to identify and report MPI errors ! 
temp array to store neighbor rank coordinates diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 3114da7771..51a1827e1c 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1614,6 +1614,8 @@ contains character(len=64), dimension(num_procs) :: var_names integer, dimension(num_procs) :: var_types real(wp) :: dummy_data + + integer :: ierr !< Generic flag used to identify and report database errors integer :: i dummy_data = 0._wp diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 5c3bffad2c..fb535cd955 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -104,17 +104,12 @@ contains & 'file_per_process', 'relax', 'cf_wrt', 'igr', & & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & -<<<<<<< HEAD & 'output_partial_domain', 'relativity', 'cont_damage', & & 'lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io' ] -======= - & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io' ] - ->>>>>>> upstream/master call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 1f70e04e45..8f41a0d0d5 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,12 +149,8 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & -<<<<<<< HEAD patch_bc, Bx0, relativity, cont_damage, fd_order, lag_params, & - interface_file, normFac, normMag, g0, p0 -======= - patch_bc, Bx0, relativity, cont_damage, igr, igr_order ->>>>>>> upstream/master + interface_file, normFac, normMag, g0, p0, igr, igr_order ! 
Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 7a2b8d5fa1..9e4412ba81 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -90,6 +90,7 @@ contains integer :: i, j, k integer :: real_size, int_size, nReal, lag_num_ts + integer :: ierr !< Generic flag used to identify and report MPI errors call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) @@ -140,13 +141,9 @@ contains & 'wave_speeds', 'avg_state', 'precision', 'bc_x%beg', 'bc_x%end', & & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & -<<<<<<< HEAD - & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches', 'lag_params%vel_model'] -======= & 'num_source','relax_model','num_ibs','n_start','num_igr_iters', & - & 'num_bc_patches','num_igr_warm_start_iters','adap_dt_max_iters' ] ->>>>>>> upstream/master + & 'num_bc_patches','num_igr_warm_start_iters','adap_dt_max_iters', & + & 'lag_params%vel_model'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -667,6 +664,7 @@ contains integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size integer :: i, j, k, l, q + integer :: ierr !< Generic flag used to identify and report MPI errors do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end From ca56a8fec386305065e76dd8b6a66f5a0d8feab3 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Mon, 1 Sep 2025 12:40:01 -0400 Subject: [PATCH 46/62] clean merge --- src/common/m_helper_basic.fpp | 2 +- src/common/m_mpi_common.fpp | 6 +- src/post_process/m_data_output.fpp | 177 +++++++++++------------ src/post_process/m_global_parameters.fpp | 2 +- src/pre_process/m_global_parameters.fpp | 2 +- src/simulation/m_bubbles.fpp | 6 +- src/simulation/m_bubbles_EL.fpp | 135 +++++++++-------- src/simulation/m_bubbles_EL_kernels.fpp | 124 ++++++++-------- src/simulation/m_global_parameters.fpp | 12 +- src/simulation/m_mpi_proxy.fpp | 52 +++---- toolchain/mfc/run/case_dicts.py | 2 +- 11 files changed, 259 insertions(+), 261 deletions(-) diff --git a/src/common/m_helper_basic.fpp b/src/common/m_helper_basic.fpp index db5ebc256c..8bd6674eec 100644 --- a/src/common/m_helper_basic.fpp +++ b/src/common/m_helper_basic.fpp @@ -10,7 +10,7 @@ module m_helper_basic implicit none - private; + private; public :: f_approx_equal, & f_approx_in_array, & f_is_default, & diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index ee71af35ac..5eae8fcabb 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1641,8 +1641,8 @@ contains end if @:ALLOCATE(neighbor_ranks(nidx(1)%beg:nidx(1)%end, & - nidx(2)%beg:nidx(2)%end, & - nidx(3)%beg:nidx(3)%end)) + nidx(2)%beg:nidx(2)%end, & + nidx(3)%beg:nidx(3)%end)) do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end @@ -1652,7 +1652,7 @@ contains if (num_dims > 1) neighbor_coords(2) = proc_coords(2) + j if (num_dims > 2) neighbor_coords(3) = proc_coords(3) + k call MPI_CART_RANK(MPI_COMM_CART, neighbor_coords, & - neighbor_ranks(i, j, k), ierr) + neighbor_ranks(i, j, k), ierr) end if end do end do diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 51a1827e1c..2b491f7df2 100644 --- a/src/post_process/m_data_output.fpp +++ 
b/src/post_process/m_data_output.fpp @@ -1126,11 +1126,11 @@ contains dummy = 0._wp ! Construct file path - write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Check if file exists - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (.not. file_exist) then call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if @@ -1139,7 +1139,7 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) @@ -1154,15 +1154,15 @@ contains call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate(proc_bubble_counts(file_num_procs)) + allocate (proc_bubble_counts(file_num_procs)) if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) ! Skip to processor counts position disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & - MPI_OFFSET_KIND) + MPI_OFFSET_KIND) call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) @@ -1186,7 +1186,7 @@ contains mpi_info_int, ifile, ierr) disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & 'native', mpi_info_null, ierr) @@ -1202,47 +1202,47 @@ contains open (unit=29, file=file_loc, form='formatted', position='rewind') if (lag_header) then - write(29, '(A)', advance='no') - if (lag_id_wrt) write(29, '(A8)', advance='no') 'id, ' - if (lag_pos_wrt) write(29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' - if (lag_pos_prev_wrt) write(29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' - if (lag_vel_wrt) write(29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' - if (lag_rad_wrt) write(29, '(A17)', advance='no') 'radius, ' - if (lag_rvel_wrt) write(29, '(A17)', advance='no') 'rvel, ' - if (lag_r0_wrt) write(29, '(A17)', advance='no') 'r0, ' - if (lag_rmax_wrt) write(29, '(A17)', advance='no') 'rmax, ' - if (lag_rmin_wrt) write(29, '(A17)', advance='no') 'rmin, ' - if (lag_dphidt_wrt) write(29, '(A17)', advance='no') 'dphidt, ' - if (lag_pres_wrt) write(29, '(A17)', advance='no') 'pressure, ' - if (lag_mv_wrt) write(29, '(A17)', advance='no') 'mv, ' - if (lag_mg_wrt) write(29, '(A17)', advance='no') 'mg, ' - if (lag_betaT_wrt) write(29, '(A17)', advance='no') 'betaT, ' - if (lag_betaC_wrt) write(29, '(A17)', advance='no') 'betaC, ' - write(29, '(A15)') 'time' + write (29, '(A)', advance='no') + if (lag_id_wrt) write (29, '(A8)', advance='no') 'id, ' + if (lag_pos_wrt) write (29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' + if (lag_pos_prev_wrt) write (29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' + if (lag_vel_wrt) write (29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' + if (lag_rad_wrt) write (29, '(A17)', advance='no') 'radius, ' + if (lag_rvel_wrt) write (29, '(A17)', advance='no') 'rvel, ' + if (lag_r0_wrt) write (29, 
'(A17)', advance='no') 'r0, ' + if (lag_rmax_wrt) write (29, '(A17)', advance='no') 'rmax, ' + if (lag_rmin_wrt) write (29, '(A17)', advance='no') 'rmin, ' + if (lag_dphidt_wrt) write (29, '(A17)', advance='no') 'dphidt, ' + if (lag_pres_wrt) write (29, '(A17)', advance='no') 'pressure, ' + if (lag_mv_wrt) write (29, '(A17)', advance='no') 'mv, ' + if (lag_mg_wrt) write (29, '(A17)', advance='no') 'mg, ' + if (lag_betaT_wrt) write (29, '(A17)', advance='no') 'betaT, ' + if (lag_betaC_wrt) write (29, '(A17)', advance='no') 'betaC, ' + write (29, '(A15)') 'time' end if do i = 1, file_tot_part id = int(MPI_IO_DATA_lg_bubbles(i, 1)) inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) if (id > 0) then - write(29, '(100(A))', advance='no') '' - - if (lag_id_wrt) write(29, '(I6, A)', advance='no') id, ', ' - if (lag_pos_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' - if (lag_pos_prev_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' - if (lag_vel_wrt) write(29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' - if (lag_rad_wrt) write(29, '(E15.7, A)', advance='no') inputvals(10), ', ' - if (lag_rvel_wrt) write(29, '(E15.7, A)', advance='no') inputvals(11), ', ' - if (lag_r0_wrt) write(29, '(E15.7, A)', advance='no') inputvals(12), ', ' - if (lag_rmax_wrt) write(29, '(E15.7, A)', advance='no') inputvals(13), ', ' - if (lag_rmin_wrt) write(29, '(E15.7, A)', advance='no') inputvals(14), ', ' - if (lag_dphidt_wrt) write(29, '(E15.7, A)', advance='no') inputvals(15), ', ' - if (lag_pres_wrt) write(29, '(E15.7, A)', advance='no') inputvals(16), ', ' - if (lag_mv_wrt) write(29, '(E15.7, A)', advance='no') inputvals(17), ', ' - if (lag_mg_wrt) write(29, '(E15.7, A)', advance='no') inputvals(18), ', ' - if (lag_betaT_wrt) write(29, '(E15.7, A)', advance='no') inputvals(19), ', ' - if (lag_betaC_wrt) write(29, '(E15.7, A)', advance='no') inputvals(20), ', ' - write(29, '(E15.7)') time_real + write (29, '(100(A))', advance='no') '' + + if (lag_id_wrt) write (29, '(I6, A)', advance='no') id, ', ' + if (lag_pos_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' + if (lag_pos_prev_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' + if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' + if (lag_rad_wrt) write (29, '(E15.7, A)', advance='no') inputvals(10), ', ' + if (lag_rvel_wrt) write (29, '(E15.7, A)', advance='no') inputvals(11), ', ' + if (lag_r0_wrt) write (29, '(E15.7, A)', advance='no') inputvals(12), ', ' + if (lag_rmax_wrt) write (29, '(E15.7, A)', advance='no') inputvals(13), ', ' + if (lag_rmin_wrt) write (29, '(E15.7, A)', advance='no') inputvals(14), ', ' + if (lag_dphidt_wrt) write (29, '(E15.7, A)', advance='no') inputvals(15), ', ' + if (lag_pres_wrt) write (29, '(E15.7, A)', advance='no') inputvals(16), ', ' + if (lag_mv_wrt) write (29, '(E15.7, A)', advance='no') inputvals(17), ', ' + if (lag_mg_wrt) write (29, '(E15.7, A)', advance='no') inputvals(18), ', ' + if (lag_betaT_wrt) write (29, '(E15.7, A)', advance='no') inputvals(19), ', ' + if (lag_betaC_wrt) write (29, '(E15.7, A)', advance='no') inputvals(20), ', ' + write (29, '(E15.7)') time_real end if end do close (29) @@ -1284,7 +1284,6 @@ contains integer, dimension(num_procs) :: meshtypes real(wp) :: dummy_data - 
integer :: i, j real(wp), dimension(:), allocatable :: bub_id @@ -1296,11 +1295,11 @@ contains dummy_data = 0._wp ! Construct file path - write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Check if file exists - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (.not. file_exist) then call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if @@ -1309,7 +1308,7 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) @@ -1324,15 +1323,15 @@ contains call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate(proc_bubble_counts(file_num_procs)) + allocate (proc_bubble_counts(file_num_procs)) if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) ! Skip to processor counts position disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & - MPI_OFFSET_KIND) + MPI_OFFSET_KIND) call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) @@ -1359,31 +1358,31 @@ contains if (nBub > 0) then - allocate(bub_id(nBub)) - allocate(px(nBub)) - allocate(py(nBub)) - allocate(pz(nBub)) - allocate(ppx(nBub)) - allocate(ppy(nBub)) - allocate(ppz(nBub)) - allocate(vx(nBub)) - allocate(vy(nBub)) - allocate(vz(nBub)) - allocate(radius(nBub)) - allocate(rvel(nBub)) - allocate(rnot(nBub)) - allocate(rmax(nBub)) - allocate(rmin(nBub)) - allocate(dphidt(nBub)) - allocate(pressure(nBub)) - allocate(mv(nBub)) - allocate(mg(nBub)) - allocate(betaT(nBub)) - allocate(betaC(nBub)) + allocate (bub_id(nBub)) + allocate (px(nBub)) + allocate (py(nBub)) + allocate (pz(nBub)) + allocate (ppx(nBub)) + allocate (ppy(nBub)) + allocate (ppz(nBub)) + allocate (vx(nBub)) + allocate (vy(nBub)) + allocate (vz(nBub)) + allocate (radius(nBub)) + allocate (rvel(nBub)) + allocate (rnot(nBub)) + allocate (rmax(nBub)) + allocate (rmin(nBub)) + allocate (dphidt(nBub)) + allocate (pressure(nBub)) + allocate (mv(nBub)) + allocate (mg(nBub)) + allocate (betaT(nBub)) + allocate (betaC(nBub)) allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) + MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & @@ -1391,11 +1390,11 @@ contains ! 
Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, & - lag_io_vars * nBub, mpi_p, status, ierr) + lag_io_vars*nBub, mpi_p, status, ierr) call MPI_FILE_CLOSE(ifile, ierr) call MPI_TYPE_FREE(view, ierr) @@ -1443,8 +1442,8 @@ contains end if err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & - px, py, pz, nBub, & - DB_DOUBLE, DB_F77NULL, ierr) + px, py, pz, nBub, & + DB_DOUBLE, DB_F77NULL, ierr) if (lag_id_wrt) then call s_write_lag_variable_to_formatted_database_file('part_id', t_step, bub_id, nBub) @@ -1502,9 +1501,9 @@ contains call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step, betaC, nBub) end if - deallocate(bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & - rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & - betaT, betaC) + deallocate (bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & + rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & + betaT, betaC) deallocate (MPI_IO_DATA_lg_bubbles) else call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) @@ -1515,7 +1514,7 @@ contains ! Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) @@ -1539,8 +1538,8 @@ contains err = DBSETEMPTYOK(1) err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & - dummy_data, dummy_data, dummy_data, 0, & - DB_DOUBLE, DB_F77NULL, ierr) + dummy_data, dummy_data, dummy_data, 0, & + DB_DOUBLE, DB_F77NULL, ierr) if (lag_id_wrt) then call s_write_lag_variable_to_formatted_database_file('part_id', t_step) @@ -1629,13 +1628,13 @@ contains end do err = DBSET2DSTRLEN(len(var_names(1))) err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & - num_procs, var_names, & - len_trim(var_names), & - var_types, DB_F77NULL, ierr) + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) end if err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & - 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) else if (proc_rank == 0) then do i = 1, num_procs @@ -1646,14 +1645,14 @@ contains err = DBSET2DSTRLEN(len(var_names(1))) err = DBSETEMPTYOK(1) err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & - num_procs, var_names, & - len_trim(var_names), & - var_types, DB_F77NULL, ierr) + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) end if err = DBSETEMPTYOK(1) err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & - 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) + 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) end if end subroutine s_write_lag_variable_to_formatted_database_file diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 485df39053..cf07cc287e 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -180,7 +180,7 @@ module m_global_parameters type(int_bounds_info), dimension(3) :: nidx - integer, allocatable, 
dimension(:,:,:) :: neighbor_ranks + integer, allocatable, dimension(:, :, :) :: neighbor_ranks !! Neighbor ranks for lagrangian particle communication integer, allocatable, dimension(:) :: start_idx !< diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 2f3dadfaa4..6d8a5504a9 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -195,7 +195,7 @@ module m_global_parameters type(int_bounds_info), dimension(3) :: nidx - integer, allocatable, dimension(:,:,:) :: neighbor_ranks + integer, allocatable, dimension(:, :, :) :: neighbor_ranks !! Neighbor ranks for lagrangian particle communication integer, allocatable, dimension(:) :: start_idx !< diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 5e3a2ead80..a936254ac2 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -566,15 +566,15 @@ contains if (lag_params%vel_model == 1) then do l = 1, num_dims vTemp = f_interpolate_velocity(fR, cell, l, q_prim_vf) - fPos(l) = fPos(l) + h * vTemp + fPos(l) = fPos(l) + h*vTemp fVel(l) = vTemp end do elseif (lag_params%vel_model == 2) then do l = 1, num_dims aTemp = f_get_acceleration(fPos(l), fR, fVel(l), fmass_n, fmass_v, & fRe, fRho, cell, l, q_prim_vf) - fPos(l) = fPos(l) + h * fVel(l) - fVel(l) = fVel(l) + h * aTemp + fPos(l) = fPos(l) + h*fVel(l) + fVel(l) = fVel(l) + h*aTemp end do end if end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 4d266a001a..269b096058 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -78,7 +78,6 @@ module m_bubbles_EL $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') - integer, allocatable, dimension(:) :: keep_bubble, prefix_sum integer :: active_bubs $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') @@ -272,10 +271,10 @@ contains if (num_procs > 1) then call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, 1)) call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) end if $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') @@ -450,11 +449,11 @@ contains dummy = 0._wp ! Construct file path - write(file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Check if file exists - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (.not. 
file_exist) then call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if @@ -463,7 +462,7 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) @@ -478,15 +477,15 @@ contains call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - allocate(proc_bubble_counts(file_num_procs)) + allocate (proc_bubble_counts(file_num_procs)) if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & - mpi_info_int, ifile, ierr) + mpi_info_int, ifile, ierr) ! Skip to processor counts position disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & - MPI_OFFSET_KIND) + MPI_OFFSET_KIND) call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) @@ -518,7 +517,7 @@ contains allocate (MPI_IO_DATA_lag_bubbles(bub_id, 1:lag_io_vars)) call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) + MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & @@ -526,11 +525,11 @@ contains ! Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, & - lag_io_vars * bub_id, mpi_p, status, ierr) + lag_io_vars*bub_id, mpi_p, status, ierr) call MPI_FILE_CLOSE(ifile, ierr) call MPI_TYPE_FREE(view, ierr) @@ -568,7 +567,7 @@ contains ! Skip extended header disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & - file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) @@ -577,14 +576,14 @@ contains call MPI_TYPE_FREE(view, ierr) end if - print*, proc_rank, lag_id + print *, proc_rank, lag_id if (proc_rank == 0) then - write(*,'(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count - write(*,'(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt + write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt end if - deallocate(proc_bubble_counts) + deallocate (proc_bubble_counts) #endif end subroutine s_restart_bubbles @@ -655,8 +654,8 @@ contains myBeta_c = gas_betaC(k) myBeta_t = gas_betaT(k) myR0 = bub_R0(k) - myPos = mtn_pos(k,:,2) - myVel = mtn_vel(k,:,2) + myPos = mtn_pos(k, :, 2) + myVel = mtn_vel(k, :, 2) ! 
Vapor and heat fluxes call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) @@ -693,13 +692,13 @@ contains gas_mv(k, 1) = myMass_v if (lag_params%vel_model == 1) then - mtn_posPrev(k,:,1) = mtn_pos(k,:,2) - mtn_pos(k,:,1) = myPos - mtn_vel(k,:,1) = myVel + mtn_posPrev(k, :, 1) = mtn_pos(k, :, 2) + mtn_pos(k, :, 1) = myPos + mtn_vel(k, :, 1) = myVel elseif (lag_params%vel_model == 2) then - mtn_posPrev(k,:,1) = mtn_pos(k,:,2) - mtn_pos(k,:,1) = myPos - mtn_vel(k,:,1) = myVel + mtn_posPrev(k, :, 1) = mtn_pos(k, :, 2) + mtn_pos(k, :, 1) = myPos + mtn_vel(k, :, 1) = myVel end if else @@ -715,14 +714,14 @@ contains do l = 1, num_dims if (lag_params%vel_model == 1) then mtn_dposdt(k, l, stage) = f_interpolate_velocity(myPos(l), & - cell, l, q_prim_vf) + cell, l, q_prim_vf) mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = myVel(l) mtn_dveldt(k, l, stage) = f_get_acceleration(myPos(l), & - myR, myVel(l), & - myMass_n, myMass_v, & - Re(1), myRho, cell, l, q_prim_vf) + myR, myVel(l), & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -1010,16 +1009,16 @@ contains if (p == 0) then !2D - 4 point interpolation (2x2) do j = 1, 2 do i = 1, 2 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3)) * & - psi_x(i) * psi_y(j) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3))* & + psi_x(i)*psi_y(j) end do end do else !3D - 8 point interpolation (2x2x2) do k = 1, 2 do j = 1, 2 do i = 1, 2 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1) * & - psi_x(i) * psi_y(j) * psi_z(k) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 1, cell(2) + j - 1, cell(3) + k - 1)* & + psi_x(i)*psi_y(j)*psi_z(k) end do end do end do @@ -1086,16 +1085,16 @@ contains if (p == 0) then !2D - 9 point interpolation (3x3) do j = 1, 3 do i = 1, 3 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3)) * & - psi_x(i) * psi_y(j) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3))* & + psi_x(i)*psi_y(j) end do end do else !3D - 27 point interpolation (3x3x3) do k = 1, 3 do j = 1, 3 do i = 1, 3 - f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2) * & - psi_x(i) * psi_y(j) * psi_z(k) + f_pinfl = f_pinfl + q_prim_vf(E_idx)%sf(cell(1) + i - 2, cell(2) + j - 2, cell(3) + k - 2)* & + psi_x(i)*psi_y(j)*psi_z(k) end do end do end do @@ -1369,11 +1368,11 @@ contains ! Relocate bubbles at solid boundaries and delete bubbles that leave ! buffer regions if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,1,dest) < x_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k,dest) + .and. mtn_pos(k, 1, dest) < x_cb(-1) + intfc_rad(k, dest)) then + mtn_pos(k, 1, dest) = x_cb(-1) + intfc_rad(k, dest) elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,1,dest) > x_cb(m) - intfc_rad(k,dest)) then - mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k,dest) + .and. 
mtn_pos(k, 1, dest) > x_cb(m) - intfc_rad(k, dest)) then + mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k, dest) elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then @@ -1381,11 +1380,11 @@ contains end if if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) < y_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k,dest) + .and. mtn_pos(k, 2, dest) < y_cb(-1) + intfc_rad(k, dest)) then + mtn_pos(k, 2, dest) = y_cb(-1) + intfc_rad(k, dest) else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,2,dest) > y_cb(n) - intfc_rad(k,dest)) then - mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k,dest) + .and. mtn_pos(k, 2, dest) > y_cb(n) - intfc_rad(k, dest)) then + mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k, dest) elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then @@ -1394,11 +1393,11 @@ contains if (p > 0) then if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) < z_cb(-1) + intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k,dest) + .and. mtn_pos(k, 3, dest) < z_cb(-1) + intfc_rad(k, dest)) then + mtn_pos(k, 3, dest) = z_cb(-1) + intfc_rad(k, dest) else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & - .and. mtn_pos(k,3,dest) > z_cb(p) - intfc_rad(k,dest)) then - mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k,dest) + .and. mtn_pos(k, 3, dest) > z_cb(p) - intfc_rad(k, dest)) then + mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k, dest) elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then @@ -1427,7 +1426,7 @@ contains do i = 1, num_dims mtn_pos(k, i, dest) = mtn_pos(k, i, dest) - & levelset_norm%sf(cell(1), cell(2), cell(3), patch_id, i) & - * levelset%sf(cell(1), cell(2), cell(3), patch_id) + *levelset%sf(cell(1), cell(2), cell(3), patch_id) end do cell = fd_number - buff_size @@ -1449,7 +1448,7 @@ contains if (k == 1) then prefix_sum(k) = keep_bubble(k) else - prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) + prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) end if if (k == nBubs) active_bubs = prefix_sum(k) end do @@ -1491,10 +1490,10 @@ contains call nvtxStartRange("LAG-BC-SENDRECV") call s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & - intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & - mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & + intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & + mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) call nvtxEndRange end if @@ -1881,7 +1880,7 @@ contains MPI_SUM, MPI_COMM_WORLD, ierr) call MPI_ALLGATHER(bub_id, 1, MPI_INTEGER, proc_bubble_counts, 1, MPI_INTEGER, & - MPI_COMM_WORLD, ierr) + MPI_COMM_WORLD, ierr) ! 
Calculate starting index for this processor's particles call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) @@ -1891,12 +1890,12 @@ contains gsizes(1) = tot_part gsizes(2) = lag_io_vars - write(file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) ! Clean up existing file if (proc_rank == 0) then - inquire(FILE=trim(file_loc), EXIST=file_exist) + inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) end if @@ -1906,8 +1905,8 @@ contains if (proc_rank == 0) then call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & - ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) ! Write header using MPI I/O for consistency call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) @@ -1947,7 +1946,7 @@ contains end do call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) + MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & @@ -1956,11 +1955,11 @@ contains ! Skip header (written by rank 0) disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & - num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & - lag_io_vars * bub_id, mpi_p, status, ierr) + lag_io_vars*bub_id, mpi_p, status, ierr) call MPI_FILE_CLOSE(ifile, ierr) @@ -1976,7 +1975,7 @@ contains ! 
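! The displacement passed to MPI_FILE_SET_VIEW below has to equal the exact
! byte length of the header written by rank 0 (particle total, two time
! records, processor count, and one bubble count per processor). A small
! sketch of that byte count using the standard storage_size intrinsic rather
! than the vendor sizeof extension; the rank count and the assumption that
! wp is double precision are illustrative only:
program header_bytes_sketch
    use mpi
    implicit none
    integer(kind=MPI_OFFSET_KIND) :: header_bytes
    integer :: n_ranks, int_bytes, real_bytes
    n_ranks    = 4                      ! illustrative processor count
    int_bytes  = storage_size(0)/8      ! bytes per default integer
    real_bytes = storage_size(0.0d0)/8  ! bytes per double-precision real
    header_bytes = int(2*int_bytes + 2*real_bytes + n_ranks*int_bytes, &
                       MPI_OFFSET_KIND)
    print *, 'header occupies', header_bytes, 'bytes'
end program header_bytes_sketch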
Skip header (written by rank 0) disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & - num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) @@ -2063,8 +2062,8 @@ contains intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - mtn_dposdt(i,1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) - mtn_dveldt(i,1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) + mtn_dposdt(i, 1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) + mtn_dveldt(i, 1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) end do nBubs = nBubs - 1 diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 3424e4081f..3ba776c368 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -447,28 +447,28 @@ contains xi(1) = x_cc(cell(1) - 1) eta(1) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) xi(2) = x_cc(cell(1)) - eta(2) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + eta(2) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) xi(3) = x_cc(cell(1) + 1) eta(3) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) elseif (i == 2) then xi(1) = y_cc(cell(2) - 1) eta(1) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) xi(2) = y_cc(cell(2)) - eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) xi(3) = y_cc(cell(2) + 1) eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) elseif (i == 3) then xi(1) = z_cc(cell(3) - 1) eta(1) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) xi(2) = z_cc(cell(3)) - eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) xi(3) = z_cc(cell(3) + 1) eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) - endif + end if - L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) - L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) - L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) + L(1) = ((pos - xi(2))*(pos - xi(3)))/((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3)))/((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2)))/((xi(3) - xi(1))*(xi(3) - xi(2))) v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) elseif (fd_order == 4) then @@ -478,7 +478,7 @@ contains xi(2) = x_cc(cell(1) - 1) eta(2) = q_prim_vf(momxb)%sf(cell(1) - 1, cell(2), cell(3)) xi(3) = x_cc(cell(1)) - eta(3) = q_prim_vf(momxb)%sf(cell(1) , cell(2), cell(3)) + eta(3) = q_prim_vf(momxb)%sf(cell(1), cell(2), cell(3)) xi(4) = x_cc(cell(1) + 1) eta(4) = q_prim_vf(momxb)%sf(cell(1) + 1, cell(2), cell(3)) xi(5) = x_cc(cell(1) + 2) @@ -489,7 +489,7 @@ contains xi(2) = y_cc(cell(2) - 1) eta(2) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) - 1, cell(3)) xi(3) = y_cc(cell(2)) - eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) , cell(3)) + eta(3) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2), cell(3)) xi(4) = y_cc(cell(2) + 1) eta(4) = q_prim_vf(momxb + 1)%sf(cell(1), cell(2) + 1, cell(3)) xi(5) = y_cc(cell(2) + 2) @@ -500,22 +500,22 @@ contains xi(2) = z_cc(cell(3) - 1) eta(2) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) - 1) xi(3) = 
z_cc(cell(3)) - eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) ) + eta(3) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3)) xi(4) = z_cc(cell(3) + 1) eta(4) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 1) xi(5) = z_cc(cell(3) + 2) eta(5) = q_prim_vf(momxe)%sf(cell(1), cell(2), cell(3) + 2) - endif + end if - L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + L(1) = ((pos - xi(2))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/ & ((xi(1) - xi(2))*(xi(1) - xi(3))*(xi(1) - xi(3))*(xi(2) - xi(5))) - L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5))) / & + L(2) = ((pos - xi(1))*(pos - xi(3))*(pos - xi(4))*(pos - xi(5)))/ & ((xi(2) - xi(1))*(xi(2) - xi(3))*(xi(2) - xi(3))*(xi(2) - xi(5))) - L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5))) / & + L(3) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(4))*(pos - xi(5)))/ & ((xi(3) - xi(1))*(xi(3) - xi(2))*(xi(3) - xi(4))*(xi(3) - xi(5))) - L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + L(4) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/ & ((xi(4) - xi(1))*(xi(4) - xi(2))*(xi(4) - xi(3))*(xi(4) - xi(5))) - L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4))) / & + L(5) = ((pos - xi(1))*(pos - xi(2))*(pos - xi(3))*(pos - xi(4)))/ & ((xi(5) - xi(1))*(xi(5) - xi(2))*(xi(5) - xi(3))*(xi(5) - xi(4))) v = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) + L(4)*eta(4) + L(5)*eta(5) @@ -536,7 +536,7 @@ contains !! @param i Direction of the velocity (1: x, 2: y, 3: z) !! @param q_prim_vf Eulerian field with primitive variables !! @return a Acceleration of the bubble in direction i - pure function f_get_acceleration(pos,rad,vel,mg,mv,Re,rho,cell,i,q_prim_vf) result(a) + pure function f_get_acceleration(pos, rad, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(a) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho integer, dimension(3), intent(in) :: cell @@ -549,79 +549,79 @@ contains if (fd_order == 2) then if (i == 1) then - dp = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / & - (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + dp = (q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)))/ & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) elseif (i == 2) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + dp = (q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2) - 1, cell(3)))/ & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) elseif (i == 3) then - dp = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & - (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + dp = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 1))/ & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) end if elseif (fd_order == 4) then if (i == 1) then xi(1) = x_cc(cell(1) - 1) - eta(1) = (q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 2,cell(2),cell(3))) / & - (x_cc(cell(1)) - x_cc(cell(1) - 2)) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 2, cell(2), cell(3)))/ & + (x_cc(cell(1)) - x_cc(cell(1) - 2)) xi(2) = x_cc(cell(1)) - eta(2) = (q_prim_vf(E_idx)%sf(cell(1) + 1,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) - 1,cell(2),cell(3))) / 
& - (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1) + 1, cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1) - 1, cell(2), cell(3)))/ & + (x_cc(cell(1) + 1) - x_cc(cell(1) - 1)) xi(3) = x_cc(cell(1) + 1) - eta(3) = (q_prim_vf(E_idx)%sf(cell(1) + 2,cell(2),cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1) ,cell(2),cell(3))) / & - (x_cc(cell(1) + 2) - x_cc(cell(1))) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1) + 2, cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & + (x_cc(cell(1) + 2) - x_cc(cell(1))) elseif (i == 2) then xi(1) = y_cc(cell(2) - 1) - eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 2,cell(3))) / & - (y_cc(cell(2)) - y_cc(cell(2) - 2)) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2) - 2, cell(3)))/ & + (y_cc(cell(2)) - y_cc(cell(2) - 2)) xi(2) = y_cc(cell(2)) - eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 1,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) - 1,cell(3))) / & - (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1), cell(2) + 1, cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2) - 1, cell(3)))/ & + (y_cc(cell(2) + 1) - y_cc(cell(2) - 1)) xi(3) = y_cc(cell(2) + 1) - eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2) + 2,cell(3)) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2) ,cell(3))) / & - (y_cc(cell(2) + 2) - y_cc(cell(2))) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1), cell(2) + 2, cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & + (y_cc(cell(2) + 2) - y_cc(cell(2))) elseif (i == 3) then xi(1) = z_cc(cell(3) - 1) - eta(1) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) ) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 2)) / & - (z_cc(cell(3)) - z_cc(cell(3) - 2)) + eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 2))/ & + (z_cc(cell(3)) - z_cc(cell(3) - 2)) xi(2) = y_cc(cell(3)) - eta(2) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 1) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) - 1)) / & - (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) + eta(2) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 1))/ & + (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) xi(3) = y_cc(cell(3) + 1) - eta(3) = (q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) + 2) - & - q_prim_vf(E_idx)%sf(cell(1),cell(2),cell(3) )) / & - (z_cc(cell(3) + 2) - z_cc(cell(3))) + eta(3) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 2) - & + q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & + (z_cc(cell(3) + 2) - z_cc(cell(3))) end if - L(1) = ((pos - xi(2))*(pos - xi(3))) / ((xi(1) - xi(2))*(xi(1) - xi(3))) - L(2) = ((pos - xi(1))*(pos - xi(3))) / ((xi(2) - xi(1))*(xi(2) - xi(3))) - L(3) = ((pos - xi(1))*(pos - xi(2))) / ((xi(3) - xi(1))*(xi(3) - xi(2))) + L(1) = ((pos - xi(2))*(pos - xi(3)))/((xi(1) - xi(2))*(xi(1) - xi(3))) + L(2) = ((pos - xi(1))*(pos - xi(3)))/((xi(2) - xi(1))*(xi(2) - xi(3))) + L(3) = ((pos - xi(1))*(pos - xi(2)))/((xi(3) - xi(1))*(xi(3) - xi(2))) dp = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) end if - vol = (4._wp/3._wp) * pi * rad**3._wp - force = -1._wp * vol * dp + vol = (4._wp/3._wp)*pi*rad**3._wp + force = -1._wp*vol*dp - v_rel = vel - f_interpolate_velocity(pos,cell,i,q_prim_vf) + v_rel = vel - f_interpolate_velocity(pos, cell, i, q_prim_vf) if (lag_params%drag_model == 1) then ! 
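! The force balance assembled here is, schematically,
!   (mg + mv) * a = -V_b * dp/dx_i - c * pi * R * (v_b - u_liq)_i / Re,
! with V_b = (4/3) pi R^3, c = 4 for the free-slip and c = 6 for the no-slip
! Stokes drag law. A compact standalone sketch of that balance; the program
! and argument names are illustrative, not the MFC interface:
program accel_sketch
    implicit none
    integer, parameter :: wp = kind(1.d0)
    print *, 'a =', f_accel_sketch(1.0e3_wp, 1.0e-4_wp, 0.1_wp, 100._wp, 1.0e-9_wp, 2)
contains
    pure function f_accel_sketch(dpdx, rad, v_rel, Re, mass, drag_model) result(a)
        real(wp), intent(in) :: dpdx, rad, v_rel, Re, mass
        integer, intent(in) :: drag_model
        real(wp) :: a, pi, vol, force
        pi = acos(-1._wp)
        vol = (4._wp/3._wp)*pi*rad**3                                  ! bubble volume
        force = -vol*dpdx                                              ! pressure-gradient force
        if (drag_model == 1) force = force - (4._wp*pi*rad*v_rel)/Re  ! free-slip Stokes drag
        if (drag_model == 2) force = force - (6._wp*pi*rad*v_rel)/Re  ! no-slip Stokes drag
        a = force/mass
    end function f_accel_sketch
end program accel_sketch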
Free slip Stokes drag - force = force - (4._wp * pi * rad * v_rel) / Re + force = force - (4._wp*pi*rad*v_rel)/Re else if (lag_params%drag_model == 2) then ! No slip Stokes drag - force = force - (6._wp * pi * rad * v_rel) / Re + force = force - (6._wp*pi*rad*v_rel)/Re end if - a = force / (mg + mv) + a = force/(mg + mv) end function f_get_acceleration diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 49fac90142..09ee8e3e8e 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -252,9 +252,9 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM - type(int_bounds_info), dimension(3) :: nidx !< Indicies for neighboring processors + type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors - integer, allocatable, dimension(:,:,:) :: neighbor_ranks + integer, allocatable, dimension(:, :, :) :: neighbor_ranks !! Neighbor ranks for lagrangian particle communication integer, allocatable, dimension(:) :: start_idx !< @@ -1352,7 +1352,7 @@ contains @:PREFER_GPU(x_cc) @:PREFER_GPU(dx) - if (n == 0) return; + if (n == 0) return; @:ALLOCATE(y_cb(-1 - buff_size:n + buff_size)) @:ALLOCATE(y_cc(-buff_size:n + buff_size)) @:ALLOCATE(dy(-buff_size:n + buff_size)) @@ -1360,7 +1360,7 @@ contains @:PREFER_GPU(y_cc) @:PREFER_GPU(dy) - if (p == 0) return; + if (p == 0) return; @:ALLOCATE(z_cb(-1 - buff_size:p + buff_size)) @:ALLOCATE(z_cc(-buff_size:p + buff_size)) @:ALLOCATE(dz(-buff_size:p + buff_size)) @@ -1446,10 +1446,10 @@ contains ! Deallocating grid variables for the x-, y- and z-directions @:DEALLOCATE(x_cb, x_cc, dx) - if (n == 0) return; + if (n == 0) return; @:DEALLOCATE(y_cb, y_cc, dy) - if (p == 0) return; + if (p == 0) return; @:DEALLOCATE(z_cb, z_cc, dz) end subroutine s_finalize_global_parameters_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index d1eb2f3679..549b9f4429 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -48,7 +48,7 @@ module m_mpi_proxy $:GPU_DECLARE(create='[i_halo_size]') integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts - integer, dimension(:,:,:,:), allocatable :: p_send_ids + integer, dimension(:, :, :, :), allocatable :: p_send_ids character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff type(bounds_info), dimension(3) :: comm_coords integer :: p_buff_size, p_var_size @@ -96,8 +96,8 @@ contains call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) nReal = 7 + 16*2 + 10*lag_num_ts - p_var_size = (nReal * real_size + int_size) - p_buff_size = lag_params%nBubs_glb * p_var_size + p_var_size = (nReal*real_size + int_size) + p_buff_size = lag_params%nBubs_glb*p_var_size @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) @@ -449,15 +449,15 @@ contains !! 
for communication of initial condition) impure subroutine s_add_particles_to_transfer_list(nBub, pos, posPrev) - real(wp), dimension(:,:) :: pos - real(wp), dimension(:,:), optional :: posPrev + real(wp), dimension(:, :) :: pos + real(wp), dimension(:, :), optional :: posPrev integer :: bubID, nbub integer :: i, j, k do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end - p_send_counts(i,j,k) = 0 + p_send_counts(i, j, k) = 0 end do end do end do @@ -593,8 +593,8 @@ contains logical function f_crosses_boundary(particle_id, dir, loc, pos, posPrev) integer, intent(in) :: particle_id, dir, loc - real(wp), dimension(:,:), intent(in) :: pos - real(wp), dimension(:,:), optional, intent(in) :: posPrev + real(wp), dimension(:, :), intent(in) :: pos + real(wp), dimension(:, :), optional, intent(in) :: posPrev if (loc == -1) then ! Beginning of the domain if (nidx(dir)%beg == 0) then @@ -603,10 +603,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id,dir) > comm_coords(dir)%beg .and. & - pos(particle_id,dir) < comm_coords(dir)%beg) + f_crosses_boundary = (posPrev(particle_id, dir) > comm_coords(dir)%beg .and. & + pos(particle_id, dir) < comm_coords(dir)%beg) else - f_crosses_boundary = (pos(particle_id,dir) < comm_coords(dir)%beg) + f_crosses_boundary = (pos(particle_id, dir) < comm_coords(dir)%beg) end if elseif (loc == 1) then ! End of the domain if (nidx(dir)%end == 0) then @@ -615,10 +615,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id,dir) < comm_coords(dir)%end .and. & - pos(particle_id,dir) > comm_coords(dir)%end) + f_crosses_boundary = (posPrev(particle_id, dir) < comm_coords(dir)%end .and. & + pos(particle_id, dir) > comm_coords(dir)%end) else - f_crosses_boundary = (pos(particle_id,dir) > comm_coords(dir)%end) + f_crosses_boundary = (pos(particle_id, dir) > comm_coords(dir)%end) end if end if @@ -662,12 +662,12 @@ contains !! @param lag_num_ts Number of stages in time-stepping scheme !! 
@param nBubs Local number of bubbles impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & - gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & - rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & - dgasmv, dpos, dvel, lag_num_ts, nbubs) + gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & + rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & + dgasmv, dpos, dvel, lag_num_ts, nbubs) real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt - integer, dimension(:,:) :: lag_id + integer, dimension(:, :) :: lag_id real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size @@ -680,20 +680,20 @@ contains do i = nidx(1)%beg, nidx(1)%end if (abs(i) + abs(j) + abs(k) > 0) then - partner = neighbor_ranks(i,j,k) + partner = neighbor_ranks(i, j, k) send_tag = neighbor_tag(i, j, k) recv_tag = neighbor_tag(-i, -j, -k) - call MPI_Sendrecv(p_send_counts(i,j,k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i,j,k), 1, MPI_INTEGER, partner, recv_tag, & + call MPI_Sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - p_recv_size = p_recv_counts(i,j,k) * p_var_size + p_recv_size = p_recv_counts(i, j, k)*p_var_size position = 0 - do l = 0, p_send_counts(i,j,k) - 1 - bub_id = p_send_ids(i,j,k,l) + do l = 0, p_send_counts(i, j, k) - 1 + bub_id = p_send_ids(i, j, k, l) call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) @@ -725,12 +725,12 @@ contains send_tag = send_tag + max(num_procs, n_neighbor) recv_tag = recv_tag + max(num_procs, n_neighbor) - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) position = 0 - do l = 0, p_recv_counts(i,j,k) - 1 + do l = 0, p_recv_counts(i, j, k) - 1 nbubs = nbubs + 1 bub_id = nbubs call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index bb6e6484e5..7521249edc 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -329,7 +329,7 @@ def analytic(self): 'write_bubbles', 'write_bubbles_stats' ]: SIMULATION[f'lag_params%{var}'] = ParamType.LOG -for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', +for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', 'vel_model', 'drag_model']: SIMULATION[f'lag_params%{var}'] = ParamType.INT From fff74d879f7384360c541deb01eea75c7326fc06 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 2 Sep 2025 13:39:16 -0400 Subject: [PATCH 47/62] add perturbed IC stuff --- src/post_process/m_mpi_proxy.fpp | 1 + src/pre_process/include/3dHardcodedIC.fpp | 56 +++++++++++++++++++++++ src/pre_process/m_mpi_proxy.fpp | 2 +- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git 
a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index b096549ddc..053d96d457 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -110,6 +110,7 @@ contains & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor call MPI_BCAST(flux_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index 39fb581c0d..5096ec806c 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -7,6 +7,45 @@ ! Case 302 - IGR Jet real(wp) :: r, ux_th, ux_am, p_th, p_am, rho_th, rho_am, y_th, z_th, r_th, eps_smooth + real(wp), allocatable, dimension(:, :) :: ih + integer :: i, j, pos, start, end + logical :: file_exist + character(len=10000) :: line + character(len=25) :: value + + if (patch_icpp(patch_id)%hcid == 303) then + allocate(ih(0:m_glb, 0:p_glb)) + + if (interface_file == '.') then + call s_mpi_abort("Error: interface_file must be specified for hcid=303") + else + inquire (file=trim(interface_file), exist=file_exist) + if (file_exist) then + open(unit=10, file=trim(interface_file), status="old", action="read") + do i = 0, m_glb + read(10, '(A)') line ! Read a full line as a string + start = 1 + + do j = 0, p_glb + end = index(line(start:), ',') ! Find the next comma + if (end == 0) then + value = trim(adjustl(line(start:))) ! Last value in the line + else + value = trim(adjustl(line(start:start+end-2))) ! Extract substring + start = start + end ! Move to next value + end if + read(value, *) ih(i, j) ! Convert string to numeric value + if (.not. f_is_default(normMag)) ih(i,j )= ih(i,j) * normMag + if (.not. f_is_default(normFac)) ih(i,j) = ih(i,j) + normFac + end do + end do + close(10) + else + call s_mpi_abort("Error: interface_file specified for hcid=303 does not exist") + end if + end if + end if + eps = 1e-9_wp #:enddef @@ -86,6 +125,23 @@ q_prim_vf(E_idx)%sf(i, j, k) = p_th*f_cut_on(r - r_th, eps_smooth)*f_cut_on(x_cc(i), eps_smooth) + p_am + case (303) ! 3D Interface from file + + alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & + tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*0.1_wp)) + + q_prim_vf(advxb)%sf(i,j,k) = alph + q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph + + q_prim_vf(contxb)%sf(i,j,k) = q_prim_vf(advxb)%sf(i,j,k) * 1._wp + q_prim_vf(contxe)%sf(i,j,k) = q_prim_vf(advxe)%sf(i,j,k) * (1._wp / 950._wp) + + q_prim_vf(E_idx)%sf(i,j,k) = p0 + & + (q_prim_vf(contxb)%sf(i,j,k) + q_prim_vf(contxe)%sf(i,j,k)) * g0 * & + (ih(start_idx(1) + i, start_idx(3) + k) - y_cc(j)) + + if (surface_tension) q_prim_vf(c_idx)%sf(i,j,k) = alph + case (370) ! 
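! In hardcoded case 303 the first fluid's volume fraction is a tanh profile
! centred on the interface height h(x,z) read from interface_file, and the
! pressure is hydrostatic relative to that height using the mixture density.
! A single-point sketch of those two expressions; p0, g0, h and y are
! illustrative stand-ins for the broadcast case parameters, and the 0.1
! smoothing factor mirrors the patch:
program interface_ic_sketch
    implicit none
    integer, parameter :: wp = kind(1.d0)
    real(wp), parameter :: eps = 1e-9_wp
    real(wp) :: h, y, p0, g0, alph, rho_mix, pres
    h = 0.2_wp; y = 0.15_wp; p0 = 101325._wp; g0 = 9.81_wp
    alph = 0.5_wp*(1._wp + (1._wp - 2._wp*eps)*tanh((h - y)*0.1_wp))  ! smoothed volume fraction
    rho_mix = alph*1._wp + (1._wp - alph)*(1._wp/950._wp)             ! partial densities as in the patch
    pres = p0 + rho_mix*g0*(h - y)                                    ! hydrostatic pressure
    print *, 'alpha_1 =', alph, ' p =', pres
end program interface_ic_sketch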
This hardcoded case extrudes a 2D profile to initialize a 3D simulation domain @: HardcodedReadValues() diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index a6b7c7f85a..d9540c6bc9 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -72,7 +72,7 @@ contains & 'perturb_flow_mag', 'pref', 'rhoref', 'poly_sigma', 'R0ref', & & 'Web', 'Ca', 'Re_inv', 'sigR', 'sigV', 'rhoRV', 'palpha_eps', & & 'ptgalpha_eps', 'sigma', 'pi_fac', 'mixlayer_vel_coef', 'Bx0', & - & 'mixlayer_perturb_k0'] + & 'mixlayer_perturb_k0', 'normFac', 'normMag', 'p0', 'g0'] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor From e9a652577c5b01b92694e65e29fbf0e6524cf30f Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Tue, 2 Sep 2025 14:28:26 -0400 Subject: [PATCH 48/62] fix for GNU --- src/pre_process/include/3dHardcodedIC.fpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index 5096ec806c..4a64c01d88 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -8,7 +8,7 @@ real(wp) :: r, ux_th, ux_am, p_th, p_am, rho_th, rho_am, y_th, z_th, r_th, eps_smooth real(wp), allocatable, dimension(:, :) :: ih - integer :: i, j, pos, start, end + integer :: pos, start, end logical :: file_exist character(len=10000) :: line character(len=25) :: value @@ -35,8 +35,8 @@ start = start + end ! Move to next value end if read(value, *) ih(i, j) ! Convert string to numeric value - if (.not. f_is_default(normMag)) ih(i,j )= ih(i,j) * normMag - if (.not. f_is_default(normFac)) ih(i,j) = ih(i,j) + normFac + if (.not. f_is_default(normMag)) ih(i, j) = ih(i, j) * normMag + if (.not. f_is_default(normFac)) ih(i, j) = ih(i, j) + normFac end do end do close(10) From 69b51ea8b9b44c8c3867d50a45aa10ccc2781a9c Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 15 Sep 2025 11:10:45 -0400 Subject: [PATCH 49/62] periodic BCs mostly working --- src/common/m_boundary_common.fpp | 15 +++ src/simulation/m_bubbles_EL.fpp | 176 ++++++++++++++++--------- src/simulation/m_global_parameters.fpp | 19 +-- src/simulation/m_ibm.fpp | 20 +-- src/simulation/m_mpi_proxy.fpp | 125 +++++++++++++----- src/simulation/m_start_up.fpp | 8 +- toolchain/mfc/run/case_dicts.py | 4 - 7 files changed, 245 insertions(+), 122 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4958bad881..9aeb30eb24 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1942,6 +1942,21 @@ contains offset_x%beg = buff_size; offset_x%end = buff_size offset_y%beg = buff_size; offset_y%end = buff_size offset_z%beg = buff_size; offset_z%end = buff_size + +#ifdef MFC_MPI + ! 
Populate global domain boundaries with stretched grids + call s_mpi_allreduce_min(x_cb(-1), glb_bounds(1)%beg) + call s_mpi_allreduce_max(x_cb(m), glb_bounds(1)%end) + + if (n > 0) then + call s_mpi_allreduce_min(y_cb(-1), glb_bounds(2)%beg) + call s_mpi_allreduce_max(y_cb(n), glb_bounds(2)%end) + if (p > 0) then + call s_mpi_allreduce_min(z_cb(-1), glb_bounds(3)%beg) + call s_mpi_allreduce_max(z_cb(p), glb_bounds(3)%end) + end if + end if +#endif #endif #ifndef MFC_PRE_PROCESS diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 269b096058..8187b14638 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -79,8 +79,11 @@ module m_bubbles_EL $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') integer, allocatable, dimension(:) :: keep_bubble, prefix_sum + integer, allocatable, dimension(:) :: wrap_bubble_loc + integer, allocatable, dimension(:,:) :: wrap_bubble_dir integer :: active_bubs $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') + $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') contains @@ -110,6 +113,17 @@ contains call s_mpi_abort('Please check the lag_params%solver_approach input') end if + pcomm_coords(1)%beg = x_cb(buff_size - fd_number - 1) + pcomm_coords(1)%end = x_cb(m - buff_size + fd_number) + if (n > 0) then + pcomm_coords(2)%beg = y_cb(buff_size - fd_number - 1) + pcomm_coords(2)%end = y_cb(n - buff_size + fd_number) + if (p > 0) then + pcomm_coords(3)%beg = z_cb(buff_size - fd_number - 1) + pcomm_coords(3)%end = z_cb(p - buff_size + fd_number) + end if + end if + $:GPU_UPDATE(device='[lag_num_ts, q_beta_idx]') @:ALLOCATE(q_beta%vf(1:q_beta_idx)) @@ -149,6 +163,7 @@ contains @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(keep_bubble(1:nBubs_glb), prefix_sum(1:nBubs_glb)) + @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol @@ -274,7 +289,8 @@ contains gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs, & + dest=1) end if $:GPU_UPDATE(device='[bubbles_lagrange, lag_params]') @@ -576,8 +592,6 @@ contains call MPI_TYPE_FREE(view, ierr) end if - print *, proc_rank, lag_id - if (proc_rank == 0) then write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt @@ -1358,12 +1372,16 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf integer, intent(in) :: dest - integer :: k, i, patch_id, offset + integer :: k, i, q + integer :: patch_id, newBubs, new_idx + real(wp) :: offset integer, dimension(3) :: cell $:GPU_PARALLEL_LOOP(private='[cell]') do k = 1, nBubs keep_bubble(k) = 1 + wrap_bubble_loc(k) = 0 + wrap_bubble_dir(k,:) = 0 ! Relocate bubbles at solid boundaries and delete bubbles that leave ! buffer regions @@ -1373,6 +1391,16 @@ contains elseif (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k, 1, dest) > x_cb(m) - intfc_rad(k, dest)) then mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k, dest) + elseif (bc_x%beg == BC_PERIODIC .and. mtn_pos(k, 1, dest) < pcomm_coords(1)%beg .and. 
& + mtn_posPrev(k, 1, dest) > pcomm_coords(1)%beg) then + !print*, "1, -1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%beg + wrap_bubble_dir(k,1) = 1 + wrap_bubble_loc(k) = -1 + elseif (bc_x%end == BC_PERIODIC .and. mtn_pos(k, 1, dest) > pcomm_coords(1)%end .and. & + mtn_posPrev(k, 1, dest) < pcomm_coords(1)%end) then + !print*, "1, 1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%end + wrap_bubble_dir(k,1) = 1 + wrap_bubble_loc(k) = 1 elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then @@ -1385,6 +1413,16 @@ contains else if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k, 2, dest) > y_cb(n) - intfc_rad(k, dest)) then mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k, dest) + elseif (bc_y%beg == BC_PERIODIC .and. mtn_pos(k, 2, dest) < pcomm_coords(2)%beg .and. & + mtn_posPrev(k, 2, dest) > pcomm_coords(2)%beg) then + !print*, "2, -1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%beg + wrap_bubble_dir(k,2) = 1 + wrap_bubble_loc(k) = -1 + elseif (bc_y%end == BC_PERIODIC .and. mtn_pos(k, 2, dest) > pcomm_coords(2)%end .and. & + mtn_posPrev(k, 2, dest) < pcomm_coords(2)%end) then + !print*, "2, 1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%end + wrap_bubble_dir(k,2) = 1 + wrap_bubble_loc(k) = 1 elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then @@ -1398,6 +1436,14 @@ contains else if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) & .and. mtn_pos(k, 3, dest) > z_cb(p) - intfc_rad(k, dest)) then mtn_pos(k, 3, dest) = z_cb(p) - intfc_rad(k, dest) + elseif (bc_z%beg == BC_PERIODIC .and. mtn_pos(k, 3, dest) < pcomm_coords(3)%beg .and. & + mtn_posPrev(k, 3, dest) > pcomm_coords(3)%beg) then + wrap_bubble_dir(k,3) = 1 + wrap_bubble_loc(k) = -1 + elseif (bc_z%end == BC_PERIODIC .and. mtn_pos(k, 3, dest) > pcomm_coords(3)%end .and. & + mtn_posPrev(k, 3, dest) < pcomm_coords(3)%end) then + wrap_bubble_dir(k,3) = 1 + wrap_bubble_loc(k) = 1 elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then @@ -1436,6 +1482,7 @@ contains end if end do + call nvtxStartRange("LAG-BC") call nvtxStartRange("LAG-BC-DEV2HOST") $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & @@ -1444,6 +1491,7 @@ contains & nBubs]') call nvtxEndRange + ! 
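! The deletion pass that follows is a serial stream compaction: an inclusive
! prefix sum over keep_bubble gives every surviving bubble its packed index,
! and survivors are copied down in place. A minimal sketch of the same idea on
! a plain array (names and data are illustrative):
program compaction_sketch
    implicit none
    integer :: i, n_kept
    integer :: keep(6), psum(6)
    real :: val(6)
    val = [1., 2., 3., 4., 5., 6.]
    keep = [1, 0, 1, 1, 0, 1]        ! 1 = keep, 0 = bubble left the domain
    psum(1) = keep(1)
    do i = 2, size(keep)
        psum(i) = psum(i - 1) + keep(i)
    end do
    do i = 1, size(keep)
        if (keep(i) == 1) val(psum(i)) = val(i)   ! copy survivors down
    end do
    n_kept = psum(size(keep))
    print *, 'kept', n_kept, 'entries:', val(1:n_kept)
end program compaction_sketch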
Handle deletion of bubbles leaving local domain do k = 1, nBubs if (k == 1) then prefix_sum(k) = keep_bubble(k) @@ -1456,33 +1504,41 @@ contains do k = 1, nBubs if (keep_bubble(k) == 1) then if (prefix_sum(k) /= k) then - bub_R0(prefix_sum(k)) = bub_R0(k) - Rmax_stats(prefix_sum(k)) = Rmax_stats(k) - Rmin_stats(prefix_sum(k)) = Rmin_stats(k) - gas_mg(prefix_sum(k)) = gas_mg(k) - gas_betaT(prefix_sum(k)) = gas_betaT(k) - gas_betaC(prefix_sum(k)) = gas_betaC(k) - bub_dphidt(prefix_sum(k)) = bub_dphidt(k) - lag_id(prefix_sum(k), 1) = lag_id(k, 1) - gas_p(prefix_sum(k), 1:2) = gas_p(k, 1:2) - gas_mv(prefix_sum(k), 1:2) = gas_mv(k, 1:2) - intfc_rad(prefix_sum(k), 1:2) = intfc_rad(k, 1:2) - intfc_vel(prefix_sum(k), 1:2) = intfc_vel(k, 1:2) - mtn_pos(prefix_sum(k), 1:3, 1:2) = mtn_pos(k, 1:3, 1:2) - mtn_posPrev(prefix_sum(k), 1:3, 1:2) = mtn_posPrev(k, 1:3, 1:2) - mtn_vel(prefix_sum(k), 1:3, 1:2) = mtn_vel(k, 1:3, 1:2) - mtn_s(prefix_sum(k), 1:3, 1:2) = mtn_s(k, 1:3, 1:2) - intfc_draddt(prefix_sum(k), 1:lag_num_ts) = intfc_draddt(k, 1:lag_num_ts) - intfc_dveldt(prefix_sum(k), 1:lag_num_ts) = intfc_dveldt(k, 1:lag_num_ts) - gas_dpdt(prefix_sum(k), 1:lag_num_ts) = gas_dpdt(k, 1:lag_num_ts) - gas_dmvdt(prefix_sum(k), 1:lag_num_ts) = gas_dmvdt(k, 1:lag_num_ts) - mtn_dposdt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dposdt(k, 1:3, 1:lag_num_ts) - mtn_dveldt(prefix_sum(k), 1:3, 1:lag_num_ts) = mtn_dveldt(k, 1:3, 1:lag_num_ts) + call s_copy_lag_bubble(k, prefix_sum(k)) end if end if if (k == nBubs) nBubs = active_bubs end do + ! Handle periodic wrapping of bubbles on same processor + newBubs = 0 + do k = 1, nBubs + if (any(wrap_bubble_dir(k, :) == 1)) then + newBubs = newBubs + 1 + new_idx = nBubs + newBubs + call s_copy_lag_bubble(k, new_idx) + do i = 1, num_dims + if (wrap_bubble_dir(k, i) == 1) then + offset = glb_bounds(i)%end - glb_bounds(i)%beg + if (wrap_bubble_loc(k) == 1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) - offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) - offset + end do + else if (wrap_bubble_loc(k) == -1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) + offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) + offset + end do + end if + end if + end do + end if + end do + print*, nBubs, newBubs + nBubs = nBubs + newBubs + + ! Handle MPI transfer of bubbles going to another processor's local domain if (num_procs > 1) then call nvtxStartRange("LAG-BC-TRANSFER-LIST") call s_add_particles_to_transfer_list(nBubs, mtn_pos(:, :, dest), mtn_posPrev(:, :, dest)) @@ -1493,7 +1549,8 @@ contains gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, & intfc_rad, intfc_vel, mtn_pos, mtn_posPrev, mtn_vel, & mtn_s, intfc_draddt, intfc_dveldt, gas_dpdt, & - gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs) + gas_dmvdt, mtn_dposdt, mtn_dveldt, lag_num_ts, nBubs, & + dest) call nvtxEndRange end if @@ -1503,6 +1560,7 @@ contains & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, nBubs]') call nvtxEndRange + call nvtxEndRange end subroutine s_enforce_EL_bubbles_boundary_conditions @@ -2034,42 +2092,34 @@ contains !> The purpose of this subroutine is to remove one specific particle if dt is too small. !! 
@param bub_id Particle id - impure subroutine s_remove_lag_bubble(bub_id) - - integer, intent(in) :: bub_id - - integer :: i - - $:GPU_LOOP(parallelism='[seq]') - do i = bub_id, nBubs - 1 - bub_R0(i) = bub_R0(i + 1) - Rmax_stats(i) = Rmax_stats(i + 1) - Rmin_stats(i) = Rmin_stats(i + 1) - gas_mg(i) = gas_mg(i + 1) - gas_betaT(i) = gas_betaT(i + 1) - gas_betaC(i) = gas_betaC(i + 1) - bub_dphidt(i) = bub_dphidt(i + 1) - lag_id(i, 1) = lag_id(i + 1, 1) - gas_p(i, 1:2) = gas_p(i + 1, 1:2) - gas_mv(i, 1:2) = gas_mv(i + 1, 1:2) - intfc_rad(i, 1:2) = intfc_rad(i + 1, 1:2) - intfc_vel(i, 1:2) = intfc_vel(i + 1, 1:2) - mtn_pos(i, 1:3, 1:2) = mtn_pos(i + 1, 1:3, 1:2) - mtn_posPrev(i, 1:3, 1:2) = mtn_posPrev(i + 1, 1:3, 1:2) - mtn_vel(i, 1:3, 1:2) = mtn_vel(i + 1, 1:3, 1:2) - mtn_s(i, 1:3, 1:2) = mtn_s(i + 1, 1:3, 1:2) - intfc_draddt(i, 1:lag_num_ts) = intfc_draddt(i + 1, 1:lag_num_ts) - intfc_dveldt(i, 1:lag_num_ts) = intfc_dveldt(i + 1, 1:lag_num_ts) - gas_dpdt(i, 1:lag_num_ts) = gas_dpdt(i + 1, 1:lag_num_ts) - gas_dmvdt(i, 1:lag_num_ts) = gas_dmvdt(i + 1, 1:lag_num_ts) - mtn_dposdt(i, 1:3, 1:lag_num_ts) = mtn_dposdt(i + 1, 1:3, 1:lag_num_ts) - mtn_dveldt(i, 1:3, 1:lag_num_ts) = mtn_dveldt(i + 1, 1:3, 1:lag_num_ts) - end do - - nBubs = nBubs - 1 - $:GPU_UPDATE(device='[nBubs]') - - end subroutine s_remove_lag_bubble + impure subroutine s_copy_lag_bubble(src, dest) + + integer, intent(in) :: src, dest + + bub_R0(dest) = bub_R0(src) + Rmax_stats(dest) = Rmax_stats(src) + Rmin_stats(dest) = Rmin_stats(src) + gas_mg(dest) = gas_mg(src) + gas_betaT(dest) = gas_betaT(src) + gas_betaC(dest) = gas_betaC(src) + bub_dphidt(dest) = bub_dphidt(src) + lag_id(dest, 1) = lag_id(src, 1) + gas_p(dest, 1:2) = gas_p(src, 1:2) + gas_mv(dest, 1:2) = gas_mv(src, 1:2) + intfc_rad(dest, 1:2) = intfc_rad(src, 1:2) + intfc_vel(dest, 1:2) = intfc_vel(src, 1:2) + mtn_vel(dest, 1:3, 1:2) = mtn_vel(src, 1:3, 1:2) + mtn_s(dest, 1:3, 1:2) = mtn_s(src, 1:3, 1:2) + intfc_draddt(dest, 1:lag_num_ts) = intfc_draddt(src, 1:lag_num_ts) + intfc_dveldt(dest, 1:lag_num_ts) = intfc_dveldt(src, 1:lag_num_ts) + gas_dpdt(dest, 1:lag_num_ts) = gas_dpdt(src, 1:lag_num_ts) + gas_dmvdt(dest, 1:lag_num_ts) = gas_dmvdt(src, 1:lag_num_ts) + mtn_dposdt(dest, 1:3, 1:lag_num_ts) = mtn_dposdt(src, 1:3, 1:lag_num_ts) + mtn_dveldt(dest, 1:3, 1:lag_num_ts) = mtn_dveldt(src, 1:3, 1:lag_num_ts) + mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) + mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) + + end subroutine s_copy_lag_bubble !> The purpose of this subroutine is to deallocate variables impure subroutine s_finalize_lagrangian_solver() diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 9e73462189..dbef5fcc98 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -63,6 +63,7 @@ module m_global_parameters !> @{ real(wp), target, allocatable, dimension(:) :: x_cb, y_cb, z_cb + type(bounds_info), dimension(3) :: glb_bounds !< !> @} !> @name Cell-center (CC) locations in the x-, y- and z-directions, respectively @@ -70,7 +71,6 @@ module m_global_parameters real(wp), target, allocatable, dimension(:) :: x_cc, y_cc, z_cc !> @} - !type(bounds_info) :: x_domain, y_domain, z_domain !< !! 
Locations of the domain bounds in the x-, y- and z-coordinate directions !> @name Cell-width distributions in the x-, y- and z-directions, respectively !> @{ @@ -231,6 +231,7 @@ module m_global_parameters integer :: num_bc_patches logical :: bc_io + logical, dimension(3) :: periodic_bc !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z @@ -239,10 +240,6 @@ module m_global_parameters $:GPU_DECLARE(create='[bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3]') $:GPU_DECLARE(create='[bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3]') - type(bounds_info) :: x_domain, y_domain, z_domain - real(wp) :: x_a, y_a, z_a - real(wp) :: x_b, y_b, z_b - logical :: parallel_io !< Format of the data files logical :: file_per_process !< shared file or not when using parallel io integer :: precision !< Precision of output files @@ -252,6 +249,9 @@ module m_global_parameters integer, allocatable, dimension(:) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + type(bounds_info), allocatable, dimension(:) :: pcomm_coords + !! Coordinates for EL particle transfer + type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors integer, allocatable, dimension(:, :, :) :: neighbor_ranks @@ -652,6 +652,7 @@ contains num_bc_patches = 0 bc_io = .false. + periodic_bc = .false. bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int @@ -664,9 +665,9 @@ contains #:endfor #:endfor - x_domain%beg = dflt_int; x_domain%end = dflt_int - y_domain%beg = dflt_int; y_domain%end = dflt_int - z_domain%beg = dflt_int; z_domain%end = dflt_int + glb_bounds(1)%beg = dflt_int; glb_bounds(1)%end = dflt_int + glb_bounds(2)%beg = dflt_int; glb_bounds(2)%end = dflt_int + glb_bounds(3)%beg = dflt_int; glb_bounds(3)%end = dflt_int ! Fluids physical parameters do i = 1, num_fluids_max @@ -1388,6 +1389,7 @@ contains #:endif allocate (proc_coords(1:num_dims)) + allocate (pcomm_coords(1:num_dims)) if (parallel_io .neqv. .true.) return @@ -1424,6 +1426,7 @@ contains end if deallocate (proc_coords) + deallocate (pcomm_coords) if (parallel_io) then deallocate (start_idx) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 4d01608120..6fe6c3ac64 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -501,17 +501,17 @@ contains ghost_points_in(count)%slip = patch_ib(patch_id)%slip ! 
ghost_points(count)%rank = proc_rank - if ((x_cc(i) - dx(i)) < x_domain%beg) then + if ((x_cc(i) - dx(i)) < glb_bounds(1)%beg) then ghost_points_in(count)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then + else if ((x_cc(i) + dx(i)) > glb_bounds(1)%end) then ghost_points_in(count)%DB(1) = 1 else ghost_points_in(count)%DB(1) = 0 end if - if ((y_cc(j) - dy(j)) < y_domain%beg) then + if ((y_cc(j) - dy(j)) < glb_bounds(2)%beg) then ghost_points_in(count)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then + else if ((y_cc(j) + dy(j)) > glb_bounds(2)%end) then ghost_points_in(count)%DB(2) = 1 else ghost_points_in(count)%DB(2) = 0 @@ -543,25 +543,25 @@ contains ib_markers%sf(i, j, k) ghost_points_in(count)%slip = patch_ib(patch_id)%slip - if ((x_cc(i) - dx(i)) < x_domain%beg) then + if ((x_cc(i) - dx(i)) < glb_bounds(1)%beg) then ghost_points_in(count)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then + else if ((x_cc(i) + dx(i)) > glb_bounds(1)%end) then ghost_points_in(count)%DB(1) = 1 else ghost_points_in(count)%DB(1) = 0 end if - if ((y_cc(j) - dy(j)) < y_domain%beg) then + if ((y_cc(j) - dy(j)) < glb_bounds(2)%beg) then ghost_points_in(count)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then + else if ((y_cc(j) + dy(j)) > glb_bounds(2)%end) then ghost_points_in(count)%DB(2) = 1 else ghost_points_in(count)%DB(2) = 0 end if - if ((z_cc(k) - dz(k)) < z_domain%beg) then + if ((z_cc(k) - dz(k)) < glb_bounds(3)%beg) then ghost_points_in(count)%DB(3) = -1 - else if ((z_cc(k) + dz(k)) > z_domain%end) then + else if ((z_cc(k) + dz(k)) > glb_bounds(3)%end) then ghost_points_in(count)%DB(3) = 1 else ghost_points_in(count)%DB(3) = 0 diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 2b2f1745b9..27abce892c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -50,10 +50,9 @@ module m_mpi_proxy integer, dimension(-1:1, -1:1, -1:1) :: p_send_counts, p_recv_counts integer, dimension(:, :, :, :), allocatable :: p_send_ids character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff - type(bounds_info), dimension(3) :: comm_coords integer :: p_buff_size, p_var_size - $:GPU_DECLARE(create='[p_send_counts, comm_coords]') + $:GPU_DECLARE(create='[p_send_counts]') contains @@ -92,6 +91,7 @@ contains integer :: real_size, int_size, nReal, lag_num_ts integer :: ierr !< Generic flag used to identify and report MPI errors +#ifdef MFC_MPI call MPI_Pack_size(1, mpi_p, MPI_COMM_WORLD, real_size, ierr) call MPI_Pack_size(1, MPI_INTEGER, MPI_COMM_WORLD, int_size, ierr) @@ -100,19 +100,8 @@ contains p_buff_size = lag_params%nBubs_glb*p_var_size @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) - - comm_coords(1)%beg = x_cb(buff_size - fd_number - 1) - comm_coords(1)%end = x_cb(m - buff_size + fd_number) - if (n > 0) then - comm_coords(2)%beg = y_cb(buff_size - fd_number - 1) - comm_coords(2)%end = y_cb(n - buff_size + fd_number) - if (p > 0) then - comm_coords(3)%beg = z_cb(buff_size - fd_number - 1) - comm_coords(3)%end = z_cb(p - buff_size + fd_number) - end if - end if - @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) +#endif end subroutine s_initialize_particles_mpi @@ -195,11 +184,8 @@ contains & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 
'bc_z%pres_in','bc_z%pres_out', & - & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & - & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & - & 'z_b', 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & - & 'tau_star', 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', & - & 'ic_eps', 'ic_beta' ] + & 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', 'tau_star', & + & 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', 'ic_eps', 'ic_beta' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -285,6 +271,9 @@ contains call MPI_BCAST(nv_uvm_igr_temps_on_gpu, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(nv_uvm_pref_gpu, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + ! Extra BC Variable + call MPI_BCAST(periodic_bc, 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #endif end subroutine s_mpi_bcast_user_inputs @@ -612,6 +601,7 @@ contains call s_add_particle_to_direction(k, 0, 0, 1) end if end if + end do contains @@ -629,10 +619,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id, dir) > comm_coords(dir)%beg .and. & - pos(particle_id, dir) < comm_coords(dir)%beg) + f_crosses_boundary = (posPrev(particle_id, dir) > pcomm_coords(dir)%beg .and. & + pos(particle_id, dir) < pcomm_coords(dir)%beg) else - f_crosses_boundary = (pos(particle_id, dir) < comm_coords(dir)%beg) + f_crosses_boundary = (pos(particle_id, dir) < pcomm_coords(dir)%beg) end if elseif (loc == 1) then ! End of the domain if (nidx(dir)%end == 0) then @@ -641,10 +631,10 @@ contains end if if (present(posPrev)) then - f_crosses_boundary = (posPrev(particle_id, dir) < comm_coords(dir)%end .and. & - pos(particle_id, dir) > comm_coords(dir)%end) + f_crosses_boundary = (posPrev(particle_id, dir) < pcomm_coords(dir)%end .and. & + pos(particle_id, dir) > pcomm_coords(dir)%end) else - f_crosses_boundary = (pos(particle_id, dir) > comm_coords(dir)%end) + f_crosses_boundary = (pos(particle_id, dir) > pcomm_coords(dir)%end) end if end if @@ -690,31 +680,35 @@ contains impure subroutine s_mpi_sendrecv_particles(bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, rad, & rvel, pos, posPrev, vel, scoord, drad, drvel, dgasp, & - dgasmv, dpos, dvel, lag_num_ts, nbubs) + dgasmv, dpos, dvel, lag_num_ts, nbubs, dest) real(wp), dimension(:) :: bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, gas_betaC, bub_dphidt integer, dimension(:, :) :: lag_id real(wp), dimension(:, :) :: gas_p, gas_mv, rad, rvel, drad, drvel, dgasp, dgasmv real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel - integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size + integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size, dest integer :: i, j, k, l, q integer :: ierr !< Generic flag used to identify and report MPI errors +#ifdef MFC_MPI do k = nidx(3)%beg, nidx(3)%end do j = nidx(2)%beg, nidx(2)%end do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) > 0) then - + if (abs(i) + abs(j) + abs(k) /= 0) then partner = neighbor_ranks(i, j, k) - send_tag = neighbor_tag(i, j, k) - recv_tag = neighbor_tag(-i, -j, -k) + recv_tag = send_tag !neighbor_tag(-i,-j,-k) - call MPI_Sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + call MPI_sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! 
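! Particle state is exchanged below as a byte stream: every field is appended
! with MPI_Pack, the packed extent is sent as MPI_PACKED, and the receiver
! replays the same sequence with MPI_Unpack. A self-contained sketch of that
! pattern, exchanging two fields with itself on MPI_COMM_SELF (names and the
! 64-byte buffer are illustrative only):
program pack_exchange_sketch
    use mpi
    implicit none
    integer :: ierr, pos, id_in, id_out
    double precision :: r_in, r_out
    character(len=1) :: sbuf(64), rbuf(64)
    call MPI_Init(ierr)
    id_in = 7; r_in = 1.25d0
    pos = 0
    call MPI_Pack(id_in, 1, MPI_INTEGER, sbuf, size(sbuf), pos, MPI_COMM_SELF, ierr)
    call MPI_Pack(r_in, 1, MPI_DOUBLE_PRECISION, sbuf, size(sbuf), pos, MPI_COMM_SELF, ierr)
    call MPI_Sendrecv(sbuf, pos, MPI_PACKED, 0, 0, rbuf, size(rbuf), MPI_PACKED, 0, 0, &
                      MPI_COMM_SELF, MPI_STATUS_IGNORE, ierr)
    pos = 0
    call MPI_Unpack(rbuf, size(rbuf), pos, id_out, 1, MPI_INTEGER, MPI_COMM_SELF, ierr)
    call MPI_Unpack(rbuf, size(rbuf), pos, r_out, 1, MPI_DOUBLE_PRECISION, MPI_COMM_SELF, ierr)
    print *, 'unpacked id and radius:', id_out, r_out
    call MPI_Finalize(ierr)
end program pack_exchange_sketch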
Skip if no particles to exchange + if (p_send_counts(i, j, k) == 0 .and. p_recv_counts(i, j, k) == 0) then + cycle + end if + p_recv_size = p_recv_counts(i, j, k)*p_var_size position = 0 @@ -751,7 +745,7 @@ contains send_tag = send_tag + max(num_procs, n_neighbor) recv_tag = recv_tag + max(num_procs, n_neighbor) - call MPI_Sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & + call MPI_sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) @@ -786,12 +780,16 @@ contains call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) end do lag_id(bub_id, 2) = bub_id - end do end if end do end do end do +#endif + + if (any(periodic_bc)) then + call s_wrap_particle_positions(pos, posPrev, nbubs, dest) + end if end subroutine s_mpi_sendrecv_particles @@ -807,6 +805,65 @@ contains end function neighbor_tag + subroutine s_wrap_particle_positions(pos, posPrev, nbubs, dest) + + real(wp), dimension(:, :, :) :: pos, posPrev + integer :: nbubs, dest + integer :: i, q + real :: offset + + do i = 1, nbubs + if (periodic_bc(1)) then + offset = glb_bounds(1)%end - glb_bounds(1)%beg + if (pos(i, 1, dest) > x_cb(m + buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) - offset + posPrev(i, 1, q) = posPrev(i, 1, q) - offset + end do + endif + if (pos(i, 1, dest) < x_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 1, q) = pos(i, 1, q) + offset + posPrev(i, 1, q) = posPrev(i, 1, q) + offset + end do + endif + end if + + if (periodic_bc(2)) then + offset = glb_bounds(2)%end - glb_bounds(2)%beg + if (pos(i, 2, dest) > y_cb(n + buff_size)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) - offset + posPrev(i, 2, q) = posPrev(i, 2, q) - offset + end do + endif + if (pos(i, 2, dest) < y_cb(-buff_size - 1)) then + do q = 1, 2 + pos(i, 2, q) = pos(i, 2, q) + offset + posPrev(i, 2, q) = posPrev(i, 2, q) + offset + end do + endif + end if + + if (periodic_bc(3)) then + offset = glb_bounds(3)%end - glb_bounds(3)%beg + if (pos(i, 3, dest) > z_cb(p + buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) - offset + posPrev(i, 2, q) = posPrev(i, 2, q) - offset + end do + endif + if (pos(i, 3, dest) < z_cb(-1 - buff_size)) then + do q = 1, 2 + pos(i, 3, q) = pos(i, 3, q) + offset + posPrev(i, 2, q) = posPrev(i, 2, q) + offset + end do + endif + end if + end do + + end subroutine s_wrap_particle_positions + impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq real(wp), intent(inout), dimension(1:num_freq) :: phi_rn diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index bc0def7dcf..b4c6b0a3fa 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -155,8 +155,6 @@ contains rdma_mpi, teno_CT, mp_weno, weno_avg, & riemann_solver, low_Mach, wave_speeds, avg_state, & bc_x, bc_y, bc_z, & - x_a, y_a, z_a, x_b, y_b, z_b, & - x_domain, y_domain, z_domain, & hypoelasticity, & ib, num_ibs, patch_ib, & fluid_pp, probe_wrt, prim_vars_wrt, & @@ -225,11 +223,15 @@ contains if (cfl_adap_dt .or. cfl_const_dt) cfl_dt = .true. - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == -17) .or. & + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_DIRICHLET) .or. & num_bc_patches > 0) then bc_io = .true. endif + if (bc_x%beg == BC_PERIODIC .and. bc_x%end == BC_PERIODIC) periodic_bc(1) = .true. + if (bc_y%beg == BC_PERIODIC .and. 
bc_y%end == BC_PERIODIC) periodic_bc(2) = .true. + if (bc_z%beg == BC_PERIODIC .and. bc_z%end == BC_PERIODIC) periodic_bc(3) = .true. + else call s_mpi_abort(trim(file_path)//' is missing. Exiting.') end if diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 7521249edc..bf4f15d7a8 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -382,10 +382,6 @@ def analytic(self): SIMULATION[f'{var}_{cmp}'] = ParamType.REAL SIMULATION[f'bf_{cmp}'] = ParamType.LOG - - for prepend in ["domain%beg", "domain%end"]: - SIMULATION[f"{cmp}_{prepend}"] = ParamType.REAL - for probe_id in range(1,10+1): for cmp in ["x", "y", "z"]: SIMULATION[f'probe({probe_id})%{cmp}'] = ParamType.REAL From 58a0cba99d6bfe8b7937c86c9327466480acb50d Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:27:33 -0400 Subject: [PATCH 50/62] periodic BCs working with non blocking communication --- src/simulation/m_bubbles_EL.fpp | 120 +++++++------- src/simulation/m_mpi_proxy.fpp | 283 +++++++++++++++++++++----------- 2 files changed, 251 insertions(+), 152 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 8187b14638..5b6e59e6cc 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -79,8 +79,7 @@ module m_bubbles_EL $:GPU_DECLARE(create='[nBubs,Rmax_glb,Rmin_glb,q_beta,q_beta_idx]') integer, allocatable, dimension(:) :: keep_bubble, prefix_sum - integer, allocatable, dimension(:) :: wrap_bubble_loc - integer, allocatable, dimension(:,:) :: wrap_bubble_dir + integer, allocatable, dimension(:,:) :: wrap_bubble_loc, wrap_bubble_dir integer :: active_bubs $:GPU_DECLARE(create='[keep_bubble, prefix_sum, active_bubs]') $:GPU_DECLARE(create='[wrap_bubble_loc, wrap_bubble_dir]') @@ -163,7 +162,7 @@ contains @:ALLOCATE(mtn_dveldt(1:nBubs_glb, 1:3, 1:lag_num_ts)) @:ALLOCATE(keep_bubble(1:nBubs_glb), prefix_sum(1:nBubs_glb)) - @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) + @:ALLOCATE(wrap_bubble_loc(1:nBubs_glb, 1:num_dims), wrap_bubble_dir(1:nBubs_glb, 1:num_dims)) if (adap_dt .and. f_is_default(adap_dt_tol)) adap_dt_tol = dflt_adap_dt_tol @@ -1380,7 +1379,7 @@ contains $:GPU_PARALLEL_LOOP(private='[cell]') do k = 1, nBubs keep_bubble(k) = 1 - wrap_bubble_loc(k) = 0 + wrap_bubble_loc(k,:) = 0 wrap_bubble_dir(k,:) = 0 ! Relocate bubbles at solid boundaries and delete bubbles that leave @@ -1393,14 +1392,12 @@ contains mtn_pos(k, 1, dest) = x_cb(m) - intfc_rad(k, dest) elseif (bc_x%beg == BC_PERIODIC .and. mtn_pos(k, 1, dest) < pcomm_coords(1)%beg .and. & mtn_posPrev(k, 1, dest) > pcomm_coords(1)%beg) then - !print*, "1, -1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%beg wrap_bubble_dir(k,1) = 1 - wrap_bubble_loc(k) = -1 + wrap_bubble_loc(k,1) = -1 elseif (bc_x%end == BC_PERIODIC .and. mtn_pos(k, 1, dest) > pcomm_coords(1)%end .and. & mtn_posPrev(k, 1, dest) < pcomm_coords(1)%end) then - !print*, "1, 1", mtn_pos(k, 1, dest), mtn_posPrev(k, 1, dest), pcomm_coords(1)%end wrap_bubble_dir(k,1) = 1 - wrap_bubble_loc(k) = 1 + wrap_bubble_loc(k,1) = 1 elseif (mtn_pos(k, 1, dest) >= x_cb(m + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 1, dest) < x_cb(fd_number - buff_size - 1)) then @@ -1415,14 +1412,12 @@ contains mtn_pos(k, 2, dest) = y_cb(n) - intfc_rad(k, dest) elseif (bc_y%beg == BC_PERIODIC .and. mtn_pos(k, 2, dest) < pcomm_coords(2)%beg .and. 
& mtn_posPrev(k, 2, dest) > pcomm_coords(2)%beg) then - !print*, "2, -1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%beg wrap_bubble_dir(k,2) = 1 - wrap_bubble_loc(k) = -1 + wrap_bubble_loc(k,2) = -1 elseif (bc_y%end == BC_PERIODIC .and. mtn_pos(k, 2, dest) > pcomm_coords(2)%end .and. & mtn_posPrev(k, 2, dest) < pcomm_coords(2)%end) then - !print*, "2, 1", mtn_pos(k, 2, dest), mtn_posPrev(k, 2, dest), pcomm_coords(2)%end wrap_bubble_dir(k,2) = 1 - wrap_bubble_loc(k) = 1 + wrap_bubble_loc(k,2) = 1 elseif (mtn_pos(k, 2, dest) >= y_cb(n + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 2, dest) < y_cb(fd_number - buff_size - 1)) then @@ -1439,11 +1434,11 @@ contains elseif (bc_z%beg == BC_PERIODIC .and. mtn_pos(k, 3, dest) < pcomm_coords(3)%beg .and. & mtn_posPrev(k, 3, dest) > pcomm_coords(3)%beg) then wrap_bubble_dir(k,3) = 1 - wrap_bubble_loc(k) = -1 + wrap_bubble_loc(k,3) = -1 elseif (bc_z%end == BC_PERIODIC .and. mtn_pos(k, 3, dest) > pcomm_coords(3)%end .and. & mtn_posPrev(k, 3, dest) < pcomm_coords(3)%end) then wrap_bubble_dir(k,3) = 1 - wrap_bubble_loc(k) = 1 + wrap_bubble_loc(k,3) = 1 elseif (mtn_pos(k, 3, dest) >= z_cb(p + buff_size - fd_number)) then keep_bubble(k) = 0 elseif (mtn_pos(k, 3, dest) < z_cb(fd_number - buff_size - 1)) then @@ -1491,52 +1486,57 @@ contains & nBubs]') call nvtxEndRange - ! Handle deletion of bubbles leaving local domain - do k = 1, nBubs - if (k == 1) then - prefix_sum(k) = keep_bubble(k) - else - prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) - end if - if (k == nBubs) active_bubs = prefix_sum(k) - end do + if (nBubs > 0) then + ! Handle deletion of bubbles leaving local domain + do k = 1, nBubs + if (k == 1) then + prefix_sum(k) = keep_bubble(k) + else + prefix_sum(k) = prefix_sum(k - 1) + keep_bubble(k) + end if + end do - do k = 1, nBubs - if (keep_bubble(k) == 1) then - if (prefix_sum(k) /= k) then - call s_copy_lag_bubble(k, prefix_sum(k)) + active_bubs = prefix_sum(nBubs) + + do k = 1, nBubs + if (keep_bubble(k) == 1) then + if (prefix_sum(k) /= k) then + call s_copy_lag_bubble(prefix_sum(k), k) + wrap_bubble_dir(prefix_sum(k), :) = wrap_bubble_dir(k, :) + wrap_bubble_loc(prefix_sum(k), :) = wrap_bubble_loc(k, :) + end if end if - end if - if (k == nBubs) nBubs = active_bubs - end do + end do - ! Handle periodic wrapping of bubbles on same processor - newBubs = 0 - do k = 1, nBubs - if (any(wrap_bubble_dir(k, :) == 1)) then - newBubs = newBubs + 1 - new_idx = nBubs + newBubs - call s_copy_lag_bubble(k, new_idx) - do i = 1, num_dims - if (wrap_bubble_dir(k, i) == 1) then - offset = glb_bounds(i)%end - glb_bounds(i)%beg - if (wrap_bubble_loc(k) == 1) then - do q = 1, 2 - mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) - offset - mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) - offset - end do - else if (wrap_bubble_loc(k) == -1) then - do q = 1, 2 - mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) + offset - mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) + offset - end do + nBubs = active_bubs + + ! 
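! A bubble whose image crosses a periodic face is duplicated and the copy is
! shifted by the full domain extent, so both images stay consistent with the
! periodic Eulerian halo. A one-dimensional sketch of that shift (bounds and
! position are illustrative; in the patch the extent is
! glb_bounds(i)%end - glb_bounds(i)%beg):
program periodic_wrap_sketch
    implicit none
    integer, parameter :: wp = kind(1.d0)
    real(wp) :: x_beg, x_end, extent, pos
    x_beg = 0._wp; x_end = 1._wp
    extent = x_end - x_beg
    pos = 1.03_wp                        ! image that drifted past the +x face
    if (pos > x_end) pos = pos - extent  ! re-enters near the -x face
    if (pos < x_beg) pos = pos + extent
    print *, 'wrapped position:', pos
end program periodic_wrap_sketch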
Handle periodic wrapping of bubbles on same processor + newBubs = 0 + do k = 1, nBubs + if (any(wrap_bubble_dir(k, :) == 1)) then + newBubs = newBubs + 1 + new_idx = nBubs + newBubs + call s_copy_lag_bubble(new_idx, k) + do i = 1, num_dims + if (wrap_bubble_dir(k, i) == 1) then + offset = glb_bounds(i)%end - glb_bounds(i)%beg + if (wrap_bubble_loc(k,i) == 1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) - offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) - offset + end do + else if (wrap_bubble_loc(k,i) == -1) then + do q = 1, 2 + mtn_pos(new_idx, i, q) = mtn_pos(new_idx, i, q) + offset + mtn_posPrev(new_idx, i, q) = mtn_posPrev(new_idx, i, q) + offset + end do + end if end if - end if - end do - end if - end do - print*, nBubs, newBubs - nBubs = nBubs + newBubs + end do + end if + end do + nBubs = nBubs + newBubs + end if ! Handle MPI transfer of bubbles going to another processor's local domain if (num_procs > 1) then @@ -1664,7 +1664,7 @@ contains end if ! 3D - if (p > 0) then + if (p > 1) then particle_in_domain = ((pos_part(1) < x_cb(m + buff_size - fd_number)) .and. & (pos_part(1) >= x_cb(fd_number - buff_size - 1)) .and. & (pos_part(2) < y_cb(n + buff_size - fd_number)) .and. & @@ -2092,7 +2092,7 @@ contains !> The purpose of this subroutine is to remove one specific particle if dt is too small. !! @param bub_id Particle id - impure subroutine s_copy_lag_bubble(src, dest) + impure subroutine s_copy_lag_bubble(dest, src) integer, intent(in) :: src, dest @@ -2110,14 +2110,14 @@ contains intfc_vel(dest, 1:2) = intfc_vel(src, 1:2) mtn_vel(dest, 1:3, 1:2) = mtn_vel(src, 1:3, 1:2) mtn_s(dest, 1:3, 1:2) = mtn_s(src, 1:3, 1:2) + mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) + mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) intfc_draddt(dest, 1:lag_num_ts) = intfc_draddt(src, 1:lag_num_ts) intfc_dveldt(dest, 1:lag_num_ts) = intfc_dveldt(src, 1:lag_num_ts) gas_dpdt(dest, 1:lag_num_ts) = gas_dpdt(src, 1:lag_num_ts) gas_dmvdt(dest, 1:lag_num_ts) = gas_dmvdt(src, 1:lag_num_ts) mtn_dposdt(dest, 1:3, 1:lag_num_ts) = mtn_dposdt(src, 1:3, 1:lag_num_ts) mtn_dveldt(dest, 1:3, 1:lag_num_ts) = mtn_dveldt(src, 1:3, 1:lag_num_ts) - mtn_pos(dest, 1:3, 1:2) = mtn_pos(src, 1:3, 1:2) - mtn_posPrev(dest, 1:3, 1:2) = mtn_posPrev(src, 1:3, 1:2) end subroutine s_copy_lag_bubble diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 27abce892c..316cb541d0 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -52,12 +52,21 @@ module m_mpi_proxy character(len=1), dimension(:), allocatable :: p_send_buff, p_recv_buff integer :: p_buff_size, p_var_size + !! EL Bubbles communication variables + integer, parameter :: MAX_NEIGHBORS = 27 + integer :: send_requests(MAX_NEIGHBORS), recv_requests(MAX_NEIGHBORS) + integer :: recv_offsets(MAX_NEIGHBORS) + integer :: neighbor_list(MAX_NEIGHBORS, 3) + integer :: n_neighbors + $:GPU_DECLARE(create='[p_send_counts]') contains subroutine s_initialize_mpi_proxy_module() + integer :: i, j, k + #ifdef MFC_MPI if (ib) then if (n > 0) then @@ -101,6 +110,21 @@ contains @:ALLOCATE(p_send_buff(0:p_buff_size), p_recv_buff(0:p_buff_size)) @:ALLOCATE(p_send_ids(nidx(1)%beg:nidx(1)%end, nidx(2)%beg:nidx(2)%end, nidx(3)%beg:nidx(3)%end, 0:lag_params%nBubs_glb)) + + ! 
First, collect all neighbor information + n_neighbors = 0 + do k = nidx(3)%beg, nidx(3)%end + do j = nidx(2)%beg, nidx(2)%end + do i = nidx(1)%beg, nidx(1)%end + if (abs(i) + abs(j) + abs(k) /= 0) then + n_neighbors = n_neighbors + 1 + neighbor_list(n_neighbors, 1) = i + neighbor_list(n_neighbors, 2) = j + neighbor_list(n_neighbors, 3) = k + end if + end do + end do + end do #endif end subroutine s_initialize_particles_mpi @@ -651,7 +675,7 @@ contains end subroutine s_add_particles_to_transfer_list - !> This subroutine performs the MPI communication for lagrangian particles/ + !> This subroutine performs the MPI communication for lagrangian particles/ !! bubbles. !! @param bub_R0 Initial radius of each bubble !! @param Rmax_stats Maximum radius of each bubble @@ -688,103 +712,178 @@ contains real(wp), dimension(:, :, :) :: pos, posPrev, vel, scoord, dpos, dvel integer :: position, bub_id, lag_num_ts, tag, partner, send_tag, recv_tag, nbubs, p_recv_size, dest - integer :: i, j, k, l, q - integer :: ierr !< Generic flag used to identify and report MPI errors + integer :: i, j, k, l, q, r + integer :: req_send, req_recv, ierr !< Generic flag used to identify and report MPI errors + integer :: send_count, send_offset, recv_count, recv_offset #ifdef MFC_MPI - do k = nidx(3)%beg, nidx(3)%end - do j = nidx(2)%beg, nidx(2)%end - do i = nidx(1)%beg, nidx(1)%end - if (abs(i) + abs(j) + abs(k) /= 0) then - partner = neighbor_ranks(i, j, k) - send_tag = neighbor_tag(i, j, k) - recv_tag = send_tag !neighbor_tag(-i,-j,-k) + ! Phase 1: Exchange particle counts using non-blocking communication + send_count = 0 + recv_count = 0 + + ! Post all receives first + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + end do - call MPI_sendrecv(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & - p_recv_counts(i, j, k), 1, MPI_INTEGER, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + ! Post all sends + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + send_count = send_count + 1 + call MPI_Isend(p_send_counts(i, j, k), 1, MPI_INTEGER, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + end do - ! Skip if no particles to exchange - if (p_send_counts(i, j, k) == 0 .and. p_recv_counts(i, j, k) == 0) then - cycle - end if + ! 
Wait for all count exchanges to complete + if (recv_count > 0) then + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + end if + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if - p_recv_size = p_recv_counts(i, j, k)*p_var_size - - position = 0 - do l = 0, p_send_counts(i, j, k) - 1 - bub_id = p_send_ids(i, j, k, l) - call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Pack(gas_p(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(gas_mv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(rvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(pos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(posPrev(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(vel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(scoord(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Pack(drad(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(drvel(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasp(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dgasmv(bub_id, q), 1, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dpos(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - call MPI_Pack(dvel(bub_id, :, q), 3, mpi_p, p_send_buff, p_buff_size, position, MPI_COMM_WORLD, ierr) - end do - end do + ! Phase 2: Exchange particle data using non-blocking communication + send_count = 0 + recv_count = 0 + + ! 
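A brief note on the staging of this exchange; the reasoning is standard non-blocking MPI practice rather than anything specific to this routine.

    ! Posting every MPI_Irecv before the matching MPI_Isend lets the library
    ! deliver the packed particle data directly into p_recv_buff instead of
    ! internal unexpected-message storage, and the pattern cannot deadlock
    ! because no rank blocks until all of its requests are posted.  The
    ! Waitall on the count exchange has to complete first: p_recv_counts
    ! sizes each Phase 2 receive and recv_offsets partitions the shared
    ! receive buffer between neighbors.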
Post all receives for particle data first + recv_offset = 1 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + p_recv_size = p_recv_counts(i, j, k) * p_var_size + recv_tag = neighbor_tag(i, j, k) + + recv_count = recv_count + 1 + call MPI_Irecv(p_recv_buff(recv_offset), p_recv_size, MPI_PACKED, partner, recv_tag, & + MPI_COMM_WORLD, recv_requests(recv_count), ierr) + recv_offsets(l) = recv_offset + recv_offset = recv_offset + p_recv_size + end if + end do - send_tag = send_tag + max(num_procs, n_neighbor) - recv_tag = recv_tag + max(num_procs, n_neighbor) - - call MPI_sendrecv(p_send_buff, position, MPI_PACKED, partner, send_tag, & - p_recv_buff, p_recv_size, MPI_PACKED, partner, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - position = 0 - do l = 0, p_recv_counts(i, j, k) - 1 - nbubs = nbubs + 1 - bub_id = nbubs - call MPI_Unpack(p_recv_buff, p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) - do q = 1, 2 - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_p(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, gas_mv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, rvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, pos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, posPrev(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, vel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, scoord(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - do q = 1, lag_num_ts - call MPI_Unpack(p_recv_buff, p_recv_size, position, drad(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, drvel(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasp(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dgasmv(bub_id, q), 1, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dpos(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_Unpack(p_recv_buff, p_recv_size, position, dvel(bub_id, :, q), 3, mpi_p, MPI_COMM_WORLD, ierr) - end do - lag_id(bub_id, 2) = bub_id - end do - end if + ! 
Pack and send particle data + send_offset = 0 + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_send_counts(i, j, k) > 0) then + partner = neighbor_ranks(i, j, k) + send_tag = neighbor_tag(-i, -j, -k) + + ! Pack data for sending + position = 0 + do q = 0, p_send_counts(i, j, k) - 1 + bub_id = p_send_ids(i, j, k, q) + call MPI_Pack(lag_id(bub_id, 1), 1, MPI_INTEGER, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_R0(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmax_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(Rmin_stats(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mg(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaT(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_betaC(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(bub_dphidt(bub_id), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Pack(gas_p(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(gas_mv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(rvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(pos(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(posPrev(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(vel(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(scoord(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Pack(drad(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(drvel(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasp(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dgasmv(bub_id, r), 1, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dpos(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + call MPI_Pack(dvel(bub_id, :, r), 3, mpi_p, p_send_buff(send_offset), p_buff_size, position, MPI_COMM_WORLD, ierr) + end do end do - end do + + send_count = send_count + 1 + call MPI_Isend(p_send_buff(send_offset), position, MPI_PACKED, partner, send_tag, & + MPI_COMM_WORLD, send_requests(send_count), ierr) + send_offset = send_offset + position + end if end do + + ! Wait for all recvs for contiguous data to complete + call MPI_Waitall(recv_count, recv_requests(1:recv_count), MPI_STATUSES_IGNORE, ierr) + + ! 
Process received data as it arrives + do l = 1, n_neighbors + i = neighbor_list(l, 1) + j = neighbor_list(l, 2) + k = neighbor_list(l, 3) + + if (p_recv_counts(i, j, k) > 0 .and. abs(i) + abs(j) + abs(k) /= 0) then + p_recv_size = p_recv_counts(i, j, k) * p_var_size + recv_offset = recv_offsets(l) + + position = 0 + ! Unpack received data + do q = 0, p_recv_counts(i, j, k) - 1 + nbubs = nbubs + 1 + bub_id = nbubs + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, lag_id(bub_id, 1), 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_R0(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmax_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, Rmin_stats(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mg(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaT(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_betaC(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, bub_dphidt(bub_id), 1, mpi_p, MPI_COMM_WORLD, ierr) + do r = 1, 2 + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_p(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, gas_mv(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rad(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, rvel(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, pos(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, posPrev(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, vel(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, scoord(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + do r = 1, lag_num_ts + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drad(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, drvel(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasp(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dgasmv(bub_id, r), 1, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dpos(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_Unpack(p_recv_buff(recv_offset), p_recv_size, position, dvel(bub_id, :, r), 3, mpi_p, MPI_COMM_WORLD, ierr) + end do + lag_id(bub_id, 2) = bub_id + end do + recv_offset = recv_offset + p_recv_size + end if + + end do + + ! 
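One more note, on correctness rather than speed.

    ! MPI_Isend only starts a transfer; the library may still be reading
    ! p_send_buff until its request completes.  Waiting on send_requests here
    ! ensures the packed send buffer is not repacked or freed while a message
    ! from this call is still in flight.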
Wait for all sends to complete + if (send_count > 0) then + call MPI_Waitall(send_count, send_requests(1:send_count), MPI_STATUSES_IGNORE, ierr) + end if #endif if (any(periodic_bc)) then @@ -850,13 +949,13 @@ contains if (pos(i, 3, dest) > z_cb(p + buff_size)) then do q = 1, 2 pos(i, 3, q) = pos(i, 3, q) - offset - posPrev(i, 2, q) = posPrev(i, 2, q) - offset + posPrev(i, 3, q) = posPrev(i, 3, q) - offset end do endif if (pos(i, 3, dest) < z_cb(-1 - buff_size)) then do q = 1, 2 pos(i, 3, q) = pos(i, 3, q) + offset - posPrev(i, 2, q) = posPrev(i, 2, q) + offset + posPrev(i, 3, q) = posPrev(i, 3, q) + offset end do endif end if From 9b70f9d2ebfd25b2bb95f2936ee66f072bdaa1f6 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 17 Sep 2025 21:22:26 -0400 Subject: [PATCH 51/62] bug fixes for indexing and GPU communication --- src/simulation/m_bubbles_EL.fpp | 7 +++++-- src/simulation/m_bubbles_EL_kernels.fpp | 4 ++-- src/simulation/m_global_parameters.fpp | 5 +++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 5b6e59e6cc..fc27ec7f09 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -114,12 +114,15 @@ contains pcomm_coords(1)%beg = x_cb(buff_size - fd_number - 1) pcomm_coords(1)%end = x_cb(m - buff_size + fd_number) + $:GPU_UPDATE(device='[pcomm_coords(1)]') if (n > 0) then pcomm_coords(2)%beg = y_cb(buff_size - fd_number - 1) pcomm_coords(2)%end = y_cb(n - buff_size + fd_number) + $:GPU_UPDATE(device='[pcomm_coords(2)]') if (p > 0) then pcomm_coords(3)%beg = z_cb(buff_size - fd_number - 1) pcomm_coords(3)%end = z_cb(p - buff_size + fd_number) + $:GPU_UPDATE(device='[pcomm_coords(3)]') end if end if @@ -1482,8 +1485,8 @@ contains $:GPU_UPDATE(host='[bub_R0, Rmax_stats, Rmin_stats, gas_mg, gas_betaT, & & gas_betaC, bub_dphidt, lag_id, gas_p, gas_mv, intfc_rad, intfc_vel, & & mtn_pos, mtn_posPrev, mtn_vel, mtn_s, intfc_draddt, intfc_dveldt, & - & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, & - & nBubs]') + & gas_dpdt, gas_dmvdt, mtn_dposdt, mtn_dveldt, keep_bubble, nBubs, & + & wrap_bubble_dir, wrap_bubble_loc]') call nvtxEndRange if (nBubs > 0) then diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 3ba776c368..35a7528792 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -593,11 +593,11 @@ contains eta(1) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)) - & q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 2))/ & (z_cc(cell(3)) - z_cc(cell(3) - 2)) - xi(2) = y_cc(cell(3)) + xi(2) = z_cc(cell(3)) eta(2) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 1) - & q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) - 1))/ & (z_cc(cell(3) + 1) - z_cc(cell(3) - 1)) - xi(3) = y_cc(cell(3) + 1) + xi(3) = z_cc(cell(3) + 1) eta(3) = (q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3) + 2) - & q_prim_vf(E_idx)%sf(cell(1), cell(2), cell(3)))/ & (z_cc(cell(3) + 2) - z_cc(cell(3))) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index dbef5fcc98..29870bb613 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -250,6 +250,7 @@ module m_global_parameters !! Processor coordinates in MPI_CART_COMM type(bounds_info), allocatable, dimension(:) :: pcomm_coords + $:GPU_DECLARE(create='[pcomm_coords]') !! 
Coordinates for EL particle transfer type(int_bounds_info), dimension(3) :: nidx !< Indices for neighboring processors @@ -1389,7 +1390,7 @@ contains #:endif allocate (proc_coords(1:num_dims)) - allocate (pcomm_coords(1:num_dims)) + @:ALLOCATE(pcomm_coords(1:num_dims)) if (parallel_io .neqv. .true.) return @@ -1426,7 +1427,7 @@ contains end if deallocate (proc_coords) - deallocate (pcomm_coords) + @:DEALLOCATE(pcomm_coords) if (parallel_io) then deallocate (start_idx) From 49c194f3de9dff1109daf640c7066fec19378ed0 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 24 Sep 2025 17:47:45 -0400 Subject: [PATCH 52/62] improve run_time.inf output --- src/common/m_mpi_common.fpp | 14 ++++- src/simulation/m_data_output.fpp | 102 +++++++++++++++++++++---------- src/simulation/m_sim_helpers.fpp | 56 ++++++++++++----- 3 files changed, 123 insertions(+), 49 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 5eae8fcabb..56bf14d5a1 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -417,17 +417,21 @@ contains impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & + ccfl_max_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb) + Rc_min_glb, & + ccfl_max_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc real(wp), intent(in) :: Rc_min_loc + real(wp), intent(in) :: ccfl_max_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb real(wp), intent(out) :: Rc_min_glb + real(wp), intent(out) :: ccfl_max_glb #ifdef MFC_SIMULATION #ifdef MFC_MPI @@ -448,6 +452,12 @@ contains MPI_COMM_WORLD, ierr) end if + if (surface_tension) then + call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & + mpi_p, MPI_MIN, 0, & + MPI_COMM_WORLD, ierr) + end if + #else icfl_max_glb = icfl_max_loc @@ -457,6 +467,8 @@ contains Rc_min_glb = Rc_min_loc end if + if (surface_tension) ccfl_max_glb = ccfl_max_loc + #endif #endif diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 5f5b79c481..a6009da7a7 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -157,24 +157,20 @@ contains write (3, '(A)') ''; write (3, '(A)') '' ! Generating table header for the stability criteria to be outputted - if (cfl_dt) then - if (viscous) then - write (3, '(A)') ' Time-steps dt = Time ICFL '// & - 'Max VCFL Max Rc Min =' - else - write (3, '(A)') ' Time-steps dt Time '// & - ' ICFL Max ' - end if - else - if (viscous) then - write (3, '(A)') ' Time-steps Time ICFL '// & - 'Max VCFL Max Rc Min ' - else - write (3, '(A)') ' Time-steps Time '// & - ' ICFL Max ' - end if + write (3, '(13X,A8,13X,A10,13X,A10,13X,A10,)', advance="no") & + trim('Time-steps'), trim('dt'), trim('Time'), trim('ICFL Max') + + if (viscous) then + write(3, '(13X,A10,13X,A16)', advance="no") & + trim('VCFL Max'), trim('Rc Min') end if + if (surface_tension) then + write(3, '(13X,A10)', advance="no") trim('CCFL Max') + end if + + write(3, *) ! 
new line + end subroutine s_open_run_time_information_file !> This opens a formatted data file where the root processor @@ -295,8 +291,12 @@ contains call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + if (viscous .and. surface_tension) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf=vcfl_sf, Rc_sf=Rc_sf, ccfl_sf=ccfl_sf) + elseif (viscous) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf=vcfl_sf, Rc_sf=Rc_sf) + elseif (surface_tension) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, ccfl_sf=ccfl_sf) else call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) end if @@ -311,17 +311,18 @@ contains #ifdef _CRAYFTN $:GPU_UPDATE(host='[icfl_sf]') - - if (viscous) then - $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]') - end if - icfl_max_loc = maxval(icfl_sf) if (viscous) then + $:GPU_UPDATE(host='[vcfl_sf,Rc_sf]') vcfl_max_loc = maxval(vcfl_sf) Rc_min_loc = minval(Rc_sf) end if + + if (surface_tension) then + $:GPU_UPDATE(host='[ccfl_sf]') + ccfl_max_loc = maxval(ccfl_sf) + end if #else #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') icfl_max_loc = maxval(icfl_sf) @@ -332,6 +333,12 @@ contains Rc_min_loc = minval(Rc_sf) #:endcall GPU_PARALLEL end if + + if (surface_tension) then + #:call GPU_PARALLEL(copyout='[ccfl_max_loc]', copyin='[ccfl_sf]') + ccfl_max_loc = maxval(ccfl_sf) + #:endcall GPU_PARALLEL + end if #endif ! Determining global stability criteria extrema at current time-step @@ -339,13 +346,16 @@ contains call s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & + ccfl_max_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb) + Rc_min_glb, & + ccfl_max_glb) else icfl_max_glb = icfl_max_loc if (viscous) vcfl_max_glb = vcfl_max_loc if (viscous) Rc_min_glb = Rc_min_loc + if (surface_tension) ccfl_max_glb = ccfl_max_loc end if ! Determining the stability criteria extrema over all the time-steps @@ -356,18 +366,28 @@ contains if (Rc_min_glb < Rc_min) Rc_min = Rc_min_glb end if + if (surface_tension) then + if (ccfl_max_glb > ccfl_max) ccfl_max = ccfl_max_glb + end if + ! Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then + write (3, '(13X,I8,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & + t_step, dt, t_step*dt, icfl_max_glb + if (viscous) then - write (3, '(6X,I8,F10.6,6X,6X,F10.6,6X,F9.6,6X,F9.6,6X,F10.6)') & - t_step, dt, t_step*dt, icfl_max_glb, & + write (3, '(13X,F10.6,13X,ES16.6)', advance="no") & vcfl_max_glb, & Rc_min_glb - else - write (3, '(13X,I8,14X,F10.6,14X,F10.6,13X,F9.6)') & - t_step, dt, t_step*dt, icfl_max_glb end if + if (surface_tension) then + write (3, '(13X,F10.6)', advance="no") & + ccfl_max_glb + end if + + write(3, *) ! new line + if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. Exiting.') elseif (icfl_max_glb > 1._wp) then @@ -383,6 +403,15 @@ contains call s_mpi_abort('VCFL is greater than 1.0. Exiting.') end if end if + + if (surface_tension) then + if (.not. f_approx_equal(ccfl_max_glb, ccfl_max_glb)) then + call s_mpi_abort('CCFL is NaN. Exiting.') + elseif (ccfl_max_glb > 1._wp) then + print *, 'ccfl', ccfl_max_glb + call s_mpi_abort('CCFL is greater than 1.0. 
Exiting.') + end if + end if end if call s_mpi_barrier() @@ -1756,7 +1785,8 @@ contains write (3, '(A,F9.6)') 'ICFL Max: ', icfl_max if (viscous) write (3, '(A,F9.6)') 'VCFL Max: ', vcfl_max - if (viscous) write (3, '(A,F10.6)') 'Rc Min: ', Rc_min + if (viscous) write (3, '(A,ES16.6)') 'Rc Min: ', Rc_min + if (surface_tension) write (3, '(A,F10.6)') 'CCFL Max: ', ccfl_max call cpu_time(run_time) @@ -1805,7 +1835,12 @@ contains @:ALLOCATE(Rc_sf (0:m, 0:n, 0:p)) vcfl_max = 0._wp - Rc_min = 1.e3_wp + Rc_min = 1.e12_wp + end if + + if (surface_tension) then + @:ALLOCATE(ccfl_sf(0:m, 0:n, 0:p)) + ccfl_max = 0._wp end if end if @@ -1841,6 +1876,9 @@ contains if (viscous) then @:DEALLOCATE(vcfl_sf, Rc_sf) end if + if (surface_tension) then + @:DEALLOCATE(ccfl_sf) + end if end if if (down_sample) then diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index df8f6b9eae..c966354a1c 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -181,12 +181,12 @@ contains !! @param icfl_sf cell-centered inviscid cfl number !! @param vcfl_sf (optional) cell-centered viscous CFL number !! @param Rc_sf (optional) cell centered Rc - pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + pure subroutine s_compute_stability_from_dt(vel, c, rho, Re_l, j, k, l, icfl_sf, vcfl_sf, Rc_sf, ccfl_sf) $:GPU_ROUTINE(parallelism='[seq]') real(wp), intent(in), dimension(num_vels) :: vel real(wp), intent(in) :: c, rho real(wp), dimension(0:m, 0:n, 0:p), intent(inout) :: icfl_sf - real(wp), dimension(0:m, 0:n, 0:p), intent(inout), optional :: vcfl_sf, Rc_sf + real(wp), dimension(0:m, 0:n, 0:p), intent(inout), optional :: vcfl_sf, Rc_sf, ccfl_sf real(wp), dimension(2), intent(in) :: Re_l integer, intent(in) :: j, k, l @@ -203,8 +203,7 @@ contains ! Viscous calculations if (viscous) then - if (p > 0) then - !3D + if (p > 0) then ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) vcfl_sf(j, k, l) = maxval(dt/Re_l/rho) & @@ -221,19 +220,30 @@ contains dz(l)*(abs(vel(3)) + c)) & /maxval(1._wp/Re_l) end if - elseif (n > 0) then - !2D + elseif (n > 0) then ! 2D vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/min(dx(j), dy(k))**2._wp Rc_sf(j, k, l) = min(dx(j)*(abs(vel(1)) + c), & dy(k)*(abs(vel(2)) + c)) & /maxval(1._wp/Re_l) - else - !1D + else ! 1D vcfl_sf(j, k, l) = maxval(dt/Re_l/rho)/dx(j)**2._wp Rc_sf(j, k, l) = dx(j)*(abs(vel(1)) + c)/maxval(1._wp/Re_l) end if end if + if (surface_tension) then + if (p > 0) then ! 3D + if (grid_geometry == 3) then + fltr_dtheta = f_compute_filtered_dtheta(k, l) + ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + else + ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + end if + elseif (n > 0) then ! 2D + ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + end if + end if + end subroutine s_compute_stability_from_dt !> Computes dt for a specified CFL number @@ -252,7 +262,7 @@ contains real(wp), dimension(2), intent(in) :: Re_l integer, intent(in) :: j, k, l - real(wp) :: icfl_dt, vcfl_dt + real(wp) :: icfl_dt, vcfl_dt, ccfl_dt real(wp) :: fltr_dtheta ! Inviscid CFL calculation @@ -266,8 +276,7 @@ contains ! Viscous calculations if (viscous) then - if (p > 0) then - !3D + if (p > 0) then ! 
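A brief note on the scaling behind the new capillary (CCFL) quantities; the prefactor quoted from the literature is background only and is not part of this code.

    ! Explicit surface-tension terms are commonly limited by a capillary time
    ! step of the form dt_sigma ~ sqrt(rho * dx_min**3 / sigma); Brackbill,
    ! Kothe & Zemach (1992) use (rho1 + rho2)/(4*pi) in place of rho.  The
    ! CCFL written to run_time.inf is dt divided by sqrt(rho*dx_min**3/sigma)
    ! with the O(1) prefactor left to the user's CFL threshold, and ccfl_dt
    ! below inverts the same relation when dt is chosen from cfl_target.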
3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) vcfl_dt = cfl_target*(min(dx(j), dy(k), fltr_dtheta)**2._wp) & @@ -276,17 +285,32 @@ contains vcfl_dt = cfl_target*(min(dx(j), dy(k), dz(l))**2._wp) & /minval(1/(rho*Re_l)) end if - elseif (n > 0) then - !2D + elseif (n > 0) then ! 2D vcfl_dt = cfl_target*(min(dx(j), dy(k))**2._wp)/maxval((1/Re_l)/rho) - else - !1D + else ! 1D vcfl_dt = cfl_target*(dx(j)**2._wp)/minval(1/(rho*Re_l)) end if end if - if (any(Re_size > 0)) then + if (surface_tension) then + if (p > 0) then ! 3D + if (grid_geometry == 3) then + fltr_dtheta = f_compute_filtered_dtheta(k, l) + ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + else + ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + end if + elseif (n > 0) then ! 2D + ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + end if + end if + + if (any(Re_size > 0) .and. sigma > 0) then + max_dt(j, k, l) = min(icfl_dt, vcfl_dt, ccfl_dt) + elseif (any(Re_size > 0)) then max_dt(j, k, l) = min(icfl_dt, vcfl_dt) + elseif (sigma > 0) then + max_dt(j, k, l) = min(icfl_dt, ccfl_dt) else max_dt(j, k, l) = icfl_dt end if From 60375fb0877a7c6558677f373607861eab3190dd Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Wed, 24 Sep 2025 21:28:05 -0400 Subject: [PATCH 53/62] bug fixes for run_time.inf --- src/common/m_mpi_common.fpp | 4 +-- src/pre_process/include/3dHardcodedIC.fpp | 43 ++++++++++++++++++++-- src/pre_process/m_assign_variables.fpp | 2 +- src/simulation/m_data_output.fpp | 44 +++++++++++------------ src/simulation/m_sim_helpers.fpp | 12 +++---- 5 files changed, 72 insertions(+), 33 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 56bf14d5a1..a770ce1c01 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -453,8 +453,8 @@ contains end if if (surface_tension) then - call MPI_REDUCE(Rc_min_loc, Rc_min_glb, 1, & - mpi_p, MPI_MIN, 0, & + call MPI_REDUCE(ccfl_max_loc, ccfl_max_glb, 1, & + mpi_p, MPI_MAX, 0, & MPI_COMM_WORLD, ierr) end if diff --git a/src/pre_process/include/3dHardcodedIC.fpp b/src/pre_process/include/3dHardcodedIC.fpp index 4a64c01d88..d23db26630 100644 --- a/src/pre_process/include/3dHardcodedIC.fpp +++ b/src/pre_process/include/3dHardcodedIC.fpp @@ -46,6 +46,28 @@ end if end if + if (patch_icpp(patch_id)%hcid == 304) then + allocate(ih(0:n_glb, 0:0)) + if (interface_file == '.') then + call s_mpi_abort("Error: interface_file must be specified for hcid=304") + else + inquire (file=trim(interface_file), exist=file_exist) + if (file_exist) then + open(unit=10, file=trim(interface_file), status="old", action="read") + do i = 0, n_glb + read(10, '(A)') line ! Read a full line as a string + value = trim(line) + read(value, *) ih(i, 0) ! Convert string to numeric value + if (.not. f_is_default(normMag)) ih(i, 0) = ih(i, 0) * normMag + if (.not. f_is_default(normFac)) ih(i, 0) = ih(i, 0) + normFac + end do + close(10) + else + call s_mpi_abort("Error: interface_file specified for hcid=304 does not exist") + end if + end if + end if + eps = 1e-9_wp #:enddef @@ -125,10 +147,27 @@ q_prim_vf(E_idx)%sf(i, j, k) = p_th*f_cut_on(r - r_th, eps_smooth)*f_cut_on(x_cc(i), eps_smooth) + p_am - case (303) ! 3D Interface from file + case (303) ! 
3D Interface from file cartesian + + alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & + tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*(0.5_wp / dx))) + + q_prim_vf(advxb)%sf(i,j,k) = alph + q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph + + q_prim_vf(contxb)%sf(i,j,k) = q_prim_vf(advxb)%sf(i,j,k) * 1._wp + q_prim_vf(contxe)%sf(i,j,k) = q_prim_vf(advxe)%sf(i,j,k) * (1._wp / 950._wp) + + q_prim_vf(E_idx)%sf(i,j,k) = p0 + & + (q_prim_vf(contxb)%sf(i,j,k) + q_prim_vf(contxe)%sf(i,j,k)) * g0 * & + (ih(start_idx(1) + i, start_idx(3) + k) - y_cc(j)) + + if (surface_tension) q_prim_vf(c_idx)%sf(i,j,k) = alph + + case (304) ! 3D Interface from file axisymmetric alph = 0.5_wp * (1 + (1._wp - 2._wp * eps) * & - tanh((ih(start_idx(1) + i,start_idx(3) + k) - y_cc(j))*0.1_wp)) + tanh((ih(start_idx(2) + j,0) - x_cc(i))*(0.01_wp / dx))) q_prim_vf(advxb)%sf(i,j,k) = alph q_prim_vf(advxe)%sf(i,j,k) = 1._wp - alph diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index f60573b1b9..ca0c7c1c16 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -684,7 +684,7 @@ contains if (surface_tension) then q_prim_vf(c_idx)%sf(j, k, l) = eta*patch_icpp(patch_id)%cf_val + & - (1._wp - eta)*patch_icpp(smooth_patch_id)%cf_val + (1._wp - eta)*orig_prim_vf(c_idx) end if ! Updating the patch identities bookkeeping variable diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index a6009da7a7..b500451c08 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -157,8 +157,8 @@ contains write (3, '(A)') ''; write (3, '(A)') '' ! Generating table header for the stability criteria to be outputted - write (3, '(13X,A8,13X,A10,13X,A10,13X,A10,)', advance="no") & - trim('Time-steps'), trim('dt'), trim('Time'), trim('ICFL Max') + write (3, '(13X,A9,13X,A10,13X,A10,13X,A10,)', advance="no") & + trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then write(3, '(13X,A10,13X,A16)', advance="no") & @@ -309,7 +309,7 @@ contains ! Determining local stability criteria extrema at current time-step -#ifdef _CRAYFTN +!#ifdef _CRAYFTN $:GPU_UPDATE(host='[icfl_sf]') icfl_max_loc = maxval(icfl_sf) @@ -323,23 +323,23 @@ contains $:GPU_UPDATE(host='[ccfl_sf]') ccfl_max_loc = maxval(ccfl_sf) end if -#else - #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') - icfl_max_loc = maxval(icfl_sf) - #:endcall GPU_PARALLEL - if (viscous) then - #:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') - vcfl_max_loc = maxval(vcfl_sf) - Rc_min_loc = minval(Rc_sf) - #:endcall GPU_PARALLEL - end if - - if (surface_tension) then - #:call GPU_PARALLEL(copyout='[ccfl_max_loc]', copyin='[ccfl_sf]') - ccfl_max_loc = maxval(ccfl_sf) - #:endcall GPU_PARALLEL - end if -#endif +!#else + !#:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') + !icfl_max_loc = maxval(icfl_sf) + !#:endcall GPU_PARALLEL + !if (viscous) then + !#:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') + !vcfl_max_loc = maxval(vcfl_sf) + !Rc_min_loc = minval(Rc_sf) + !#:endcall GPU_PARALLEL + !end if + + !if (surface_tension) then + !#:call GPU_PARALLEL(copyout='[ccfl_max_loc]', copyin='[ccfl_sf]') + !ccfl_max_loc = maxval(ccfl_sf) + !#:endcall GPU_PARALLEL + !end if +!#endif ! Determining global stability criteria extrema at current time-step if (num_procs > 1) then @@ -372,8 +372,8 @@ contains ! 
Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then - write (3, '(13X,I8,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & - t_step, dt, t_step*dt, icfl_max_glb + write (3, '(13X,I9,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & + t_step, dt, mytime, icfl_max_glb if (viscous) then write (3, '(13X,F10.6,13X,ES16.6)', advance="no") & diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index c966354a1c..01fc7a9ec1 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -235,12 +235,12 @@ contains if (p > 0) then ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) - ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + ccfl_sf(j, k, l) = dt/sqrt(rho * min(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) else - ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + ccfl_sf(j, k, l) = dt/sqrt(rho * min(dx(j), dy(k), dz(l))**3._wp / sigma) end if elseif (n > 0) then ! 2D - ccfl_sf(j, k, l) = dt/sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + ccfl_sf(j, k, l) = dt/sqrt(rho * min(dx(j), dy(k))**3._wp / sigma) end if end if @@ -296,12 +296,12 @@ contains if (p > 0) then ! 3D if (grid_geometry == 3) then fltr_dtheta = f_compute_filtered_dtheta(k, l) - ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) + ccfl_dt = cfl_target*sqrt(rho * min(dx(j), dy(k), fltr_dtheta)**3._wp / sigma) else - ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k), dz(l))**3._wp / sigma) + ccfl_dt = cfl_target*sqrt(rho * min(dx(j), dy(k), dz(l))**3._wp / sigma) end if elseif (n > 0) then ! 2D - ccfl_dt = cfl_target*sqrt(rho * max(dx(j), dy(k))**3._wp / sigma) + ccfl_dt = cfl_target*sqrt(rho * min(dx(j), dy(k))**3._wp / sigma) end if end if From 8ffc20cf0e28b0376bc4bd5095f06018cff1b360 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Tue, 7 Oct 2025 11:32:44 -0400 Subject: [PATCH 54/62] added options and drag models --- src/common/m_constants.fpp | 2 +- src/common/m_derived_types.fpp | 3 + src/common/m_mpi_common.fpp | 9 +-- src/simulation/m_body_forces.fpp | 13 ++-- src/simulation/m_bubbles.fpp | 19 ++++-- src/simulation/m_bubbles_EL.fpp | 81 +++++++++++++++---------- src/simulation/m_bubbles_EL_kernels.fpp | 51 ++++++++++++---- src/simulation/m_global_parameters.fpp | 3 + src/simulation/m_mpi_proxy.fpp | 3 +- toolchain/mfc/run/case_dicts.py | 5 +- 10 files changed, 121 insertions(+), 68 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index b987eb18a1..14ae48d6be 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -22,7 +22,7 @@ module m_constants integer, parameter :: fourier_rings = 5 !< Fourier filter ring limit integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation - integer, parameter :: num_patches_max = 10 + integer, parameter :: num_patches_max = 20 integer, parameter :: num_bc_patches_max = 10 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 75d91dd315..47bd6b4900 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -434,6 +434,9 @@ module m_derived_types integer :: nBubs_glb !< Global number of bubbles integer :: vel_model !< Particle velocity model integer :: drag_model !< Particle 
drag model + logical :: pressure_force !< Include pressure force translational motion + logical :: gravity_force !< Include gravity force in translational motion + logical :: momentum_transfer_force !< Include momentum transfer from radial dynamics in translational motion real(wp) :: c_d !< Drag coefficient real(wp) :: epsilonb !< Standard deviation scaling for the gaussian function real(wp) :: charwidth !< Domain virtual depth (z direction, for 2D simulations) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index a770ce1c01..a50b78dcea 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -479,22 +479,19 @@ contains !! @param var_loc holds the local value to be reduced among !! all the processors in communicator. On output, the variable holds !! the sum, reduced amongst all of the local values. - subroutine s_mpi_reduce_int_sum(var_loc) + subroutine s_mpi_reduce_int_sum(var_loc, sum) integer, intent(inout) :: var_loc + integer, intent(inout) :: sum #ifdef MFC_MPI integer :: ierr !< Generic flag used to identify and report MPI errors - ! Temporary storage variable that holds the reduced sum value - integer :: var_glb - ! Performing reduction procedure and eventually storing its result ! into the variable that was initially inputted into the subroutine - call MPI_REDUCE(var_loc, var_glb, 1, MPI_INTEGER, & + call MPI_REDUCE(var_loc, sum, 1, MPI_INTEGER, & MPI_SUM, 0, MPI_COMM_WORLD, ierr) - var_loc = var_glb #endif end subroutine s_mpi_reduce_int_sum diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ea2c3be3b..aaf838d2bb 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -56,16 +56,13 @@ contains subroutine s_compute_acceleration(t) real(wp), intent(in) :: t + accel_bf(:) = 0._wp - if (m > 0) then - accel_bf(1) = g_x + k_x*sin(w_x*t - p_x) - if (n > 0) then - accel_bf(2) = g_y + k_y*sin(w_y*t - p_y) - if (p > 0) then - accel_bf(3) = g_z + k_z*sin(w_z*t - p_z) - end if + #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + if (bf_${XYZ}$) then + accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if - end if + #:endfor $:GPU_UPDATE(device='[accel_bf]') diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index a936254ac2..c2bb091598 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -450,7 +450,7 @@ contains !! @param fRho Current density !! @param fP Current driving pressure !! @param fR Current bubble radius - !! @param fV Current bubble velocity + !! @param fV Current bubble radial velocity !! @param fR0 Equilibrium bubble radius !! @param fpb Internal bubble pressure !! 
@param fpbdot Time-derivative of internal bubble pressure @@ -490,7 +490,7 @@ contains real(wp) :: h !< Time step size real(wp), dimension(4) :: myR_tmp1, myV_tmp1, myR_tmp2, myV_tmp2 !< Bubble radius, radial velocity, and radial acceleration for the inner loop real(wp), dimension(4) :: myPb_tmp1, myMv_tmp1, myPb_tmp2, myMv_tmp2 !< Gas pressure and vapor mass for the inner loop (EL) - real(wp) :: fR2, fV2, fpb2, fmass_v2, vTemp, aTemp + real(wp) :: fR2, fV2, fpb2, fmass_v2, vTemp, aTemp, f_bTemp integer :: l, iter_count call s_initial_substep_h(fRho, fP, fR, fV, fR0, fpb, fpbdot, alf, & @@ -571,10 +571,19 @@ contains end do elseif (lag_params%vel_model == 2) then do l = 1, num_dims - aTemp = f_get_acceleration(fPos(l), fR, fVel(l), fmass_n, fmass_v, & + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel, fmass_n, fmass_v, & + fRe, fRho, cell, l, q_prim_vf) + aTemp = f_bTemp/(fmass_n + fmass_v) + fPos(l) = fPos(l) + h * fVel(l) + fVel(l) = fVel(l) + h * aTemp + end do + elseif (lag_params%vel_model == 3) then + do l = 1, num_dims + f_bTemp = f_get_bubble_force(fPos(l), fR, fV, fVel, fmass_n, fmass_v, & fRe, fRho, cell, l, q_prim_vf) - fPos(l) = fPos(l) + h*fVel(l) - fVel(l) = fVel(l) + h*aTemp + aTemp = 2._wp * f_bTemp / (fmass_n + fmass_v) - 3 * fV * fVel(l) / fR + fPos(l) = fPos(l) + h * fVel(l) + fVel(l) = fVel(l) + h * aTemp end do end if end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index fc27ec7f09..78e33d9b3c 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -279,7 +279,7 @@ contains print *, " Lagrange bubbles running, in proc", proc_rank, "number:", bub_id, "/", id - call s_mpi_reduce_int_sum(bub_id) + call s_mpi_reduce_int_sum(bub_id, bub_id) if (proc_rank == 0) then if (bub_id == 0) call s_mpi_abort('No bubbles in the domain. Check input/lag_bubbles.dat') @@ -621,7 +621,7 @@ contains real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho real(wp), dimension(3) :: myPos, myVel - real(wp) :: gamma, pi_inf, qv + real(wp) :: gamma, pi_inf, qv, f_b real(wp), dimension(contxe) :: myalpha_rho, myalpha real(wp), dimension(2) :: Re integer, dimension(3) :: cell @@ -734,10 +734,18 @@ contains mtn_dveldt(k, l, stage) = 0._wp elseif (lag_params%vel_model == 2) then mtn_dposdt(k, l, stage) = myVel(l) - mtn_dveldt(k, l, stage) = f_get_acceleration(myPos(l), & - myR, myVel(l), & - myMass_n, myMass_v, & - Re(1), myRho, cell, l, q_prim_vf) + f_b = f_get_bubble_force(myPos(l), & + myR, myV, myVel, & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = f_b / (myMass_n + myMass_v) + elseif (lag_params%vel_model == 3) then + mtn_dposdt(k, l, stage) = myVel(l) + f_b = f_get_bubble_force(myPos(l), & + myR, myV, myVel, & + myMass_n, myMass_v, & + Re(1), myRho, cell, l, q_prim_vf) + mtn_dveldt(k, l, stage) = 2._wp * f_b / (myMass_n + myMass_v) - 3._wp * myV * myVel(l) / myR else mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp @@ -776,6 +784,7 @@ contains if (lag_params%solver_approach == 2) then + ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=4) do k = 0, p @@ -813,6 +822,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) + ! (beta / (1 - beta)) * dP/dl source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -839,6 +849,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) + ! 
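A hedged reading of the three source blocks in s_compute_bubbles_EL_source, inferred from the loop comments added in this patch; taking beta to be the smeared bubble void fraction is an interpretation, not something stated here.

    ! The two-way coupling of the carrier phase with the Lagrangian bubbles
    ! enters through three contributions, matching the loop comments in this
    ! routine:
    !     q/(1 - beta) * d(beta)/dt       on every conserved variable q,
    !     beta/(1 - beta) * dp/dl         on the momentum in direction l,
    !     beta/(1 - beta) * d(p*u_l)/dl   on the energy,
    ! with the direction-l gradients formed by s_gradient_dir.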
(beta / (1 - beta)) * d(Pu)/dl source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -852,7 +863,6 @@ contains end do end do end do - end if end subroutine s_compute_bubbles_EL_source @@ -1541,6 +1551,13 @@ contains nBubs = nBubs + newBubs end if + if (run_time_info) then + call s_mpi_reduce_int_sum(nBubs, active_bubs) + if (proc_rank == 0 .and. active_bubs == 0) then + call s_mpi_abort('No bubbles remain in the domain. Simulation ending.') + end if + end if + ! Handle MPI transfer of bubbles going to another processor's local domain if (num_procs > 1) then call nvtxStartRange("LAG-BC-TRANSFER-LIST") @@ -1745,36 +1762,34 @@ contains end do end do end do - else - if (dir == 2) then - ! Gradient in y dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do + elseif (dir == 2) then + ! Gradient in y dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do - else - ! Gradient in z dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do + end do + elseif (dir == 3) then + ! Gradient in z dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do - end if + end do end if end subroutine s_gradient_dir diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 35a7528792..484a433da0 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -523,10 +523,11 @@ contains end function f_interpolate_velocity - !! This function calculates the acceleration of the bubble + !! This function calculates the force on a bubble !! based on the pressure gradient, velocity, and drag model. !! @param pos Position of the bubble in direction i !! @param rad Radius of the bubble + !! @param rdot Radial velocity of the bubble !! @param vel Velocity of the bubble !! @param mg Mass of the gas in the bubble !! @param mv Mass of the liquid in the bubble @@ -536,16 +537,18 @@ contains !! @param i Direction of the velocity (1: x, 2: y, 3: z) !! @param q_prim_vf Eulerian field with primitive variables !! 
@return a Acceleration of the bubble in direction i - pure function f_get_acceleration(pos, rad, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(a) + pure function f_get_bubble_force(pos, rad, rdot, vel, mg, mv, Re, rho, cell, i, q_prim_vf) result(force) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), intent(in) :: pos, rad, vel, mg, mv, Re, rho + real(wp), intent(in) :: pos, rad, rdot, mg, mv, Re, rho + real(wp), intent(in), dimension(3) :: vel integer, dimension(3), intent(in) :: cell integer, intent(in) :: i type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf - real(wp) :: a - real(wp) :: dp, vol, force, v_rel + real(wp) :: a, dp, vol, force, Re_b, C_d, v_rel_mag, rho_b + real(wp), dimension(3) :: v_rel real(wp), dimension(fd_order - 1) :: xi, eta, L + integer :: j if (fd_order == 2) then if (i == 1) then @@ -610,19 +613,43 @@ contains dp = L(1)*eta(1) + L(2)*eta(2) + L(3)*eta(3) end if - vol = (4._wp/3._wp)*pi*rad**3._wp - force = -1._wp*vol*dp + vol = (4._wp/3._wp) * pi * (rad**3._wp) + v_rel_mag = 0._wp - v_rel = vel - f_interpolate_velocity(pos, cell, i, q_prim_vf) + do j = 1, num_dims + v_rel(j) = vel(j) - f_interpolate_velocity(pos, cell, j, q_prim_vf) + v_rel_mag = v_rel_mag + v_rel(j)**2._wp + end do + + force = 0._wp if (lag_params%drag_model == 1) then ! Free slip Stokes drag - force = force - (4._wp*pi*rad*v_rel)/Re + force = force - (4._wp*pi*rad*v_rel(i))/Re else if (lag_params%drag_model == 2) then ! No slip Stokes drag - force = force - (6._wp*pi*rad*v_rel)/Re + force = force - (6._wp*pi*rad*v_rel(i))/Re + else if (lag_params%drag_model == 3) then ! Levich drag + force = force - (12._wp*pi*rad*v_rel(i))/Re + else if (lag_params%drag_model > 0) then ! Drag coefficient model + v_rel_mag = sqrt(v_rel_mag) + Re_b = max(1e-3, rho * v_rel_mag * 2._wp * rad * Re) + if (lag_params%drag_model == 4) then ! Mei et al. 1994 + C_d = 16._wp / Re_b * (1._wp + (8._wp / Re_b + 0.5_wp * (1._wp + 3.315_wp * Re_b**(-0.5_wp))) ** -1._wp) + end if + force = force - 0.5_wp * C_d * rho * pi * rad**2._wp * v_rel(i) * v_rel_mag + end if + + if (lag_params%pressure_force) then + force = force - vol * dp + end if + + if (lag_params%gravity_force) then + force = force + (mg + mv) * accel_bf(i) end if - a = force/(mg + mv) + if (lag_params%momentum_transfer_force) then + force = force - 4._wp * pi * rho * rad**2._wp * v_rel(i) * rdot + end if - end function f_get_acceleration + end function f_get_bubble_force end module m_bubbles_EL_kernels diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 29870bb613..c6ac933a93 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -816,6 +816,9 @@ contains lag_params%nBubs_glb = dflt_int lag_params%vel_model = dflt_int lag_params%drag_model = dflt_int + lag_params%pressure_force = .true. + lag_params%gravity_force = .false. + lag_params%momentum_transfer_force = .false. 
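A compact summary of the translational force options implemented in f_get_bubble_force above; the symbols are shorthand introduced here, with mu standing for the 1/Re nondimensional viscosity used in the code.

    ! With v_rel = v_bubble - u_liquid interpolated at the bubble position and
    ! V = (4/3)*pi*R**3:
    !   drag_model = 1 : F = -4*pi*mu*R*v_rel                 (free-slip Stokes)
    !   drag_model = 2 : F = -6*pi*mu*R*v_rel                 (no-slip Stokes)
    !   drag_model = 3 : F = -12*pi*mu*R*v_rel                (Levich)
    !   drag_model = 4 : F = -(1/2)*C_d*rho*pi*R**2*|v_rel|*v_rel,
    !       C_d  = (16/Re_b)*(1 + (8/Re_b + 0.5*(1 + 3.315*Re_b**(-0.5)))**(-1)),
    !       Re_b = max(1e-3, 2*rho*|v_rel|*R/mu)              (Mei et al., 1994)
    ! Optional additions: -V*dp/dl when pressure_force is set,
    ! (m_g + m_v)*accel_bf when gravity_force is set, and
    ! -4*pi*rho*R**2*Rdot*v_rel when momentum_transfer_force is set.
    ! vel_model = 2 then integrates dv/dt = F/(m_g + m_v), while vel_model = 3
    ! uses dv/dt = 2*F/(m_g + m_v) - 3*Rdot*v/R.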
lag_params%c_d = dflt_real lag_params%epsilonb = 1._wp lag_params%charwidth = dflt_real diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 316cb541d0..380997948a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -188,7 +188,8 @@ contains if (bubbles_lagrange) then #:for VAR in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', & - & 'write_bubbles', 'write_bubbles_stats', 'vel_model', 'drag_model'] + & 'write_bubbles', 'write_bubbles_stats', 'vel_model', 'drag_model', & + & 'pressure_force', 'gravity_force', 'momentum_transfer_force'] call MPI_BCAST(lag_params%${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index bf4f15d7a8..22c5459b02 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -163,7 +163,7 @@ def analytic(self): PRE_PROCESS[f"patch_bc({bc_p_id})%radius"] = ParamType.REAL -for p_id in range(1, 10+1): +for p_id in range(1, 20+1): for attribute, ty in [("geometry", ParamType.INT), ("smoothen", ParamType.LOG), ("smooth_patch_id", ParamType.INT), ("hcid", ParamType.INT)]: PRE_PROCESS[f"patch_icpp({p_id})%{attribute}"] = ty @@ -326,7 +326,8 @@ def analytic(self): }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', - 'write_bubbles', 'write_bubbles_stats' ]: + 'write_bubbles', 'write_bubbles_stats', 'pressure_force', + 'gravity_force', 'momentum_transfer_force']: SIMULATION[f'lag_params%{var}'] = ParamType.LOG for var in [ 'solver_approach', 'cluster_type', 'smooth_type', 'nBubs_glb', From c23a919511eed92ad5c4cf52b485e8e6cf59a6bb Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:54:49 -0400 Subject: [PATCH 55/62] Add EL Silo I/O, improve run_time.inf, and post process terminal output --- docs/documentation/case.md | 24 +- examples/2D_lagrange_bubblescreen/case.py | 1 + examples/3D_lagrange_bubblescreen/case.py | 1 + src/common/m_constants.fpp | 1 + src/post_process/m_data_output.fpp | 539 +++++++++++++++++++--- src/post_process/m_global_parameters.fpp | 38 ++ src/post_process/m_mpi_proxy.fpp | 11 +- src/post_process/m_start_up.f90 | 17 +- src/post_process/p_main.fpp | 13 + src/pre_process/m_assign_variables.fpp | 2 +- src/simulation/m_body_forces.fpp | 12 +- src/simulation/m_bubbles.fpp | 5 +- src/simulation/m_bubbles_EL.fpp | 418 ++++++++++------- src/simulation/m_data_output.fpp | 37 +- toolchain/mfc/run/case_dicts.py | 18 + 15 files changed, 869 insertions(+), 268 deletions(-) diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 5f1d739530..9e0536f913 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -482,9 +482,9 @@ It is recommended to set `weno_eps` to $10^{-6}$ for WENO-JS, and to $10^{-40}$ - `mp_weno` activates monotonicity preservation in the WENO reconstruction (MPWENO) such that the values of reconstructed variables do not reside outside the range spanned by WENO stencil ([Balsara and Shu, 2000](references.md); [Suresh and Huynh, 1997](references.md)). -- `muscl_order` specifies the order of the MUSCL scheme that is used for spatial reconstruction of variables by an integer of 1, or 2, that corresponds to the 1st, and 2nd order respectively. When using `muscl_order = 2`, `muscl_lim` must be defined. 
+- `muscl_order` specifies the order of the MUSCL scheme that is used for spatial reconstruction of variables by an integer of 1, or 2, that corresponds to the 1st, and 2nd order respectively. When using `muscl_order = 2`, `muscl_lim` must be defined.
 
-- `muscl_lim` specifies the slope limiter that is used in 2nd order MUSCL Reconstruction by an integer from 1 through 5.
+- `muscl_lim` specifies the slope limiter that is used in 2nd order MUSCL Reconstruction by an integer from 1 through 5. `muscl_lim = 1`, `2`, `3`, `4`, and `5` correspond to minmod, monotonized central, Van Albada, Van Leer, and SUPERBEE, respectively.
 
 - `int_comp` activates interface compression using THINC used in MUSCL Reconstruction, with control parameters (`ic_eps`, and `ic_beta`).
 
@@ -599,6 +599,24 @@ To restart the simulation from $k$-th time step, see [Restarting Cases](running.
 | `output_partial_domain` | Logical | Output part of the domain |
 | `[x,y,z]_output%beg` | Real | Beginning of the output domain in the [x,y,z]-direction |
 | `[x,y,z]_output%end` | Real | End of the output domain in the [x,y,z]-direction |
+| `lag_txt_wrt` | Logical | Write Lagrangian bubble data to `.dat` files |
+| `lag_header` | Logical | Write header to Lagrangian bubble `.dat` files |
+| `lag_db_wrt` | Logical | Write Lagrangian bubble data to Silo/HDF5 database files |
+| `lag_id_wrt` | Logical | Add the global bubble ID to the database file |
+| `lag_pos_wrt` | Logical | Add the bubble position to the database file |
+| `lag_pos_prev_wrt` | Logical | Add the previous bubble position to the database file |
+| `lag_vel_wrt` | Logical | Add the bubble translational velocity to the database file |
+| `lag_rad_wrt` | Logical | Add the bubble radius to the database file |
+| `lag_rvel_wrt` | Logical | Add the bubble radial velocity to the database file |
+| `lag_r0_wrt` | Logical | Add the bubble initial radius to the database file |
+| `lag_rmax_wrt` | Logical | Add the bubble maximum radius to the database file |
+| `lag_rmin_wrt` | Logical | Add the bubble minimum radius to the database file |
+| `lag_dphidt_wrt` | Logical | Add the bubble subgrid velocity potential to the database file |
+| `lag_pres_wrt` | Logical | Add the bubble pressure to the database file |
+| `lag_mv_wrt` | Logical | Add the bubble vapor mass to the database file |
+| `lag_mg_wrt` | Logical | Add the bubble gas mass to the database file |
+| `lag_betaT_wrt` | Logical | Add the bubble heat flux model coefficient to the database file |
+| `lag_betaC_wrt` | Logical | Add the bubble mass flux model coefficient to the database file |
 
 The table lists formatted database output parameters. The parameters define variables that are outputted from simulation and file types and formats of data as well as options for post-processing.
@@ -628,7 +646,7 @@ If `file_per_process` is true, then pre_process, simulation, and post_process mu
 
 - `output_partial_domain` activates the output of part of the domain specified by `[x,y,z]_output%beg` and `[x,y,z]_output%end`. This is useful for large domains where only a portion of the domain is of interest.
-It is not supported when `precision = 1` and `format = 1`.
+It is not supported when `precision = 1` and `format = 1`. It also cannot be enabled with `flux_wrt`, `heat_ratio_wrt`, `pres_inf_wrt`, `c_wrt`, `omega_wrt`, `ib`, `schlieren_wrt`, `qm_wrt`, or `liutex_wrt`.
 
 ### 8.
Acoustic Source {#acoustic-source} diff --git a/examples/2D_lagrange_bubblescreen/case.py b/examples/2D_lagrange_bubblescreen/case.py index fb1dd1cf81..18fba87222 100644 --- a/examples/2D_lagrange_bubblescreen/case.py +++ b/examples/2D_lagrange_bubblescreen/case.py @@ -111,6 +111,7 @@ "precision": 2, "prim_vars_wrt": "T", "parallel_io": "T", + "lag_db_wrt": "T", # Patch 1: Water (left) "patch_icpp(1)%geometry": 3, "patch_icpp(1)%x_centroid": 0.0, diff --git a/examples/3D_lagrange_bubblescreen/case.py b/examples/3D_lagrange_bubblescreen/case.py index 1e0d81b81b..40a85eb022 100644 --- a/examples/3D_lagrange_bubblescreen/case.py +++ b/examples/3D_lagrange_bubblescreen/case.py @@ -120,6 +120,7 @@ "precision": 2, "prim_vars_wrt": "T", "parallel_io": "T", + "lag_db_wrt": "T", # Patch 1: Water (left) "patch_icpp(1)%geometry": 9, "patch_icpp(1)%x_centroid": 0.0, diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 1d2e53d206..114286f53b 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -61,6 +61,7 @@ module m_constants ! Lagrange bubbles constants integer, parameter :: mapCells = 3 !< Number of cells around the bubble where the smoothening function will have effect real(wp), parameter :: R_uni = 8314._wp !< Universal gas constant - J/kmol/K + integer, parameter :: lag_io_vars = 21 ! Number of variables per particle for MPI_IO ! Strang Splitting constants real(wp), parameter :: dflt_adap_dt_tol = 1.e-4_wp !< Default tolerance for adaptive step size diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 4c8867225e..d9b9fec3eb 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -30,7 +30,8 @@ module m_data_output s_open_energy_data_file, & s_write_grid_to_formatted_database_file, & s_write_variable_to_formatted_database_file, & - s_write_lag_bubbles_results, & + s_write_lag_bubbles_results_to_text, & + s_write_lag_bubbles_to_formatted_database_file, & s_write_intf_data_file, & s_write_energy_data_file, & s_close_formatted_database_file, & @@ -276,12 +277,14 @@ contains end if if (bubbles_lagrange) then !Lagrangian solver - dbdir = trim(case_dir)//'/lag_bubbles_post_process' - file_loc = trim(dbdir)//'/.' - call my_inquire(file_loc, dir_check) + if (lag_txt_wrt) then + dbdir = trim(case_dir)//'/lag_bubbles_post_process' + file_loc = trim(dbdir)//'/.' + call my_inquire(file_loc, dir_check) - if (dir_check .neqv. .true.) then - call s_create_directory(trim(dbdir)) + if (dir_check .neqv. .true.) then + call s_create_directory(trim(dbdir)) + end if end if end if @@ -1094,7 +1097,7 @@ contains !> Subroutine that writes the post processed results in the folder 'lag_bubbles_data' !! @param t_step Current time step - impure subroutine s_write_lag_bubbles_results(t_step) + impure subroutine s_write_lag_bubbles_results_to_text(t_step) integer, intent(in) :: t_step @@ -1112,32 +1115,66 @@ contains logical :: lg_bub_file, file_exist integer, dimension(2) :: gsizes, lsizes, start_idx_part - integer :: ifile, tot_data + integer :: ifile integer :: ierr !< Generic flag used to identify and report MPI errors + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part integer :: i - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: lag_io_null + lag_io_null = 0._wp + + ! 
Construct file path + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + + ! Check if file exists inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') + end if - if (file_exist) then - if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - read (9) tot_data, time_real - close (9) - end if - else - print '(A)', trim(file_loc)//' is missing. Exiting.' - call s_mpi_abort + if (.not. parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) end if - call MPI_BCAST(tot_data, 1, MPI_integer, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(time_real, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + lsizes(1) = file_tot_part + lsizes(2) = lag_io_vars start_idx_part(1) = 0 start_idx_part(2) = 0 @@ -1145,59 +1182,451 @@ contains MPI_ORDER_FORTRAN, mpi_p, view, ierr) call MPI_TYPE_COMMIT(view, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & + 'native', mpi_info_null, ierr) + + allocate (MPI_IO_DATA_lg_bubbles(file_tot_part, 1:lag_io_vars)) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, lag_io_vars*file_tot_part, & + mpi_p, status, ierr) + + write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' + file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + + if (proc_rank == 0) then + open (unit=29, file=file_loc, form='formatted', position='rewind') + + if (lag_header) then + write (29, '(A)', advance='no') + if (lag_id_wrt) write (29, '(A8)', advance='no') 'id, ' + if (lag_pos_wrt) write (29, '(3(A17))', advance='no') 'px, ', 'py, ', 'pz, ' + if (lag_pos_prev_wrt) write (29, '(3(A17))', advance='no') 'pvx, ', 'pvy, ', 'pvz, ' + if (lag_vel_wrt) write (29, '(3(A17))', advance='no') 'vx, ', 'vy, ', 'vz, ' + if (lag_rad_wrt) write 
(29, '(A17)', advance='no') 'radius, '
+                if (lag_rvel_wrt) write (29, '(A17)', advance='no') 'rvel, '
+                if (lag_r0_wrt) write (29, '(A17)', advance='no') 'r0, '
+                if (lag_rmax_wrt) write (29, '(A17)', advance='no') 'rmax, '
+                if (lag_rmin_wrt) write (29, '(A17)', advance='no') 'rmin, '
+                if (lag_dphidt_wrt) write (29, '(A17)', advance='no') 'dphidt, '
+                if (lag_pres_wrt) write (29, '(A17)', advance='no') 'pressure, '
+                if (lag_mv_wrt) write (29, '(A17)', advance='no') 'mv, '
+                if (lag_mg_wrt) write (29, '(A17)', advance='no') 'mg, '
+                if (lag_betaT_wrt) write (29, '(A17)', advance='no') 'betaT, '
+                if (lag_betaC_wrt) write (29, '(A17)', advance='no') 'betaC, '
+                write (29, '(A15)') 'time'
+            end if
+
+            do i = 1, file_tot_part
+                id = int(MPI_IO_DATA_lg_bubbles(i, 1))
+                inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21)
+                if (id > 0) then
+                    write (29, '(100(A))', advance='no') ''
+
+                    if (lag_id_wrt) write (29, '(I6, A)', advance='no') id, ', '
+                    if (lag_pos_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', '
+                    if (lag_pos_prev_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', '
+                    if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(9), ', '
+                    if (lag_rad_wrt) write (29, '(E15.7, A)', advance='no') inputvals(10), ', '
+                    if (lag_rvel_wrt) write (29, '(E15.7, A)', advance='no') inputvals(11), ', '
+                    if (lag_r0_wrt) write (29, '(E15.7, A)', advance='no') inputvals(12), ', '
+                    if (lag_rmax_wrt) write (29, '(E15.7, A)', advance='no') inputvals(13), ', '
+                    if (lag_rmin_wrt) write (29, '(E15.7, A)', advance='no') inputvals(14), ', '
+                    if (lag_dphidt_wrt) write (29, '(E15.7, A)', advance='no') inputvals(15), ', '
+                    if (lag_pres_wrt) write (29, '(E15.7, A)', advance='no') inputvals(16), ', '
+                    if (lag_mv_wrt) write (29, '(E15.7, A)', advance='no') inputvals(17), ', '
+                    if (lag_mg_wrt) write (29, '(E15.7, A)', advance='no') inputvals(18), ', '
+                    if (lag_betaT_wrt) write (29, '(E15.7, A)', advance='no') inputvals(19), ', '
+                    if (lag_betaC_wrt) write (29, '(E15.7, A)', advance='no') inputvals(20), ', '
+                    write (29, '(E15.7)') file_time
+                end if
+            end do
+            close (29)
+        end if
+
+        deallocate (MPI_IO_DATA_lg_bubbles)
+
+        call s_mpi_barrier()
+
+        call MPI_FILE_CLOSE(ifile, ierr)
+#endif
+
+    end subroutine s_write_lag_bubbles_results_to_text
+
+    impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step)
+
+        integer, intent(in) :: t_step
+
+        character(len=len_trim(case_dir) + 3*name_len) :: file_loc
+
+        integer :: id
+
+#ifdef MFC_MPI
+        real(wp), dimension(20) :: inputvals
+        real(wp) :: time_real
+        integer, dimension(MPI_STATUS_SIZE) :: status
+        integer(KIND=MPI_OFFSET_KIND) :: disp
+        integer :: view
+
+        logical :: lg_bub_file, file_exist
+
+        integer, dimension(2) :: gsizes, lsizes, start_idx_part
+        integer :: ifile, ierr, tot_data, valid_data, nBub
+        real(wp) :: file_time, file_dt
+        integer :: file_num_procs, file_tot_part
+        integer, dimension(:), allocatable :: proc_bubble_counts
+        real(wp), dimension(1:1, 1:lag_io_vars) :: dummy
+        character(LEN=4*name_len), dimension(num_procs) :: meshnames
+        integer, dimension(num_procs) :: meshtypes
+        real(wp) :: dummy_data
+
+        integer :: i, j
+
+        real(wp), dimension(:), allocatable :: bub_id
+        real(wp), dimension(:), allocatable :: px, py, pz, ppx, ppy, ppz, vx, vy, vz
+        real(wp), dimension(:), allocatable :: radius, rvel, rnot, rmax, rmin, dphidt
+        real(wp), dimension(:), allocatable :: 
pressure, mv, mg, betaT, betaC + + dummy = 0._wp + dummy_data = 0._wp + + ! Construct file path write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=lg_bub_file) - if (lg_bub_file) then + ! Check if file exists + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') + end if + + if (.not. parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! Set time variables from file + + nBub = proc_bubble_counts(proc_rank + 1) + + start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + + start_idx_part(2) = 0 + lsizes(1) = nBub + lsizes(2) = lag_io_vars + + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars + + if (nBub > 0) then + + #:for VAR in ['bub_id', 'px', 'py', 'pz', 'ppx', 'ppy', 'ppz', 'vx', 'vy', 'vz', & + 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & + 'pressure', 'mv', 'mg', 'betaT', 'betaC'] + allocate(${VAR}$(nBub)) + #:endfor + allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) - allocate (MPI_IO_DATA_lg_bubbles(tot_data, 1:21)) + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, & + lag_io_vars*nBub, mpi_p, status, ierr) - call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lg_bubbles, 21*tot_data, & - mpi_p, status, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) - write (file_loc, '(A,I0,A)') 'lag_bubbles_post_process_', t_step, '.dat' - file_loc = trim(case_dir)//'/lag_bubbles_post_process/'//trim(file_loc) + ! 
Extract data from MPI_IO_DATA_lg_bubbles array + ! Adjust these indices based on your actual data layout + #:for VAR, IDX in [('bub_id', 1), ('px', 2), ('py',3), ('pz',4), ('ppx',5), ('ppy',6), ('ppz',7), & + ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & + ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & + ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] + ${VAR}$(:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) + #:endfor + ! Next, the root processor proceeds to record all of the spatial + ! extents in the formatted database master file. In addition, it + ! also records a sub-domain connectivity map so that the entire + ! grid may be reassembled by looking at the master file. if (proc_rank == 0) then - open (unit=29, file=file_loc, form='formatted', position='rewind') - !write(29,*) 'lg_bubID, x, y, z, xPrev, yPrev, zPrev, xVel, yVel, ', & - ! 'zVel, radius, interfaceVelocity, equilibriumRadius', & - ! 'Rmax, Rmin, dphidt, pressure, mv, mg, betaT, betaC, time' - do i = 1, tot_data - id = int(MPI_IO_DATA_lg_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lg_bubbles(i, 2:21) - if (id > 0) then - write (29, 6) int(id), inputvals(1), inputvals(2), & - inputvals(3), inputvals(4), inputvals(5), inputvals(6), inputvals(7), & - inputvals(8), inputvals(9), inputvals(10), inputvals(11), & - inputvals(12), inputvals(13), inputvals(14), inputvals(15), & - inputvals(16), inputvals(17), inputvals(18), inputvals(19), & - inputvals(20), time_real -6 format(I6, 21(1x, E15.7)) - end if + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH end do - close (29) + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) + end if + + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + px, py, pz, nBub, & + DB_DOUBLE, DB_F77NULL, ierr) + + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', t_step, bub_id, nBub) + end if + + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step, vx, nBub) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step, vy, nBub) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step, vz, nBub) + end if + end if + + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step, radius, nBub) end if + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step, rvel, nBub) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step, rnot, nBub) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step, rmax, nBub) + end if + + if (lag_rmin_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step, rmin, nBub) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step, dphidt, nBub) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', t_step, pressure, nBub) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step, mv, nBub) + end if + + if (lag_mg_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mg', t_step, mg, nBub) + end if + + if (lag_betaT_wrt) 
then + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step, betaT, nBub) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step, betaC, nBub) + end if + + deallocate (bub_id, px, py, pz, ppx, ppy, ppz, vx, vy, vz, radius, & + rvel, rnot, rmax, rmin, dphidt, pressure, mv, mg, & + betaT, betaC) deallocate (MPI_IO_DATA_lg_bubbles) + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - end if + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) - call s_mpi_barrier() + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + if (proc_rank == 0) then + + do i = 1, num_procs + write (meshnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:lag_bubbles' + meshtypes(i) = DB_POINTMESH + end do + err = DBSET2DSTRLEN(len(meshnames(1))) + err = DBPUTMMESH(dbroot, 'lag_bubbles', 16, & + num_procs, meshnames, & + len_trim(meshnames), & + meshtypes, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPM(dbfile, 'lag_bubbles', 11, 3, & + dummy_data, dummy_data, dummy_data, 0, & + DB_DOUBLE, DB_F77NULL, ierr) + + if (lag_id_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_id', t_step) + end if + + if (lag_vel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_vel1', t_step) + call s_write_lag_variable_to_formatted_database_file('part_vel2', t_step) + if (p > 0) then + call s_write_lag_variable_to_formatted_database_file('part_vel3', t_step) + end if + end if + + if (lag_rad_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_radius', t_step) + end if + + if (lag_rvel_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rdot', t_step) + end if + + if (lag_r0_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_r0', t_step) + end if + + if (lag_rmax_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmax', t_step) + end if + + if (lag_rmin_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_rmin', t_step) + end if + + if (lag_dphidt_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_dphidt', t_step) + end if + + if (lag_pres_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_pressure', t_step) + end if + + if (lag_mv_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mv', t_step) + end if + + if (lag_mg_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_mg', t_step) + end if + + if (lag_betaT_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaT', t_step) + end if + + if (lag_betaC_wrt) then + call s_write_lag_variable_to_formatted_database_file('part_betaC', t_step) + end if + + end if #endif - end subroutine s_write_lag_bubbles_results + end subroutine s_write_lag_bubbles_to_formatted_database_file + + subroutine s_write_lag_variable_to_formatted_database_file(varname, t_step, data, nBubs) + + character(len=*), intent(in) :: varname + integer, intent(in) :: t_step + real(wp), dimension(1:), intent(in), optional :: data + 
integer, intent(in), optional :: nBubs + + character(len=64), dimension(num_procs) :: var_names + integer, dimension(num_procs) :: var_types + real(wp) :: dummy_data + + integer :: ierr !< Generic flag used to identify and report database errors + integer :: i + + dummy_data = 0._wp + + if (present(nBubs) .and. present(data)) then + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, data, nBubs, DB_DOUBLE, DB_F77NULL, ierr) + else + if (proc_rank == 0) then + do i = 1, num_procs + write (var_names(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + var_types(i) = DB_POINTVAR + end do + err = DBSET2DSTRLEN(len(var_names(1))) + err = DBSETEMPTYOK(1) + err = DBPUTMVAR(dbroot, trim(varname), len_trim(varname), & + num_procs, var_names, & + len_trim(var_names), & + var_types, DB_F77NULL, ierr) + end if + + err = DBSETEMPTYOK(1) + err = DBPUTPV1(dbfile, trim(varname), len_trim(varname), & + 'lag_bubbles', 11, dummy_data, 0, DB_DOUBLE, DB_F77NULL, ierr) + end if + + end subroutine s_write_lag_variable_to_formatted_database_file + impure subroutine s_write_intf_data_file(q_prim_vf) type(scalar_field), dimension(sys_size), intent(IN) :: q_prim_vf diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 134d900f91..2400b13ad1 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -258,6 +258,24 @@ module m_global_parameters logical :: ib logical :: chem_wrt_Y(1:num_species) logical :: chem_wrt_T + logical :: lag_header + logical :: lag_txt_wrt + logical :: lag_db_wrt + logical :: lag_id_wrt + logical :: lag_pos_wrt + logical :: lag_pos_prev_wrt + logical :: lag_vel_wrt + logical :: lag_rad_wrt + logical :: lag_rvel_wrt + logical :: lag_r0_wrt + logical :: lag_rmax_wrt + logical :: lag_rmin_wrt + logical :: lag_dphidt_wrt + logical :: lag_pres_wrt + logical :: lag_mv_wrt + logical :: lag_mg_wrt + logical :: lag_betaT_wrt + logical :: lag_betaC_wrt !> @} real(wp), dimension(num_fluids_max) :: schlieren_alpha !< @@ -329,6 +347,8 @@ module m_global_parameters real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) + real(wp) :: wall_time, wall_time_avg !< Wall time measurements + contains !> Assigns default values to user inputs prior to reading @@ -438,6 +458,24 @@ contains sim_data = .false. cf_wrt = .false. ib = .false. + lag_txt_wrt = .false. + lag_header = .true. + lag_db_wrt = .false. + lag_id_wrt = .true. + lag_pos_wrt = .true. + lag_pos_prev_wrt = .false. + lag_vel_wrt = .true. + lag_rad_wrt = .true. + lag_rvel_wrt = .false. + lag_r0_wrt = .false. + lag_rmax_wrt = .false. + lag_rmin_wrt = .false. + lag_dphidt_wrt = .false. + lag_pres_wrt = .false. + lag_mv_wrt = .false. + lag_mg_wrt = .false. + lag_betaT_wrt = .false. + lag_betaC_wrt = .false. 
schlieren_alpha = dflt_real diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 8699b97fe4..7adb31a433 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -106,10 +106,19 @@ contains & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & & 'down_sample' ] - call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor + if (bubbles_lagrange) then + #:for VAR in ['lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & + & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & + & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & + & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & + & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] + call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + #:endfor + end if + call MPI_BCAST(flux_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(omega_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) call MPI_BCAST(mom_wrt(1), 3, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index c5e7fe31be..c9b681f8f9 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -90,7 +90,11 @@ impure subroutine s_read_input_file cfl_target, surface_tension, bubbles_lagrange, & sim_data, hyperelasticity, Bx0, relativity, cont_damage, & num_bc_patches, igr, igr_order, down_sample, recon_type, & - muscl_order + muscl_order, lag_header, lag_txt_wrt, lag_db_wrt, & + lag_id_wrt, lag_pos_wrt, lag_pos_prev_wrt, lag_vel_wrt, & + lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, & + lag_rmin_wrt, lag_dphidt_wrt, lag_pres_wrt, lag_mv_wrt, & + lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -176,15 +180,15 @@ impure subroutine s_perform_time_step(t_step) integer, intent(inout) :: t_step if (proc_rank == 0) then if (cfl_dt) then - print '(" [", I3, "%] Saving ", I8, " of ", I0, "")', & + print '(" [", I3, "%] Saving ", I8, " of ", I0, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & int(ceiling(100._wp*(real(t_step - n_start)/(n_save)))), & - t_step, n_save + t_step, n_save, wall_time_avg, wall_time else - print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I0, "")', & + print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I0, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & int(ceiling(100._wp*(real(t_step - t_step_start)/(t_step_stop - t_step_start + 1)))), & (t_step - t_step_start)/t_step_save + 1, & (t_step_stop - t_step_start)/t_step_save + 1, & - t_step + t_step, wall_time_avg, wall_time end if end if @@ -719,7 +723,8 @@ impure subroutine s_save_data(t_step, varname, pres, c, H) call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' - call s_write_lag_bubbles_results(t_step) !! Individual bubble evolution + if (lag_txt_wrt) call s_write_lag_bubbles_results_to_text(t_step) ! text output + if (lag_db_wrt) call s_write_lag_bubbles_to_formatted_database_file(t_step) ! silo file output end if if (sim_data .and. 
proc_rank == 0) then diff --git a/src/post_process/p_main.fpp b/src/post_process/p_main.fpp index 59b69e3b07..12d6353c67 100644 --- a/src/post_process/p_main.fpp +++ b/src/post_process/p_main.fpp @@ -26,6 +26,7 @@ program p_main real(wp) :: pres real(wp) :: c real(wp) :: H + real(wp) :: start, finish call s_initialize_mpi_domain() @@ -49,10 +50,22 @@ program p_main ! available step. To avoid this, we force synchronization here. call s_mpi_barrier() + call cpu_time(start) + call s_perform_time_step(t_step) call s_save_data(t_step, varname, pres, c, H) + call cpu_time(finish) + + wall_time = abs(finish - start) + + if (t_step >= 2) then + wall_time_avg = (wall_time + (t_step - 2)*wall_time_avg)/(t_step - 1) + else + wall_time_avg = 0._wp + end if + if (cfl_dt) then if (t_step == n_save - 1) then exit diff --git a/src/pre_process/m_assign_variables.fpp b/src/pre_process/m_assign_variables.fpp index f60573b1b9..ca0c7c1c16 100644 --- a/src/pre_process/m_assign_variables.fpp +++ b/src/pre_process/m_assign_variables.fpp @@ -684,7 +684,7 @@ contains if (surface_tension) then q_prim_vf(c_idx)%sf(j, k, l) = eta*patch_icpp(patch_id)%cf_val + & - (1._wp - eta)*patch_icpp(smooth_patch_id)%cf_val + (1._wp - eta)*orig_prim_vf(c_idx) end if ! Updating the patch identities bookkeeping variable diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 1ea2c3be3b..9c094c8998 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -57,15 +57,11 @@ contains real(wp), intent(in) :: t - if (m > 0) then - accel_bf(1) = g_x + k_x*sin(w_x*t - p_x) - if (n > 0) then - accel_bf(2) = g_y + k_y*sin(w_y*t - p_y) - if (p > 0) then - accel_bf(3) = g_z + k_z*sin(w_z*t - p_z) - end if + #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + if (bf_${XYZ}$) then + accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if - end if + #:endfor $:GPU_UPDATE(device='[accel_bf]') diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp index 0ec758dc22..23c255428a 100644 --- a/src/simulation/m_bubbles.fpp +++ b/src/simulation/m_bubbles.fpp @@ -72,6 +72,9 @@ contains ! Rayleigh-Plesset bubbles fCpbw = f_cpbw_KM(fR0, fR, fV, fpb) f_rddot = f_rddot_RP(fP, fRho, fR, fV, fCpbw) + else + ! Default: No bubble dynamics + f_rddot = 0._wp end if end function f_rddot @@ -445,7 +448,7 @@ contains !! @param fRho Current density !! @param fP Current driving pressure !! @param fR Current bubble radius - !! @param fV Current bubble velocity + !! @param fV Current bubble radial velocity !! @param fR0 Equilibrium bubble radius !! @param fpb Internal bubble pressure !! @param fpbdot Time-derivative of internal bubble pressure diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index bacd19497d..cebffbeb96 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -226,7 +226,7 @@ contains do while (ios == 0) read (94, *, iostat=ios) (inputBubble(i), i=1, 8) if (ios /= 0) cycle - indomain = particle_in_domain(inputBubble(1:3)) + indomain = particle_in_domain_physical(inputBubble(1:3)) id = id + 1 if (id > lag_params%nBubs_glb .and. proc_rank == 0) then call s_mpi_abort("Current number of bubbles is larger than nBubs_glb") @@ -324,25 +324,25 @@ contains cell = -buff_size call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - ! Check if the bubble is located in the ghost cell of a symmetric boundary - if ((bc_x%beg == BC_REFLECTIVE .and. cell(1) < 0) .or. 
& - (bc_x%end == BC_REFLECTIVE .and. cell(1) > m) .or. & - (bc_y%beg == BC_REFLECTIVE .and. cell(2) < 0) .or. & - (bc_y%end == BC_REFLECTIVE .and. cell(2) > n)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + ! Check if the bubble is located in the ghost cell of a symmetric, or wall boundary + if ((any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) < 0) .or. & + (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(1) > m) .or. & + (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) < 0) .or. & + (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(2) > n)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if if (p > 0) then - if ((bc_z%beg == BC_REFLECTIVE .and. cell(3) < 0) .or. & - (bc_z%end == BC_REFLECTIVE .and. cell(3) > p)) then - call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric boundary.") + if ((any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) < 0) .or. & + (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. cell(3) > p)) then + call s_mpi_abort("Lagrange bubble is in the ghost cells of a symmetric or wall boundary.") end if end if ! If particle is in the ghost cells, find the closest non-ghost cell - cell(1) = min(max(cell(1), 0), m) - cell(2) = min(max(cell(2), 0), n) - if (p > 0) cell(3) = min(max(cell(3), 0), p) + !cell(1) = min(max(cell(1), 0), m) + !cell(2) = min(max(cell(2), 0), n) + !if (p > 0) cell(3) = min(max(cell(3), 0), p) call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) dynP = 0._wp @@ -405,6 +405,8 @@ contains integer, intent(inout) :: bub_id, save_count character(LEN=path_len + 2*name_len) :: file_loc + real(wp) :: file_time, file_dt + integer :: file_num_procs, file_tot_part, tot_part #ifdef MFC_MPI real(wp), dimension(20) :: inputvals @@ -419,81 +421,144 @@ contains integer :: ifile, ierr, tot_data, id integer :: i - write (file_loc, '(a,i0,a)') 'lag_bubbles_mpi_io_', save_count, '.dat' + integer, dimension(:), allocatable :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp + + ! Construct file path + write (file_loc, '(A,I0,A)') 'lag_bubbles_', save_count, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=file_exist) - - if (file_exist) then - if (proc_rank == 0) then - open (9, file=trim(file_loc), form='unformatted', status='unknown') - read (9) tot_data, mytime, dt - close (9) - print *, 'Reading lag_bubbles_mpi_io: ', tot_data, mytime, dt - end if - else - print '(a)', trim(file_loc)//' is missing. exiting.' - call s_mpi_abort + + ! Check if file exists + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort('Restart file '//trim(file_loc)//' does not exist!') end if - call MPI_BCAST(tot_data, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(mytime, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) - call MPI_BCAST(dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + if (.not. 
parallel_io) return + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + call MPI_FILE_READ(ifile, file_tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_READ(ifile, file_time, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_dt, 1, mpi_p, status, ierr) + call MPI_FILE_READ(ifile, file_num_procs, 1, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(file_tot_part, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_time, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_dt, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(file_num_procs, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + allocate (proc_bubble_counts(file_num_procs)) + + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip to processor counts position + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs), & + MPI_OFFSET_KIND) + call MPI_FILE_SEEK(ifile, disp, MPI_SEEK_SET, ierr) + call MPI_FILE_READ(ifile, proc_bubble_counts, file_num_procs, MPI_INTEGER, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if + + call MPI_BCAST(proc_bubble_counts, file_num_procs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + + ! Set time variables from file + mytime = file_time + dt = file_dt + + bub_id = proc_bubble_counts(proc_rank + 1) - gsizes(1) = tot_data - gsizes(2) = 21 - lsizes(1) = tot_data - lsizes(2) = 21 start_idx_part(1) = 0 + do i = 1, proc_rank + start_idx_part(1) = start_idx_part(1) + proc_bubble_counts(i) + end do + start_idx_part(2) = 0 + lsizes(1) = bub_id + lsizes(2) = lag_io_vars - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) + gsizes(1) = file_tot_part + gsizes(2) = lag_io_vars - ! Open the file to write all flow variables - write (file_loc, '(a,i0,a)') 'lag_bubbles_', save_count, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (file=trim(file_loc), exist=particle_file) + if (bub_id > 0) then + + allocate (MPI_IO_DATA_lag_bubbles(bub_id, 1:lag_io_vars)) + + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - if (particle_file) then - call MPI_FILE_open(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & mpi_info_int, ifile, ierr) - disp = 0._wp - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) - allocate (MPI_IO_DATA_lag_bubbles(tot_data, 1:21)) - call MPI_FILE_read_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*tot_data, & - mpi_p, status, ierr) - do i = 1, tot_data - id = int(MPI_IO_DATA_lag_bubbles(i, 1)) - inputvals(1:20) = MPI_IO_DATA_lag_bubbles(i, 2:21) - indomain = particle_in_domain(inputvals(1:3)) - if (indomain .and. (id > 0)) then - bub_id = bub_id + 1 - nBubs = bub_id ! local number of bubbles - lag_id(bub_id, 1) = id ! global ID - lag_id(bub_id, 2) = bub_id ! 
local ID - mtn_pos(bub_id, 1:3, 1) = inputvals(1:3) - mtn_posPrev(bub_id, 1:3, 1) = inputvals(4:6) - mtn_vel(bub_id, 1:3, 1) = inputvals(7:9) - intfc_rad(bub_id, 1) = inputvals(10) - intfc_vel(bub_id, 1) = inputvals(11) - bub_R0(bub_id) = inputvals(12) - Rmax_stats(bub_id) = inputvals(13) - Rmin_stats(bub_id) = inputvals(14) - bub_dphidt(bub_id) = inputvals(15) - gas_p(bub_id, 1) = inputvals(16) - gas_mv(bub_id, 1) = inputvals(17) - gas_mg(bub_id) = inputvals(18) - gas_betaT(bub_id) = inputvals(19) - gas_betaC(bub_id) = inputvals(20) - cell = -buff_size - call s_locate_cell(mtn_pos(bub_id, 1:3, 1), cell, mtn_s(bub_id, 1:3, 1)) - end if + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars*bub_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + + nBubs = bub_id + + do i = 1, bub_id + lag_id(i, 1) = int(MPI_IO_DATA_lag_bubbles(i, 1)) + mtn_pos(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 2:4) + mtn_posPrev(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 5:7) + mtn_vel(i, 1:3, 1) = MPI_IO_DATA_lag_bubbles(i, 8:10) + intfc_rad(i, 1) = MPI_IO_DATA_lag_bubbles(i, 11) + intfc_vel(i, 1) = MPI_IO_DATA_lag_bubbles(i, 12) + bub_R0(i) = MPI_IO_DATA_lag_bubbles(i, 13) + Rmax_stats(i) = MPI_IO_DATA_lag_bubbles(i, 14) + Rmin_stats(i) = MPI_IO_DATA_lag_bubbles(i, 15) + bub_dphidt(i) = MPI_IO_DATA_lag_bubbles(i, 16) + gas_p(i, 1) = MPI_IO_DATA_lag_bubbles(i, 17) + gas_mv(i, 1) = MPI_IO_DATA_lag_bubbles(i, 18) + gas_mg(i) = MPI_IO_DATA_lag_bubbles(i, 19) + gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20) + gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21) end do + deallocate (MPI_IO_DATA_lag_bubbles) + + else + nBubs = 0 + + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, & + mpi_info_int, ifile, ierr) + + ! Skip extended header + disp = int(sizeof(file_tot_part) + 2*sizeof(file_time) + sizeof(file_num_procs) + & + file_num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_READ_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + call MPI_TYPE_FREE(view, ierr) + end if + + if (proc_rank == 0) then + write (*, '(A,I0,A,I0)') 'Read ', file_tot_part, ' particles from restart file at t_step = ', save_count + write (*, '(A,E15.7,A,E15.7)') 'Restart time = ', mytime, ', dt = ', dt end if - call MPI_FILE_CLOSE(ifile, ierr) + + deallocate (proc_bubble_counts) #endif end subroutine s_restart_bubbles @@ -649,6 +714,7 @@ contains if (lag_params%solver_approach == 2) then + ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=4) do k = 0, p @@ -686,6 +752,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) + ! (q / (1 - beta)) * d(beta)/dt source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -712,6 +779,7 @@ contains call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) + ! (beta / (1 - beta)) * d(Pu)/dl source $:GPU_PARALLEL_LOOP(collapse=3) do k = 0, p do j = 0, n @@ -1234,25 +1302,24 @@ contains (pos_part(3) < z_cb(p + buff_size)) .and. (pos_part(3) >= z_cb(-buff_size - 1))) end if - ! 
For symmetric boundary condition - if (bc_x%beg == BC_REFLECTIVE) then + ! For symmetric and wall boundary condition + if (any(bc_x%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) >= x_cb(-1))) end if - if (bc_x%end == BC_REFLECTIVE) then + if (any(bc_x%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(1) < x_cb(m))) end if - if (bc_y%beg == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) >= y_cb(-1))) end if - if (bc_y%end == BC_REFLECTIVE .and. (.not. cyl_coord)) then + if (any(bc_y%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/)) .and. (.not. cyl_coord)) then particle_in_domain = (particle_in_domain .and. (pos_part(2) < y_cb(n))) end if - if (p > 0) then - if (bc_z%beg == BC_REFLECTIVE) then + if (any(bc_z%beg == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(3) >= z_cb(-1))) end if - if (bc_z%end == BC_REFLECTIVE) then + if (any(bc_z%end == (/BC_REFLECTIVE, BC_CHAR_SLIP_WALL, BC_SLIP_WALL, BC_NO_SLIP_WALL/))) then particle_in_domain = (particle_in_domain .and. (pos_part(3) < z_cb(p))) end if end if @@ -1303,41 +1370,39 @@ contains end do end do end do - else - if (dir == 2) then - ! Gradient in y dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do + elseif (dir == 2) then + ! Gradient in y dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do - else - ! Gradient in z dir. - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do + end do + elseif (dir == 3) then + ! Gradient in z dir. + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do - end if + end do end if end subroutine s_gradient_dir - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. !! 
@param q_time Current time impure subroutine s_write_lag_particles(qtime) @@ -1465,7 +1530,7 @@ contains character(LEN=path_len + 2*name_len) :: file_loc logical :: file_exist - integer :: bub_id, tot_part, tot_part_wrtn, npart_wrtn + integer :: bub_id, tot_part integer :: i, k #ifdef MFC_MPI @@ -1476,6 +1541,9 @@ contains integer :: view integer, dimension(2) :: gsizes, lsizes, start_idx_part integer, dimension(num_procs) :: part_order, part_ord_mpi + integer, dimension(num_procs) :: proc_bubble_counts + real(wp), dimension(1:1, 1:lag_io_vars) :: dummy + dummy = 0._wp bub_id = 0._wp if (nBubs /= 0) then @@ -1488,78 +1556,60 @@ contains if (.not. parallel_io) return + lsizes(1) = bub_id + lsizes(2) = lag_io_vars + ! Total number of particles call MPI_ALLREDUCE(bub_id, tot_part, 1, MPI_integer, & MPI_SUM, MPI_COMM_WORLD, ierr) - ! Total number of particles written so far - call MPI_ALLREDUCE(npart_wrtn, tot_part_wrtn, 1, MPI_integer, & - MPI_SUM, MPI_COMM_WORLD, ierr) - - lsizes(1) = max(1, bub_id) - lsizes(2) = 21 - - ! if the partcle number is zero, put 1 since MPI cannot deal with writing - ! zero particle - part_order(:) = 1 - part_order(proc_rank + 1) = max(1, bub_id) + call MPI_ALLGATHER(bub_id, 1, MPI_INTEGER, proc_bubble_counts, 1, MPI_INTEGER, & + MPI_COMM_WORLD, ierr) - call MPI_ALLREDUCE(part_order, part_ord_mpi, num_procs, MPI_integer, & - MPI_MAX, MPI_COMM_WORLD, ierr) - - gsizes(1) = sum(part_ord_mpi(1:num_procs)) - gsizes(2) = 21 - - start_idx_part(1) = sum(part_ord_mpi(1:proc_rank + 1)) - part_ord_mpi(proc_rank + 1) + ! Calculate starting index for this processor's particles + call MPI_EXSCAN(lsizes(1), start_idx_part(1), 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (proc_rank == 0) start_idx_part(1) = 0 start_idx_part(2) = 0 - write (file_loc, '(A,I0,A)') 'lag_bubbles_mpi_io_', t_step, '.dat' + gsizes(1) = tot_part + gsizes(2) = lag_io_vars + + write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - ! Writing down the total number of particles + ! Clean up existing file if (proc_rank == 0) then - open (9, FILE=trim(file_loc), FORM='unformatted', STATUS='unknown') - write (9) gsizes(1), mytime, dt - close (9) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if end if - call MPI_type_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & - MPI_ORDER_FORTRAN, mpi_p, view, ierr) - call MPI_type_COMMIT(view, ierr) - - allocate (MPI_IO_DATA_lag_bubbles(1:max(1, bub_id), 1:21)) + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - ! Open the file to write all flow variables - write (file_loc, '(A,I0,A)') 'lag_bubbles_', t_step, '.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + if (proc_rank == 0) then + call MPI_FILE_OPEN(MPI_COMM_SELF, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - disp = 0._wp + ! 
Write header using MPI I/O for consistency + call MPI_FILE_WRITE(ifile, tot_part, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, mytime, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, dt, 1, mpi_p, status, ierr) + call MPI_FILE_WRITE(ifile, num_procs, 1, MPI_INTEGER, status, ierr) + call MPI_FILE_WRITE(ifile, proc_bubble_counts, num_procs, MPI_INTEGER, status, ierr) - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, & - 'native', mpi_info_null, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + end if - ! Cycle through list - i = 1 + call MPI_BARRIER(MPI_COMM_WORLD, ierr) - if (bub_id == 0) then - MPI_IO_DATA_lag_bubbles(1, 1:21) = 0._wp - else + if (bub_id > 0) then + allocate (MPI_IO_DATA_lag_bubbles(max(1, bub_id), 1:lag_io_vars)) + i = 1 do k = 1, nBubs - if (particle_in_domain_physical(mtn_pos(k, 1:3, 1))) then - MPI_IO_DATA_lag_bubbles(i, 1) = real(lag_id(k, 1)) MPI_IO_DATA_lag_bubbles(i, 2:4) = mtn_pos(k, 1:3, 1) MPI_IO_DATA_lag_bubbles(i, 5:7) = mtn_posPrev(k, 1:3, 1) @@ -1575,21 +1625,47 @@ contains MPI_IO_DATA_lag_bubbles(i, 19) = gas_mg(k) MPI_IO_DATA_lag_bubbles(i, 20) = gas_betaT(k) MPI_IO_DATA_lag_bubbles(i, 21) = gas_betaC(k) - i = i + 1 - end if - end do - end if + call MPI_TYPE_CREATE_SUBARRAY(2, gsizes, lsizes, start_idx_part, & + MPI_ORDER_FORTRAN, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) + + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) + + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA_lag_bubbles, & + lag_io_vars*bub_id, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) - call MPI_FILE_write_ALL(ifile, MPI_IO_DATA_lag_bubbles, 21*max(1, bub_id), & - mpi_p, status, ierr) + deallocate (MPI_IO_DATA_lag_bubbles) + + else + call MPI_TYPE_CONTIGUOUS(0, mpi_p, view, ierr) + call MPI_TYPE_COMMIT(view, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, & + ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - deallocate (MPI_IO_DATA_lag_bubbles) + ! Skip header (written by rank 0) + disp = int(sizeof(tot_part) + 2*sizeof(mytime) + sizeof(num_procs) + & + num_procs*sizeof(proc_bubble_counts(1)), MPI_OFFSET_KIND) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, view, 'native', mpi_info_int, ierr) + + call MPI_FILE_WRITE_ALL(ifile, dummy, 0, mpi_p, status, ierr) + + call MPI_FILE_CLOSE(ifile, ierr) + end if #endif diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 5f5b79c481..90e750babe 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -157,24 +157,16 @@ contains write (3, '(A)') ''; write (3, '(A)') '' ! 
Generating table header for the stability criteria to be outputted - if (cfl_dt) then - if (viscous) then - write (3, '(A)') ' Time-steps dt = Time ICFL '// & - 'Max VCFL Max Rc Min =' - else - write (3, '(A)') ' Time-steps dt Time '// & - ' ICFL Max ' - end if - else - if (viscous) then - write (3, '(A)') ' Time-steps Time ICFL '// & - 'Max VCFL Max Rc Min ' - else - write (3, '(A)') ' Time-steps Time '// & - ' ICFL Max ' - end if + write (3, '(13X,A9,13X,A10,13X,A10,13X,A10)', advance="no") & + trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') + + if (viscous) then + write(3, '(13X,A10,13X,A16)', advance="no") & + trim('VCFL Max'), trim('Rc Min') end if + write(3, *) ! new line + end subroutine s_open_run_time_information_file !> This opens a formatted data file where the root processor @@ -358,16 +350,17 @@ contains ! Outputting global stability criteria extrema at current time-step if (proc_rank == 0) then + write (3, '(13X,I9,13X,F10.6,13X,F10.6,13X,F10.6)', advance="no") & + t_step, dt, mytime, icfl_max_glb + if (viscous) then - write (3, '(6X,I8,F10.6,6X,6X,F10.6,6X,F9.6,6X,F9.6,6X,F10.6)') & - t_step, dt, t_step*dt, icfl_max_glb, & + write (3, '(13X,F10.6,13X,ES16.6)', advance="no") & vcfl_max_glb, & Rc_min_glb - else - write (3, '(13X,I8,14X,F10.6,14X,F10.6,13X,F9.6)') & - t_step, dt, t_step*dt, icfl_max_glb end if + write(3, *) ! new line + if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. Exiting.') elseif (icfl_max_glb > 1._wp) then diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 8378d3044d..c72fd9b64c 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -456,6 +456,24 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'output_partial_domain': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, + 'lag_header': ParamType.LOG, + 'lag_txt_wrt': ParamType.LOG, + 'lag_db_wrt': ParamType.LOG, + 'lag_id_wrt': ParamType.LOG, + 'lag_pos_wrt': ParamType.LOG, + 'lag_pos_prev_wrt': ParamType.LOG, + 'lag_vel_wrt': ParamType.LOG, + 'lag_rad_wrt': ParamType.LOG, + 'lag_rvel_wrt': ParamType.LOG, + 'lag_r0_wrt': ParamType.LOG, + 'lag_rmax_wrt': ParamType.LOG, + 'lag_rmin_wrt': ParamType.LOG, + 'lag_dphidt_wrt': ParamType.LOG, + 'lag_pres_wrt': ParamType.LOG, + 'lag_mv_wrt': ParamType.LOG, + 'lag_mg_wrt': ParamType.LOG, + 'lag_betaT_wrt': ParamType.LOG, + 'lag_betaC_wrt': ParamType.LOG, }) for cmp in ["x", "y", "z"]: From e549426511b66f467174de25a7016d0ca1247cd5 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:58:27 -0400 Subject: [PATCH 56/62] format, spelling, and lint --- src/post_process/m_data_output.fpp | 17 ++++++++--------- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_data_output.fpp | 8 ++++---- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index d9b9fec3eb..db9d4f3e0d 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1257,8 +1257,7 @@ contains end subroutine s_write_lag_bubbles_results_to_text - - impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) + impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) integer, intent(in) :: t_step @@ -1360,9 +1359,9 @@ contains if (nBub > 0) then #:for VAR in ['bub_id', 'px', 'py', 'pz', 'ppx', 'ppy', 'ppz', 'vx', 'vy', 'vz', & - 'radius', 
'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & - 'pressure', 'mv', 'mg', 'betaT', 'betaC'] - allocate(${VAR}$(nBub)) + 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & + 'pressure', 'mv', 'mg', 'betaT', 'betaC'] + allocate (${VAR}$ (nBub)) #:endfor allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) @@ -1387,10 +1386,10 @@ contains ! Extract data from MPI_IO_DATA_lg_bubbles array ! Adjust these indices based on your actual data layout #:for VAR, IDX in [('bub_id', 1), ('px', 2), ('py',3), ('pz',4), ('ppx',5), ('ppy',6), ('ppz',7), & - ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & - ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & - ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] - ${VAR}$(:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) + ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & + ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & + ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] + ${VAR}$ (:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) #:endfor ! Next, the root processor proceeds to record all of the spatial diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 9c094c8998..3b12755aa7 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -59,7 +59,7 @@ contains #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (bf_${XYZ}$) then - accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) + accel_bf(${DIR}$) = g_${XYZ}$+k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if #:endfor diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index cebffbeb96..7cda250df3 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1402,7 +1402,7 @@ contains end subroutine s_gradient_dir - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. !! @param q_time Current time impure subroutine s_write_lag_particles(qtime) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 90e750babe..0bc804b183 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -161,11 +161,11 @@ contains trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then - write(3, '(13X,A10,13X,A16)', advance="no") & + write (3, '(13X,A10,13X,A16)', advance="no") & trim('VCFL Max'), trim('Rc Min') end if - write(3, *) ! new line + write (3, *) ! new line end subroutine s_open_run_time_information_file @@ -359,7 +359,7 @@ contains Rc_min_glb end if - write(3, *) ! new line + write (3, *) ! new line if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. 
Exiting.') From bf2138ba0807ee5dddc723d3ac8bea83e031fcfb Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Thu, 9 Oct 2025 10:07:05 -0400 Subject: [PATCH 57/62] typos from github runner --- .typos.toml | 1 + docs/documentation/gpuParallelization.md | 2 +- examples/1D_shuosher_analytical/case.py | 2 +- examples/1D_shuosher_old/case.py | 2 +- examples/1D_shuosher_teno5/case.py | 2 +- examples/1D_shuosher_teno7/case.py | 2 +- examples/1D_shuosher_wenojs5/case.py | 2 +- examples/1D_shuosher_wenom5/case.py | 2 +- examples/1D_shuosher_wenoz5/case.py | 2 +- src/common/m_boundary_common.fpp | 2 +- src/common/m_phase_change.fpp | 4 ++-- src/simulation/m_body_forces.fpp | 2 +- 12 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.typos.toml b/.typos.toml index 492855221f..5a9775699a 100644 --- a/.typos.toml +++ b/.typos.toml @@ -19,6 +19,7 @@ strang = "strang" Strang = "Strang" TKE = "TKE" HSA = "HSA" +INFP = "INFP" [files] extend-exclude = ["docs/documentation/references*", "tests/", "toolchain/cce_simulation_workgroup_256.sh"] diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index c40d3c57d9..8c64996599 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -31,7 +31,7 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using - Data on the GPU has a reference counter - When data is referred to being allocated, it means that GPU memory is allocated if it is not already present in GPU memory. If a variable is already present, the reference counter is just incremented. -- When data is referred to being dellocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory +- When data is referred to being deallocated, it means that the reference counter is decremented. If the reference counter is zero, then the data is actually deallocated from GPU memory - When data is referred to being attached, it means that the device pointer attaches to target if it not already attached. If pointer is already attached, then the attachment counter is just incremented - When data is referred to being detached, it means that the attachment counter is decremented. 
If attachment counter is zero, then actually detached diff --git a/examples/1D_shuosher_analytical/case.py b/examples/1D_shuosher_analytical/case.py index 30e50acff5..8126714ff4 100644 --- a/examples/1D_shuosher_analytical/case.py +++ b/examples/1D_shuosher_analytical/case.py @@ -57,7 +57,7 @@ "patch_icpp(1)%pres": 10.3333, "patch_icpp(1)%alpha_rho(1)": 3.957143, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of -4 < x < 5 + # One analytic patch to take care of -4 < x < 5 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 0.5, diff --git a/examples/1D_shuosher_old/case.py b/examples/1D_shuosher_old/case.py index 24dd240ddf..7ad71e733a 100644 --- a/examples/1D_shuosher_old/case.py +++ b/examples/1D_shuosher_old/case.py @@ -57,7 +57,7 @@ "patch_icpp(1)%pres": 10.3333, "patch_icpp(1)%alpha_rho(1)": 3.957143, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of -4 < x < 5 + # One analytic patch to take care of -4 < x < 5 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 0.5, diff --git a/examples/1D_shuosher_teno5/case.py b/examples/1D_shuosher_teno5/case.py index 73189b3cec..2477ba6a87 100644 --- a/examples/1D_shuosher_teno5/case.py +++ b/examples/1D_shuosher_teno5/case.py @@ -59,7 +59,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_teno7/case.py b/examples/1D_shuosher_teno7/case.py index 8a21aa1309..9bac5d82a0 100644 --- a/examples/1D_shuosher_teno7/case.py +++ b/examples/1D_shuosher_teno7/case.py @@ -59,7 +59,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenojs5/case.py b/examples/1D_shuosher_wenojs5/case.py index b99e5ef7e2..52763938fd 100644 --- a/examples/1D_shuosher_wenojs5/case.py +++ b/examples/1D_shuosher_wenojs5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenom5/case.py b/examples/1D_shuosher_wenom5/case.py index 8c96fea5b6..c3dde3a589 100644 --- a/examples/1D_shuosher_wenom5/case.py +++ b/examples/1D_shuosher_wenom5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, "patch_icpp(2)%x_centroid": 5.5, diff --git a/examples/1D_shuosher_wenoz5/case.py b/examples/1D_shuosher_wenoz5/case.py index a7e8e3e3cd..f959f363ae 100644 --- a/examples/1D_shuosher_wenoz5/case.py +++ b/examples/1D_shuosher_wenoz5/case.py @@ -58,7 +58,7 @@ "patch_icpp(1)%pres": 10.333, "patch_icpp(1)%alpha_rho(1)": 3.857, "patch_icpp(1)%alpha(1)": 1.0, - # One anlytic patch to take care of 1 < x < 10 + # One analytic patch to take care of 1 < x < 10 # Patch 2 Analytic "patch_icpp(2)%geometry": 1, 
"patch_icpp(2)%x_centroid": 5.5, diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4958bad881..f89278a86d 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -3,7 +3,7 @@ !! @brief Contains module m_boundary_conditions_common !> @brief The purpose of the module is to apply noncharacteristic and processor -!! boundary condiitons +!! boundary conditions #:include 'macros.fpp' diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index e04242a787..27cadf555a 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -165,7 +165,7 @@ contains ! depleting the mass of liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! tranferring the total mass to vapor + ! transferring the total mass to vapor q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! calling pT-equilibrium for overheated vapor, which is MFL = 0 @@ -175,7 +175,7 @@ contains call s_TSat(pSOV, TSatOV, TSOV) ! subcooled liquid case - ! tranferring the total mass to liquid + ! transferring the total mass to liquid q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM ! depleting the mass of vapor diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 3b12755aa7..3b88efe060 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -26,7 +26,7 @@ module m_body_forces contains - !> This subroutine inializes the module global array of mixture + !> This subroutine initializes the module global array of mixture !! densities in each grid cell impure subroutine s_initialize_body_forces_module From 1debcdc8df944ac0f3abf4306d1fb976eb9c5092 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 9 Oct 2025 13:23:31 -0400 Subject: [PATCH 58/62] formatting and bug fixes --- src/post_process/m_data_output.fpp | 17 ++++++++--------- src/post_process/m_start_up.f90 | 2 +- src/simulation/m_body_forces.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 4 +++- src/simulation/m_data_output.fpp | 8 ++++---- src/simulation/m_start_up.fpp | 6 ++++-- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index d9b9fec3eb..db9d4f3e0d 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1257,8 +1257,7 @@ contains end subroutine s_write_lag_bubbles_results_to_text - - impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) + impure subroutine s_write_lag_bubbles_to_formatted_database_file(t_step) integer, intent(in) :: t_step @@ -1360,9 +1359,9 @@ contains if (nBub > 0) then #:for VAR in ['bub_id', 'px', 'py', 'pz', 'ppx', 'ppy', 'ppz', 'vx', 'vy', 'vz', & - 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & - 'pressure', 'mv', 'mg', 'betaT', 'betaC'] - allocate(${VAR}$(nBub)) + 'radius', 'rvel', 'rnot', 'rmax', 'rmin', 'dphidt', & + 'pressure', 'mv', 'mg', 'betaT', 'betaC'] + allocate (${VAR}$ (nBub)) #:endfor allocate (MPI_IO_DATA_lg_bubbles(nBub, 1:lag_io_vars)) @@ -1387,10 +1386,10 @@ contains ! Extract data from MPI_IO_DATA_lg_bubbles array ! 
Adjust these indices based on your actual data layout #:for VAR, IDX in [('bub_id', 1), ('px', 2), ('py',3), ('pz',4), ('ppx',5), ('ppy',6), ('ppz',7), & - ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & - ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & - ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] - ${VAR}$(:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) + ('vx',8), ('vy',9), ('vz',10), ('radius',11), ('rvel',12), & + ('rnot',13), ('rmax',14), ('rmin',15), ('dphidt',16), & + ('pressure',17), ('mv',18), ('mg',19), ('betaT',20), ('betaC',21)] + ${VAR}$ (:) = MPI_IO_DATA_lg_bubbles(:, ${IDX}$) #:endfor ! Next, the root processor proceeds to record all of the spatial diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index c9b681f8f9..7ef6de571d 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -184,7 +184,7 @@ impure subroutine s_perform_time_step(t_step) int(ceiling(100._wp*(real(t_step - n_start)/(n_save)))), & t_step, n_save, wall_time_avg, wall_time else - print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I0, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & + print '(" [", I3, "%] Saving ", I8, " of ", I0, " @ t_step = ", I8, " Time Avg = ", ES16.6, " Time/step = ", ES12.6, "")', & int(ceiling(100._wp*(real(t_step - t_step_start)/(t_step_stop - t_step_start + 1)))), & (t_step - t_step_start)/t_step_save + 1, & (t_step_stop - t_step_start)/t_step_save + 1, & diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 9c094c8998..3b12755aa7 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -59,7 +59,7 @@ contains #:for DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (bf_${XYZ}$) then - accel_bf(${DIR}$) = g_${XYZ}$ + k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) + accel_bf(${DIR}$) = g_${XYZ}$+k_${XYZ}$*sin(w_${XYZ}$*t - p_${XYZ}$) end if #:endfor diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index cebffbeb96..fa01e93475 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -529,6 +529,8 @@ contains gas_mg(i) = MPI_IO_DATA_lag_bubbles(i, 19) gas_betaT(i) = MPI_IO_DATA_lag_bubbles(i, 20) gas_betaC(i) = MPI_IO_DATA_lag_bubbles(i, 21) + cell = -buff_size + call s_locate_cell(mtn_pos(i, 1:3, 1), cell, mtn_s(i, 1:3, 1)) end do deallocate (MPI_IO_DATA_lag_bubbles) @@ -1402,7 +1404,7 @@ contains end subroutine s_gradient_dir - !> Subroutine that writes on each time step the changes of the lagrangian bubbles. + !> Subroutine that writes on each time step the changes of the lagrangian bubbles. !! @param q_time Current time impure subroutine s_write_lag_particles(qtime) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 90e750babe..0bc804b183 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -36,7 +36,7 @@ module m_data_output implicit none - private; + private; public :: s_initialize_data_output_module, & s_open_run_time_information_file, & s_open_com_files, & @@ -161,11 +161,11 @@ contains trim('Time-step'), trim('dt'), trim('Time'), trim('ICFL Max') if (viscous) then - write(3, '(13X,A10,13X,A16)', advance="no") & + write (3, '(13X,A10,13X,A16)', advance="no") & trim('VCFL Max'), trim('Rc Min') end if - write(3, *) ! new line + write (3, *) ! new line end subroutine s_open_run_time_information_file @@ -359,7 +359,7 @@ contains Rc_min_glb end if - write(3, *) ! new line + write (3, *) ! 
new line if (.not. f_approx_equal(icfl_max_glb, icfl_max_glb)) then call s_mpi_abort('ICFL is NaN. Exiting.') diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index c69364aa2f..21be331e8e 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1261,7 +1261,9 @@ contains end if if (bubbles_lagrange) then - $:GPU_UPDATE(host='[intfc_rad]') + $:GPU_UPDATE(host='[lag_id, mtn_pos, mtn_posPrev, mtn_vel, intfc_rad, & + & intfc_vel, bub_R0, Rmax_stats, Rmin_stats, bub_dphidt, gas_p, & + & gas_mv, gas_mg, gas_betaT, gas_betaC]') do i = 1, nBubs if (ieee_is_nan(intfc_rad(i, 1)) .or. intfc_rad(i, 1) <= 0._wp) then call s_mpi_abort("Bubble radius is negative or NaN, please reduce dt.") @@ -1270,7 +1272,7 @@ contains $:GPU_UPDATE(host='[q_beta%vf(1)%sf]') call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, bc_type, q_beta%vf(1)) - $:GPU_UPDATE(host='[Rmax_stats,Rmin_stats,gas_p,gas_mv,intfc_vel]') + call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else From eea226fb3990de338bc36d59a419b320ae5aac8f Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Thu, 9 Oct 2025 13:40:12 -0400 Subject: [PATCH 59/62] autoreview suggestions --- src/post_process/m_data_output.fpp | 2 +- src/simulation/m_bubbles_EL.fpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index db9d4f3e0d..5064490d34 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -1230,7 +1230,7 @@ contains if (lag_id_wrt) write (29, '(I6, A)', advance='no') id, ', ' if (lag_pos_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(1), ', ', inputvals(2), ', ', inputvals(3), ', ' if (lag_pos_prev_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(4), ', ', inputvals(5), ', ', inputvals(6), ', ' - if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(8), ', ' + if (lag_vel_wrt) write (29, '(3(E15.7, A))', advance='no') inputvals(7), ', ', inputvals(8), ', ', inputvals(9), ', ' if (lag_rad_wrt) write (29, '(E15.7, A)', advance='no') inputvals(10), ', ' if (lag_rvel_wrt) write (29, '(E15.7, A)', advance='no') inputvals(11), ', ' if (lag_r0_wrt) write (29, '(E15.7, A)', advance='no') inputvals(12), ', ' diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index fa01e93475..7d0dd32bd8 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -339,10 +339,6 @@ contains end if end if - ! 
If particle is in the ghost cells, find the closest non-ghost cell - !cell(1) = min(max(cell(1), 0), m) - !cell(2) = min(max(cell(2), 0), n) - !if (p > 0) cell(3) = min(max(cell(3), 0), p) call s_convert_to_mixture_variables(q_cons_vf, cell(1), cell(2), cell(3), & rhol, gamma, pi_inf, qv, Re) dynP = 0._wp From 135401168b824db564bbf4ac29686c5272a0ffe9 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 10 Oct 2025 08:17:32 -0400 Subject: [PATCH 60/62] support 1D output in paraview with 1D meshes --- src/common/m_mpi_common.fpp | 4 +- src/post_process/m_data_output.fpp | 339 ++++++++++------------- src/post_process/m_global_parameters.fpp | 22 +- src/post_process/m_mpi_proxy.fpp | 15 +- 4 files changed, 167 insertions(+), 213 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 07463137f2..bc63e1bdfc 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1570,14 +1570,14 @@ contains #ifdef MFC_POST_PROCESS ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then + if (proc_coords(1) > 0 .and. format == 1) then offset_x%beg = 2 else offset_x%beg = 0 end if ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then + if (proc_coords(1) < num_procs_x - 1 .and. format == 1) then offset_x%end = 2 else offset_x%end = 0 diff --git a/src/post_process/m_data_output.fpp b/src/post_process/m_data_output.fpp index 5064490d34..371ad8e862 100644 --- a/src/post_process/m_data_output.fpp +++ b/src/post_process/m_data_output.fpp @@ -156,7 +156,7 @@ contains ! the offsets and the one bookkeeping the number of cell-boundaries ! in each active coordinate direction. Note that all these variables ! are only needed by the Silo-HDF5 format for multidimensional data. - if (format == 1 .and. n > 0) then + if (format == 1) then allocate (data_extents(1:2, 0:num_procs - 1)) @@ -165,11 +165,16 @@ contains allocate (lo_offset(1:3)) allocate (hi_offset(1:3)) allocate (dims(1:3)) - else + elseif (n > 0) then allocate (spatial_extents(1:4, 0:num_procs - 1)) allocate (lo_offset(1:2)) allocate (hi_offset(1:2)) allocate (dims(1:2)) + else + allocate (spatial_extents(1:2, 0:num_procs - 1)) + allocate (lo_offset(1:1)) + allocate (hi_offset(1:1)) + allocate (dims(1:1)) end if end if @@ -181,7 +186,7 @@ contains ! With the same, latter, requirements, the variables bookkeeping the ! number of cell-boundaries in each active coordinate direction are ! also set here. - if (format == 1 .and. n > 0) then + if (format == 1) then if (p > 0) then if (grid_geometry == 3) then lo_offset(:) = (/offset_y%beg, offset_z%beg, offset_x%beg/) @@ -200,12 +205,16 @@ contains n + offset_y%beg + offset_y%end + 2, & p + offset_z%beg + offset_z%end + 2/) end if - else + elseif (n > 0) then lo_offset(:) = (/offset_x%beg, offset_y%beg/) hi_offset(:) = (/offset_x%end, offset_y%end/) dims(:) = (/m + offset_x%beg + offset_x%end + 2, & n + offset_y%beg + offset_y%end + 2/) + else + lo_offset(:) = (/offset_x%beg/) + hi_offset(:) = (/offset_x%end/) + dims(:) = (/m + offset_x%beg + offset_x%end + 2/) end if end if @@ -659,7 +668,7 @@ contains ! Silo-HDF5 Database Format - if (format == 1 .and. n > 0) then + if (format == 1) then ! For multidimensional data sets, the spatial extents of all of ! 
the grid(s) handled by the local processor(s) are recorded so @@ -679,10 +688,13 @@ contains maxval(y_cb), maxval(z_cb)/) end if - else + elseif (n > 0) then spatial_extents(:, 0) = (/minval(x_cb), minval(y_cb), & maxval(x_cb), maxval(y_cb)/) + else + + spatial_extents(:, 0) = (/minval(x_cb), maxval(x_cb)/) end if ! Next, the root processor proceeds to record all of the spatial @@ -715,48 +727,45 @@ contains ! with its offsets that indicate the presence and size of ghost ! zone layer(s), are put in the formatted database slave file. - if (precision == 1) then - if (p > 0) then - z_cb_s(:) = real(z_cb(:), sp) + if (p > 0) then + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) + err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) + if (grid_geometry == 3) then + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + y_cb, z_cb, x_cb, dims, 3, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) + else + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + x_cb, y_cb, z_cb, dims, 3, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) end if - x_cb_s(:) = real(x_cb(:), sp) - y_cb_s(:) = real(y_cb(:), sp) + err = DBFREEOPTLIST(optlist) + elseif (n > 0) then + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) + err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + x_cb, y_cb, DB_F77NULL, dims, 2, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) + err = DBFREEOPTLIST(optlist) + else + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) + err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) + err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & + 'x', 1, 'y', 1, 'z', 1, & + x_cb, DB_F77NULL, DB_F77NULL, dims, 1, & + DB_DOUBLE, DB_COLLINEAR, & + optlist, ierr) + err = DBFREEOPTLIST(optlist) end if - - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - if (p > 0) then - err = DBMKOPTLIST(2, optlist) - err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) - err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) - if (grid_geometry == 3) then - err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & - 'x', 1, 'y', 1, 'z', 1, & - y_cb${SFX}$, z_cb${SFX}$, x_cb${SFX}$, dims, 3, & - ${DBT}$, DB_COLLINEAR, & - optlist, ierr) - else - err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & - 'x', 1, 'y', 1, 'z', 1, & - x_cb${SFX}$, y_cb${SFX}$, z_cb${SFX}$, dims, 3, & - ${DBT}$, DB_COLLINEAR, & - optlist, ierr) - end if - err = DBFREEOPTLIST(optlist) - else - err = DBMKOPTLIST(2, optlist) - err = DBADDIOPT(optlist, DBOPT_LO_OFFSET, lo_offset) - err = DBADDIOPT(optlist, DBOPT_HI_OFFSET, hi_offset) - err = DBPUTQM(dbfile, 'rectilinear_grid', 16, & - 'x', 1, 'y', 1, 'z', 1, & - x_cb${SFX}$, y_cb${SFX}$, DB_F77NULL, dims, 2, & - ${DBT}$, DB_COLLINEAR, & - optlist, ierr) - err = DBFREEOPTLIST(optlist) - end if - end if - #:endfor - ! END: Silo-HDF5 Database Format ! Binary Database Format @@ -873,144 +882,46 @@ contains if (format == 1) then - ! In 1D, a curve object, featuring the local processor grid and - ! flow variable data, is written to the formatted database slave - ! file. The root process, on the other hand, will also take care - ! of gathering the entire grid and associated flow variable data - ! and write it to the formatted database master file. - if (n == 0) then - - if (precision == 1 .and. 
wp == dp) then - x_cc_s(:) = real(x_cc(:), sp) - q_sf_s(:, :, :) = real(q_sf(:, :, :), sp) - elseif (precision == 1 .and. wp == sp) then - x_cc_s(:) = x_cc(:) - q_sf_s(:, :, :) = q_sf(:, :, :) - end if - - ! Writing the curve object associated with the local process - ! to the formatted database slave file - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - err = DBPUTCURVE(dbfile, trim(varname), len_trim(varname), & - x_cc${SFX}$ (0:m), q_sf${SFX}$, ${DBT}$, m + 1, & - DB_F77NULL, ierr) - end if - #:endfor - - ! Assembling the local grid and flow variable data for the - ! entire computational domain on to the root process - - if (num_procs > 1) then - call s_mpi_defragment_1d_grid_variable() - call s_mpi_defragment_1d_flow_variable(q_sf, q_root_sf) - - if (precision == 1) then - x_root_cc_s(:) = real(x_root_cc(:), sp) - q_root_sf_s(:, :, :) = real(q_root_sf(:, :, :), sp) - end if - else - if (precision == 1) then - x_root_cc_s(:) = real(x_cc(:), sp) - q_root_sf_s(:, :, :) = real(q_sf(:, :, :), sp) - else - x_root_cc(:) = x_cc(:) - q_root_sf(:, :, :) = q_sf(:, :, :) - end if - end if - - ! Writing the curve object associated with the root process - ! to the formatted database master file - if (proc_rank == 0) then - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - err = DBPUTCURVE(dbroot, trim(varname), & - len_trim(varname), & - x_root_cc${SFX}$, q_root_sf${SFX}$, & - ${DBT}$, m_root + 1, & - DB_F77NULL, ierr) - end if - #:endfor - end if - - return - - ! In multidimensions, the local process(es) take care of writing - ! the flow variable data they are in charge of to the formatted - ! database slave file. The root processor, additionally, is also - ! responsible in gathering the flow variable extents of each of - ! the local processor(s) and writing them to formatted database - ! master file. + ! Determining the extents of the flow variable on each local + ! process and gathering all this information on root process + if (num_procs > 1) then + call s_mpi_gather_data_extents(q_sf, data_extents) else + data_extents(:, 0) = (/minval(q_sf), maxval(q_sf)/) + end if - ! Determining the extents of the flow variable on each local - ! process and gathering all this information on root process - if (num_procs > 1) then - call s_mpi_gather_data_extents(q_sf, data_extents) - else - data_extents(:, 0) = (/minval(q_sf), maxval(q_sf)/) - end if - - ! Next, the root process proceeds to write the gathered flow - ! variable data extents to formatted database master file. - if (proc_rank == 0) then + ! Next, the root process proceeds to write the gathered flow + ! variable data extents to formatted database master file. 
+ if (proc_rank == 0) then - do i = 1, num_procs - write (varnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & - '/', t_step, '.silo:'//trim(varname) - end do + do i = 1, num_procs + write (varnames(i), '(A,I0,A,I0,A)') '../p', i - 1, & + '/', t_step, '.silo:'//trim(varname) + end do - vartypes = DB_QUADVAR + vartypes = DB_QUADVAR - err = DBSET2DSTRLEN(len(varnames(1))) - err = DBMKOPTLIST(2, optlist) - err = DBADDIOPT(optlist, DBOPT_EXTENTS_SIZE, 2) - err = DBADDDOPT(optlist, DBOPT_EXTENTS, data_extents) - err = DBPUTMVAR(dbroot, trim(varname), & - len_trim(varname), num_procs, & - varnames, len_trim(varnames), & - vartypes, optlist, ierr) - err = DBFREEOPTLIST(optlist) + err = DBSET2DSTRLEN(len(varnames(1))) + err = DBMKOPTLIST(2, optlist) + err = DBADDIOPT(optlist, DBOPT_EXTENTS_SIZE, 2) + err = DBADDDOPT(optlist, DBOPT_EXTENTS, data_extents) + err = DBPUTMVAR(dbroot, trim(varname), & + len_trim(varname), num_procs, & + varnames, len_trim(varnames), & + vartypes, optlist, ierr) + err = DBFREEOPTLIST(optlist) - end if + end if - ! Finally, each of the local processor(s) proceeds to write - ! the flow variable data that it is responsible for to the - ! formatted database slave file. - if (wp == dp) then - if (precision == 1) then - do i = -offset_x%beg, m + offset_x%end - do j = -offset_y%beg, n + offset_y%end - do k = -offset_z%beg, p + offset_z%end - q_sf_s(i, j, k) = real(q_sf(i, j, k), sp) - end do - end do - end do - if (grid_geometry == 3) then - do i = -offset_x%beg, m + offset_x%end - do j = -offset_y%beg, n + offset_y%end - do k = -offset_z%beg, p + offset_z%end - cyl_q_sf_s(j, k, i) = q_sf_s(i, j, k) - end do - end do - end do - end if - else - if (grid_geometry == 3) then - do i = -offset_x%beg, m + offset_x%end - do j = -offset_y%beg, n + offset_y%end - do k = -offset_z%beg, p + offset_z%end - cyl_q_sf(j, k, i) = q_sf(i, j, k) - end do - end do - end do - end if - end if - elseif (wp == sp) then + ! Finally, each of the local processor(s) proceeds to write + ! the flow variable data that it is responsible for to the + ! formatted database slave file. 
+ if (wp == dp) then + if (precision == 1) then do i = -offset_x%beg, m + offset_x%end do j = -offset_y%beg, n + offset_y%end do k = -offset_z%beg, p + offset_z%end - q_sf_s(i, j, k) = q_sf(i, j, k) + q_sf_s(i, j, k) = real(q_sf(i, j, k), sp) end do end do end do @@ -1023,38 +934,72 @@ contains end do end do end if + else + if (grid_geometry == 3) then + do i = -offset_x%beg, m + offset_x%end + do j = -offset_y%beg, n + offset_y%end + do k = -offset_z%beg, p + offset_z%end + cyl_q_sf(j, k, i) = q_sf(i, j, k) + end do + end do + end do + end if end if + elseif (wp == sp) then + do i = -offset_x%beg, m + offset_x%end + do j = -offset_y%beg, n + offset_y%end + do k = -offset_z%beg, p + offset_z%end + q_sf_s(i, j, k) = q_sf(i, j, k) + end do + end do + end do + if (grid_geometry == 3) then + do i = -offset_x%beg, m + offset_x%end + do j = -offset_y%beg, n + offset_y%end + do k = -offset_z%beg, p + offset_z%end + cyl_q_sf_s(j, k, i) = q_sf_s(i, j, k) + end do + end do + end do + end if + end if - #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] - if (precision == ${PRECISION}$) then - if (p > 0) then - if (grid_geometry == 3) then - err = DBPUTQV1(dbfile, trim(varname), & - len_trim(varname), & - 'rectilinear_grid', 16, & - cyl_q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & - 0, ${DBT}$, DB_ZONECENT, & - DB_F77NULL, ierr) - else - err = DBPUTQV1(dbfile, trim(varname), & - len_trim(varname), & - 'rectilinear_grid', 16, & - q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & - 0, ${DBT}$, DB_ZONECENT, & - DB_F77NULL, ierr) - end if + #:for PRECISION, SFX, DBT in [(1,'_s','DB_FLOAT'),(2,'',"DB_DOUBLE")] + if (precision == ${PRECISION}$) then + if (p > 0) then + if (grid_geometry == 3) then + err = DBPUTQV1(dbfile, trim(varname), & + len_trim(varname), & + 'rectilinear_grid', 16, & + cyl_q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & + 0, ${DBT}$, DB_ZONECENT, & + DB_F77NULL, ierr) else err = DBPUTQV1(dbfile, trim(varname), & len_trim(varname), & 'rectilinear_grid', 16, & - q_sf${SFX}$, dims - 1, 2, DB_F77NULL, & + q_sf${SFX}$, dims - 1, 3, DB_F77NULL, & 0, ${DBT}$, DB_ZONECENT, & DB_F77NULL, ierr) end if - end if - #:endfor + elseif (n > 0) then + err = DBPUTQV1(dbfile, trim(varname), & + len_trim(varname), & + 'rectilinear_grid', 16, & + q_sf${SFX}$, dims - 1, 2, DB_F77NULL, & + 0, ${DBT}$, DB_ZONECENT, & + DB_F77NULL, ierr) + else + err = DBPUTQV1(dbfile, trim(varname), & + len_trim(varname), & + 'rectilinear_grid', 16, & + q_sf${SFX}$, dims - 1, 1, DB_F77NULL, & + 0, ${DBT}$, DB_ZONECENT, & + DB_F77NULL, ierr) - end if + end if + end if + #:endfor ! END: Silo-HDF5 Database Format @@ -1871,7 +1816,7 @@ contains ! the offsets and the one bookkeeping the number of cell-boundaries ! in each active coordinate direction. Note that all these variables ! were only needed by Silo-HDF5 format for multidimensional data. - if (format == 1 .and. 
n > 0) then + if (format == 1) then deallocate (spatial_extents) deallocate (data_extents) deallocate (lo_offset) diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 2400b13ad1..07046fc2bf 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -60,7 +60,6 @@ module m_global_parameters !> @name Cell-boundary locations in the x-, y- and z-coordinate directions !> @{ real(wp), allocatable, dimension(:) :: x_cb, x_root_cb, y_cb, z_cb - real(wp), allocatable, dimension(:) :: x_cb_s, y_cb_s, z_cb_s !> @} !> @name Cell-center locations in the x-, y- and z-coordinate directions @@ -850,7 +849,7 @@ contains ! in the Silo-HDF5 format. If this is the case, one must also verify ! whether the raw simulation data is 2D or 3D. In the 2D case, size ! of the z-coordinate direction ghost zone layer must be zeroed out. - if (num_procs == 1 .or. format /= 1 .or. n == 0) then + if (num_procs == 1 .or. format /= 1) then offset_x%beg = 0 offset_x%end = 0 @@ -859,6 +858,13 @@ contains offset_z%beg = 0 offset_z%end = 0 + elseif (n == 0) then + + offset_y%beg = 0 + offset_y%end = 0 + offset_z%beg = 0 + offset_z%end = 0 + elseif (p == 0) then offset_z%beg = 0 @@ -893,17 +899,7 @@ contains idwbuff(3)%end = idwint(3)%end - idwbuff(3)%beg ! Allocating single precision grid variables if needed - if (precision == 1) then - allocate (x_cb_s(-1 - offset_x%beg:m + offset_x%end)) - if (n > 0) then - allocate (y_cb_s(-1 - offset_y%beg:n + offset_y%end)) - if (p > 0) then - allocate (z_cb_s(-1 - offset_z%beg:p + offset_z%end)) - end if - end if - else - allocate (x_cc_s(-buff_size:m + buff_size)) - end if + allocate (x_cc_s(-buff_size:m + buff_size)) ! Allocating the grid variables in the x-coordinate direction allocate (x_cb(-1 - offset_x%beg:m + offset_x%end)) diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 7adb31a433..795936250e 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -235,7 +235,7 @@ contains ierr) end if ! Simulation is 2D - else + elseif (n > 0) then ! Minimum spatial extent in the x-direction call MPI_GATHERV(minval(x_cb), 1, mpi_p, & @@ -260,7 +260,20 @@ contains spatial_extents(4, 0), recvcounts, 4*displs, & mpi_p, 0, MPI_COMM_WORLD, & ierr) + ! Simulation is 1D + else + ! Minimum spatial extent in the x-direction + call MPI_GATHERV(minval(x_cb), 1, mpi_p, & + spatial_extents(1, 0), recvcounts, 4*displs, & + mpi_p, 0, MPI_COMM_WORLD, & + ierr) + + ! 
Maximum spatial extent in the x-direction + call MPI_GATHERV(maxval(x_cb), 1, mpi_p, & + spatial_extents(2, 0), recvcounts, 4*displs, & + mpi_p, 0, MPI_COMM_WORLD, & + ierr) end if #endif From 8311260032601cddf68fe05f3ff0ec90913e0f97 Mon Sep 17 00:00:00 2001 From: Ben Wilfong <48168887+wilfonba@users.noreply.github.com> Date: Fri, 10 Oct 2025 08:20:51 -0400 Subject: [PATCH 61/62] CI fix for MacOS runners --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3031858eb1..503f691ec6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,11 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: Set up Python 3.13 + uses: actions/setup-python@v5 + with: + python-version: '3.13' + - name: Setup MacOS if: matrix.os == 'macos' run: | From ca6e5929911d26e603ea06b8936a41866ee4d9d5 Mon Sep 17 00:00:00 2001 From: Ben Wilfong Date: Mon, 13 Oct 2025 15:35:46 -0400 Subject: [PATCH 62/62] correct merge errors --- src/common/m_mpi_common.fpp | 17 +---------------- src/post_process/m_mpi_proxy.fpp | 9 --------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 6e403a306e..fd9310abc3 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -404,21 +404,17 @@ contains impure subroutine s_mpi_reduce_stability_criteria_extrema(icfl_max_loc, & vcfl_max_loc, & Rc_min_loc, & - ccfl_max_loc, & icfl_max_glb, & vcfl_max_glb, & - Rc_min_glb, & - ccfl_max_glb) + Rc_min_glb) real(wp), intent(in) :: icfl_max_loc real(wp), intent(in) :: vcfl_max_loc real(wp), intent(in) :: Rc_min_loc - real(wp), intent(in) :: ccfl_max_loc real(wp), intent(out) :: icfl_max_glb real(wp), intent(out) :: vcfl_max_glb real(wp), intent(out) :: Rc_min_glb - real(wp), intent(out) :: ccfl_max_glb #ifdef MFC_SIMULATION #ifdef MFC_MPI @@ -438,24 +434,13 @@ contains mpi_p, MPI_MIN, 0, & MPI_COMM_WORLD, ierr) end if - - if (surface_tension) then - call MPI_REDUCE(ccfl_max_loc, ccfl_max_glb, 1, & - mpi_p, MPI_MAX, 0, & - MPI_COMM_WORLD, ierr) - end if - #else - icfl_max_glb = icfl_max_loc if (viscous) then vcfl_max_glb = vcfl_max_loc Rc_min_glb = Rc_min_loc end if - - if (surface_tension) ccfl_max_glb = ccfl_max_loc - #endif #endif diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index aa127af969..795936250e 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -104,17 +104,8 @@ contains & 'file_per_process', 'relax', 'cf_wrt', 'igr', 'liutex_wrt', & & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & -<<<<<<< HEAD - & 'output_partial_domain', 'relativity', 'cont_damage', & - & 'lag_header', 'lag_txt_wrt', 'lag_db_wrt', 'lag_id_wrt', & - & 'lag_pos_wrt', 'lag_pos_prev_wrt', 'lag_vel_wrt', 'lag_rad_wrt', & - & 'lag_rvel_wrt', 'lag_r0_wrt', 'lag_rmax_wrt', 'lag_rmin_wrt', & - & 'lag_dphidt_wrt', 'lag_pres_wrt', 'lag_mv_wrt', 'lag_mg_wrt', & - & 'lag_betaT_wrt', 'lag_betaC_wrt', 'bc_io', 'down_sample' ] -======= & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & & 'down_sample' ] ->>>>>>> ImprovedELBubblesIO call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor