@@ -1227,157 +1227,244 @@ namespace amrex
12271227 return hv;
12281228 }
12291229
1230- Real volumeWeightedSum (Vector<MultiFab const *> const & mf, int icomp,
1231- Vector<Geometry> const & geom,
1232- Vector<IntVect> const & ratio,
1233- bool local)
1234- {
1235- ReduceOps<ReduceOpSum> reduce_op;
1236- ReduceData<Real> reduce_data (reduce_op);
1230+ Real volumeWeightedSum (Vector<MultiFab const *> const & mf, int icomp,
1231+ Vector<Geometry> const & geom,
1232+ Vector<IntVect> const & ratio,
1233+ bool local)
1234+ {
1235+ ReduceOps<ReduceOpSum> reduce_op;
1236+ ReduceData<Real> reduce_data (reduce_op);
12371237
12381238#ifdef AMREX_USE_EB
1239- bool has_eb = !(mf[0 ]->isAllRegular ());
1239+ bool has_eb = !(mf[0 ]->isAllRegular ());
12401240#endif
12411241
1242- int nlevels = mf.size ();
1243- for (int ilev = 0 ; ilev < nlevels-1 ; ++ilev) {
1244- iMultiFab mask = makeFineMask (*mf[ilev], *mf[ilev+1 ], IntVect (0 ),
1245- ratio[ilev],Periodicity::NonPeriodic (),
1246- 0 , 1 );
1247- auto const & m = mask.const_arrays ();
1248- auto const & a = mf[ilev]->const_arrays ();
1249- auto const dx = geom[ilev].CellSizeArray ();
1250- Real dv = AMREX_D_TERM (dx[0 ],*dx[1 ],*dx[2 ]);
1251- #ifdef AMREX_USE_EB
1252- if (has_eb) {
1253- AMREX_ASSERT (mf[ilev]->hasEBFabFactory ());
1254- auto const & f = dynamic_cast <EBFArrayBoxFactory const &>
1255- (mf[ilev]->Factory ());
1256- auto const & vfrac = f.getVolFrac ();
1257- auto const & va = vfrac.const_arrays ();
1258- reduce_op.eval (*mf[ilev], IntVect (0 ), reduce_data,
1259- [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
1260- -> Real
1261- {
1262- return m[box_no](i,j,k) ? Real (0 .)
1263- : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k);
1264- });
1265- } else
1266- #endif
1267- {
1268- #if (AMREX_SPACEDIM == 1)
1269- if (geom[ilev].IsSPHERICAL ()) {
1270- const auto rlo = geom[ilev].CellSize (0 );
1271- reduce_op.eval (*mf[ilev], IntVect (0 ), reduce_data,
1272- [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
1273- noexcept -> Real
1274- {
1275- if (m[box_no](i,j,k)) {
1276- return Real (0 .);
1277- } else {
1278- constexpr Real pi = Real (3.1415926535897932 );
1279- Real ri = rlo + dx[0 ]*i;
1280- Real ro = ri + dx[0 ];
1281- return Real (4 ./3 .)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri)
1282- * a[box_no](i,j,k,icomp);
1283- }
1284- });
1285- } else
1286- #elif (AMREX_SPACEDIM == 2)
1287- if (geom[ilev].IsRZ ()) {
1288- const auto rlo = geom[ilev].CellSize (0 );
1289- reduce_op.eval (*mf[ilev], IntVect (0 ), reduce_data,
1290- [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
1291- noexcept -> Real
1292- {
1293- if (m[box_no](i,j,k)) {
1294- return Real (0 .);
1295- } else {
1296- Real ri = rlo + dx[0 ]*i;
1297- Real ro = ri + dx[0 ];
1298- constexpr Real pi = Real (3.1415926535897932 );
1299- return pi*dx[1 ]*dx[0 ]*(ro+ri)
1300- * a[box_no](i,j,k,icomp);
1301- }
1302- });
1303- } else
1304- #endif
1305- {
1306- reduce_op.eval (*mf[ilev], IntVect (0 ), reduce_data,
1307- [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
1308- noexcept -> Real
1309- {
1310- return m[box_no](i,j,k) ? Real (0 .)
1311- : dv*a[box_no](i,j,k,icomp);
1312- });
1313- }
1314- }
1315- Gpu::streamSynchronize ();
1316- }
1317-
1318- auto const & a = mf.back ()->const_arrays ();
1319- auto const dx = geom[nlevels-1 ].CellSizeArray ();
1242+ int nlevels = mf.size ();
1243+ for (int ilev = 0 ; ilev < nlevels-1 ; ++ilev) {
1244+ iMultiFab mask = makeFineMask (*mf[ilev], *mf[ilev+1 ], IntVect (0 ),
1245+ ratio[ilev],Periodicity::NonPeriodic (),
1246+ 0 , 1 );
1247+ auto const & m = mask.const_arrays ();
1248+ auto const & a = mf[ilev]->const_arrays ();
1249+ auto const dx = geom[ilev].CellSizeArray ();
13201250 Real dv = AMREX_D_TERM (dx[0 ],*dx[1 ],*dx[2 ]);
13211251#ifdef AMREX_USE_EB
13221252 if (has_eb) {
1323- AMREX_ASSERT (mf. back () ->hasEBFabFactory ());
1253+ AMREX_ASSERT (mf[ilev] ->hasEBFabFactory ());
13241254 auto const & f = dynamic_cast <EBFArrayBoxFactory const &>
1325- (mf. back () ->Factory ());
1255+ (mf[ilev] ->Factory ());
13261256 auto const & vfrac = f.getVolFrac ();
13271257 auto const & va = vfrac.const_arrays ();
1328- reduce_op.eval (*mf. back () , IntVect (0 ), reduce_data,
1258+ reduce_op.eval (*mf[ilev] , IntVect (0 ), reduce_data,
13291259 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
13301260 -> Real
13311261 {
1332- return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k);
1262+ return m[box_no](i,j,k) ? Real (0 .)
1263+ : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k);
13331264 });
13341265 } else
13351266#endif
13361267 {
13371268#if (AMREX_SPACEDIM == 1)
1338- if (geom[nlevels- 1 ].IsSPHERICAL ()) {
1339- const auto rlo = geom[nlevels- 1 ].CellSize (0 );
1340- reduce_op.eval (*mf. back () , IntVect (0 ), reduce_data,
1269+ if (geom[ilev ].IsSPHERICAL ()) {
1270+ const auto rlo = geom[ilev ].CellSize (0 );
1271+ reduce_op.eval (*mf[ilev] , IntVect (0 ), reduce_data,
13411272 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
13421273 noexcept -> Real
13431274 {
1344- constexpr Real pi = Real (3.1415926535897932 );
1345- Real ri = rlo + dx[0 ]*i;
1346- Real ro = ri + dx[0 ];
1347- return Real (4 ./3 .)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri)
1348- * a[box_no](i,j,k,icomp);
1275+ if (m[box_no](i,j,k)) {
1276+ return Real (0 .);
1277+ } else {
1278+ constexpr Real pi = Real (3.1415926535897932 );
1279+ Real ri = rlo + dx[0 ]*i;
1280+ Real ro = ri + dx[0 ];
1281+ return Real (4 ./3 .)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri)
1282+ * a[box_no](i,j,k,icomp);
1283+ }
13491284 });
13501285 } else
13511286#elif (AMREX_SPACEDIM == 2)
1352- if (geom[nlevels- 1 ].IsRZ ()) {
1353- const auto rlo = geom[nlevels- 1 ].CellSize (0 );
1354- reduce_op.eval (*mf. back () , IntVect (0 ), reduce_data,
1287+ if (geom[ilev ].IsRZ ()) {
1288+ const auto rlo = geom[ilev ].CellSize (0 );
1289+ reduce_op.eval (*mf[ilev] , IntVect (0 ), reduce_data,
13551290 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
13561291 noexcept -> Real
13571292 {
1358- Real ri = rlo + dx[0 ]*i;
1359- Real ro = ri + dx[0 ];
1360- constexpr Real pi = Real (3.1415926535897932 );
1361- return pi*dx[1 ]*dx[0 ]*(ro+ri)
1362- * a[box_no](i,j,k,icomp);
1293+ if (m[box_no](i,j,k)) {
1294+ return Real (0 .);
1295+ } else {
1296+ Real ri = rlo + dx[0 ]*i;
1297+ Real ro = ri + dx[0 ];
1298+ constexpr Real pi = Real (3.1415926535897932 );
1299+ return pi*dx[1 ]*dx[0 ]*(ro+ri)
1300+ * a[box_no](i,j,k,icomp);
1301+ }
13631302 });
13641303 } else
13651304#endif
13661305 {
1367- reduce_op.eval (*mf.back (), IntVect (0 ), reduce_data,
1368- [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
1306+ reduce_op.eval (*mf[ilev], IntVect (0 ), reduce_data,
1307+ [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
1308+ noexcept -> Real
13691309 {
1370- return dv*a[box_no](i,j,k,icomp);
1310+ return m[box_no](i,j,k) ? Real (0 .)
1311+ : dv*a[box_no](i,j,k,icomp);
13711312 });
13721313 }
13731314 }
1315+ Gpu::streamSynchronize ();
1316+ }
1317+
1318+ auto const & a = mf.back ()->const_arrays ();
1319+ auto const dx = geom[nlevels-1 ].CellSizeArray ();
1320+ Real dv = AMREX_D_TERM (dx[0 ],*dx[1 ],*dx[2 ]);
1321+ #ifdef AMREX_USE_EB
1322+ if (has_eb) {
1323+ AMREX_ASSERT (mf.back ()->hasEBFabFactory ());
1324+ auto const & f = dynamic_cast <EBFArrayBoxFactory const &>
1325+ (mf.back ()->Factory ());
1326+ auto const & vfrac = f.getVolFrac ();
1327+ auto const & va = vfrac.const_arrays ();
1328+ reduce_op.eval (*mf.back (), IntVect (0 ), reduce_data,
1329+ [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
1330+ -> Real
1331+ {
1332+ return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k);
1333+ });
1334+ } else
1335+ #endif
1336+ {
1337+ #if (AMREX_SPACEDIM == 1)
1338+ if (geom[nlevels-1 ].IsSPHERICAL ()) {
1339+ const auto rlo = geom[nlevels-1 ].CellSize (0 );
1340+ reduce_op.eval (*mf.back (), IntVect (0 ), reduce_data,
1341+ [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
1342+ noexcept -> Real
1343+ {
1344+ constexpr Real pi = Real (3.1415926535897932 );
1345+ Real ri = rlo + dx[0 ]*i;
1346+ Real ro = ri + dx[0 ];
1347+ return Real (4 ./3 .)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri)
1348+ * a[box_no](i,j,k,icomp);
1349+ });
1350+ } else
1351+ #elif (AMREX_SPACEDIM == 2)
1352+ if (geom[nlevels-1 ].IsRZ ()) {
1353+ const auto rlo = geom[nlevels-1 ].CellSize (0 );
1354+ reduce_op.eval (*mf.back (), IntVect (0 ), reduce_data,
1355+ [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
1356+ noexcept -> Real
1357+ {
1358+ Real ri = rlo + dx[0 ]*i;
1359+ Real ro = ri + dx[0 ];
1360+ constexpr Real pi = Real (3.1415926535897932 );
1361+ return pi*dx[1 ]*dx[0 ]*(ro+ri)
1362+ * a[box_no](i,j,k,icomp);
1363+ });
1364+ } else
1365+ #endif
1366+ {
1367+ reduce_op.eval (*mf.back (), IntVect (0 ), reduce_data,
1368+ [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
1369+ {
1370+ return dv*a[box_no](i,j,k,icomp);
1371+ });
1372+ }
1373+ }
1374+
1375+ auto const & hv = reduce_data.value (reduce_op);
1376+ Real r = amrex::get<0 >(hv);
1377+
1378+ if (!local) {
1379+ ParallelAllReduce::Sum (r, ParallelContext::CommunicatorSub ());
1380+ }
1381+ return r;
1382+ }
1383+
1384+ void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp,
1385+ MultiFab const & fmf,
1386+ IntVect const & ratio)
1387+ {
1388+ AMREX_ASSERT (AMREX_D_TERM ( (ratio[0 ] == 2 || ratio[0 ] == 4 ),
1389+ && (ratio[1 ] == 2 || ratio[1 ] == 4 ),
1390+ && (ratio[2 ] == 2 || ratio[2 ] == 4 )));
1391+
1392+ MultiFab tmp (amrex::coarsen (fmf.boxArray (), ratio), fmf.DistributionMap (),
1393+ ncomp, 0 );
1394+
1395+ #ifdef AMREX_USE_OMP
1396+ #pragma omp parallel if (Gpu::notInLaunchRegion())
1397+ #endif
1398+ {
1399+ #if (AMREX_SPACEDIM > 1)
1400+ FArrayBox xtmp;
1401+ #if (AMREX_SPACEDIM > 2)
1402+ FArrayBox ytmp;
1403+ #endif
1404+ #endif
1405+ for (MFIter mfi (tmp,TilingIfNotGPU ()); mfi.isValid (); ++mfi) {
1406+ Box const & bx = mfi.tilebox ();
1407+ auto const & fa = fmf.const_array (mfi,scomp);
13741408
1375- auto const & hv = reduce_data.value (reduce_op);
1376- Real r = amrex::get<0 >(hv);
1409+ Box xbx = bx;
1410+ #if (AMREX_SPACEDIM == 1)
1411+ auto const & xa = tmp.array (mfi);
1412+ #else
1413+ xbx.refine (IntVect (AMREX_D_DECL (1 ,ratio[1 ],ratio[2 ])));
1414+ if (ratio[1 ] == 2 ) { xbx.grow (1 ,1 ); }
1415+ #if (AMREX_SPACEDIM == 3)
1416+ if (ratio[2 ] == 2 ) { xbx.grow (2 ,1 ); }
1417+ #endif
1418+ xtmp.resize (xbx,ncomp);
1419+ Elixir eli = xtmp.elixir ();
1420+ auto const & xa = xtmp.array ();
1421+ #endif
1422+ AMREX_HOST_DEVICE_PARALLEL_FOR_4D (xbx, ncomp, i, j, k, n,
1423+ {
1424+ int ii = 2 *i;
1425+ xa (i,j,k,n) = Real (1 ./16 )*(Real (9 .)*(fa (ii ,j,k,n) +
1426+ fa (ii+1 ,j,k,n))
1427+ - fa (ii-1 ,j,k,n)
1428+ - fa (ii+2 ,j,k,n));
1429+ });
13771430
1378- if (!local) {
1379- ParallelAllReduce::Sum (r, ParallelContext::CommunicatorSub ());
1431+ #if (AMREX_SPACEDIM > 1)
1432+ Box ybx = bx;
1433+ auto const & xca = xtmp.const_array ();
1434+ #if (AMREX_SPACEDIM == 2)
1435+ auto const & ya = tmp.array (mfi);
1436+ #else
1437+ ybx.refine (IntVect (AMREX_D_DECL (1 ,1 ,ratio[2 ])));
1438+ if (ratio[2 ] == 2 ) { ybx.grow (2 ,1 ); }
1439+ ytmp.resize (ybx,ncomp);
1440+ eli.append (ytmp.elixir ());
1441+ auto const & ya = ytmp.array ();
1442+ #endif
1443+ AMREX_HOST_DEVICE_PARALLEL_FOR_4D (ybx, ncomp, i, j, k, n,
1444+ {
1445+ int jj = 2 *j;
1446+ ya (i,j,k,n) = Real (1 ./16 )*(Real (9 .)*(xca (i,jj ,k,n) +
1447+ xca (i,jj+1 ,k,n))
1448+ - xca (i,jj-1 ,k,n)
1449+ - xca (i,jj+2 ,k,n));
1450+ });
1451+
1452+ #if (AMREX_SPACEDIM == 3)
1453+ auto const & yca = ytmp.const_array ();
1454+ auto const & ca = tmp.array (mfi);
1455+ AMREX_HOST_DEVICE_PARALLEL_FOR_4D (bx, ncomp, i, j, k, n,
1456+ {
1457+ int kk = 2 *k;
1458+ ca (i,j,k,n) = Real (1 ./16 )*(Real (9 .)*(yca (i,j,kk ,n) +
1459+ yca (i,j,kk+1 ,n))
1460+ - yca (i,j,kk-1 ,n)
1461+ - yca (i,j,kk+2 ,n));
1462+ });
1463+ #endif
1464+ #endif
13801465 }
1381- return r;
13821466 }
1467+
1468+ cmf.ParallelCopy (tmp, 0 , scomp, ncomp);
1469+ }
13831470}
0 commit comments