Skip to content

Commit e452137

Browse files
authored
Convert more mfiter loops to the MF parallelfor (Exawind#1502)
1 parent 458878e commit e452137

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+2956
-2980
lines changed

amr-wind/diffusion/incflo_diffusion.cpp

+31-33
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,15 @@ void fixup_eta_on_domain_faces(
137137
mfi_info.SetDynamic(true);
138138
}
139139
#ifdef AMREX_USE_OMP
140-
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
140+
#pragma omp parallel if (false)
141141
#endif
142142
for (MFIter mfi(cc, mfi_info); mfi.isValid(); ++mfi) {
143-
Box const& bx = mfi.validbox();
144-
Array4<Real const> const& cca = cc.const_array(mfi);
143+
const auto& bx = mfi.validbox();
144+
const auto& cca = cc.const_array(mfi);
145145

146146
int idim = 0;
147147
if (!geom.isPeriodic(idim)) {
148-
Array4<Real> const& fca = fc[idim].array(mfi);
148+
const auto& fca = fc[idim].array(mfi);
149149
if (bx.smallEnd(idim) == domain.smallEnd(idim)) {
150150
amrex::ParallelFor(
151151
amrex::bdryLo(bx, idim),
@@ -164,7 +164,7 @@ void fixup_eta_on_domain_faces(
164164

165165
idim = 1;
166166
if (!geom.isPeriodic(idim)) {
167-
Array4<Real> const& fca = fc[idim].array(mfi);
167+
const auto& fca = fc[idim].array(mfi);
168168
if (bx.smallEnd(idim) == domain.smallEnd(idim)) {
169169
amrex::ParallelFor(
170170
amrex::bdryLo(bx, idim),
@@ -183,7 +183,7 @@ void fixup_eta_on_domain_faces(
183183

184184
idim = 2;
185185
if (!geom.isPeriodic(idim)) {
186-
Array4<Real> const& fca = fc[idim].array(mfi);
186+
const auto& fca = fc[idim].array(mfi);
187187
if (bx.smallEnd(idim) == domain.smallEnd(idim)) {
188188
amrex::ParallelFor(
189189
amrex::bdryLo(bx, idim),
@@ -221,47 +221,45 @@ void viscosity_to_uniform_space(
221221
repo.get_mesh_mapping_det_j(amr_wind::FieldLoc::ZFACE);
222222

223223
// beta accounted for mesh mapping (x-face) = J/fac^2 * mu
224-
for (amrex::MFIter mfi(b[0]); mfi.isValid(); ++mfi) {
225-
amrex::Array4<amrex::Real> const& mu = b[0].array(mfi);
226-
amrex::Array4<amrex::Real const> const& fac =
227-
mesh_fac_xf(lev).array(mfi);
228-
amrex::Array4<amrex::Real const> const& detJ =
229-
mesh_detJ_xf(lev).const_array(mfi);
224+
{
225+
const auto& mu_arrs = b[0].arrays();
226+
const auto& fac_arrs = mesh_fac_xf(lev).arrays();
227+
const auto& detJ_arrs = mesh_detJ_xf(lev).const_arrays();
230228

231229
amrex::ParallelFor(
232-
mfi.tilebox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
233-
mu(i, j, k) =
234-
mu(i, j, k) * detJ(i, j, k) / std::pow(fac(i, j, k, 0), 2);
230+
b[0], [=] AMREX_GPU_DEVICE(int nbx, int i, int j, int k) noexcept {
231+
mu_arrs[nbx](i, j, k) = mu_arrs[nbx](i, j, k) *
232+
detJ_arrs[nbx](i, j, k) /
233+
std::pow(fac_arrs[nbx](i, j, k, 0), 2);
235234
});
236235
}
237236
// beta accounted for mesh mapping (y-face) = J/fac^2 * mu
238-
for (amrex::MFIter mfi(b[1]); mfi.isValid(); ++mfi) {
239-
amrex::Array4<amrex::Real> const& mu = b[1].array(mfi);
240-
amrex::Array4<amrex::Real const> const& fac =
241-
mesh_fac_yf(lev).array(mfi);
242-
amrex::Array4<amrex::Real const> const& detJ =
243-
mesh_detJ_yf(lev).const_array(mfi);
237+
{
238+
const auto& mu_arrs = b[1].arrays();
239+
const auto& fac_arrs = mesh_fac_yf(lev).arrays();
240+
const auto& detJ_arrs = mesh_detJ_yf(lev).const_arrays();
244241

245242
amrex::ParallelFor(
246-
mfi.tilebox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
247-
mu(i, j, k) =
248-
mu(i, j, k) * detJ(i, j, k) / std::pow(fac(i, j, k, 1), 2);
243+
b[1], [=] AMREX_GPU_DEVICE(int nbx, int i, int j, int k) noexcept {
244+
mu_arrs[nbx](i, j, k) = mu_arrs[nbx](i, j, k) *
245+
detJ_arrs[nbx](i, j, k) /
246+
std::pow(fac_arrs[nbx](i, j, k, 1), 2);
249247
});
250248
}
251249
// beta accounted for mesh mapping (z-face) = J/fac^2 * mu
252-
for (amrex::MFIter mfi(b[2]); mfi.isValid(); ++mfi) {
253-
amrex::Array4<amrex::Real> const& mu = b[2].array(mfi);
254-
amrex::Array4<amrex::Real const> const& fac =
255-
mesh_fac_zf(lev).array(mfi);
256-
amrex::Array4<amrex::Real const> const& detJ =
257-
mesh_detJ_zf(lev).const_array(mfi);
250+
{
251+
const auto& mu_arrs = b[2].arrays();
252+
const auto& fac_arrs = mesh_fac_zf(lev).arrays();
253+
const auto& detJ_arrs = mesh_detJ_zf(lev).const_arrays();
258254

259255
amrex::ParallelFor(
260-
mfi.tilebox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
261-
mu(i, j, k) =
262-
mu(i, j, k) * detJ(i, j, k) / std::pow(fac(i, j, k, 2), 2);
256+
b[2], [=] AMREX_GPU_DEVICE(int nbx, int i, int j, int k) noexcept {
257+
mu_arrs[nbx](i, j, k) = mu_arrs[nbx](i, j, k) *
258+
detJ_arrs[nbx](i, j, k) /
259+
std::pow(fac_arrs[nbx](i, j, k, 2), 2);
263260
});
264261
}
262+
amrex::Gpu::synchronize();
265263
}
266264

267265
} // namespace diffusion

amr-wind/equation_systems/CompRHSOps.H

+122-126
Original file line numberDiff line numberDiff line change
@@ -76,68 +76,66 @@ struct ComputeRHSOp
7676
: nullptr;
7777

7878
for (int lev = 0; lev < nlevels; ++lev) {
79-
#ifdef AMREX_USE_OMP
80-
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
81-
#endif
82-
for (amrex::MFIter mfi(field(lev)); mfi.isValid(); ++mfi) {
83-
const auto& bx = mfi.tilebox();
84-
auto fld = field(lev).array(mfi);
85-
const auto fld_o = field_old(lev).const_array(mfi);
86-
const auto rho_o = den_old(lev).const_array(mfi);
87-
const auto rho = den_new(lev).const_array(mfi);
88-
const auto src = src_term(lev).const_array(mfi);
89-
const auto diff = diff_term(lev).const_array(mfi);
90-
const auto ddt_o = conv_term(lev).const_array(mfi);
91-
const auto imask = mask_cell(lev).const_array(mfi);
92-
amrex::Array4<amrex::Real const> detJ =
93-
mesh_mapping ? ((*mesh_detJ)(lev).const_array(mfi))
94-
: amrex::Array4<amrex::Real const>();
95-
96-
if (PDE::multiply_rho) {
97-
// Remove multiplication by density as it will be added back
98-
// in solver
99-
amrex::ParallelFor(
100-
bx, PDE::ndim,
101-
[=] AMREX_GPU_DEVICE(
102-
int i, int j, int k, int n) noexcept {
103-
amrex::Real det_j =
104-
mesh_mapping ? (detJ(i, j, k)) : 1.0;
105-
106-
fld(i, j, k, n) =
107-
rho_o(i, j, k) * det_j * fld_o(i, j, k, n) +
108-
static_cast<amrex::Real>(imask(i, j, k)) * dt *
109-
(ddt_o(i, j, k, n) +
110-
det_j * src(i, j, k, n) +
111-
factor * diff(i, j, k, n));
112-
113-
fld(i, j, k, n) /= rho(i, j, k);
114-
115-
if (difftype == DiffusionType::Explicit) {
116-
fld(i, j, k, n) /= det_j;
117-
}
118-
});
119-
} else {
120-
amrex::ParallelFor(
121-
bx, PDE::ndim,
122-
[=] AMREX_GPU_DEVICE(
123-
int i, int j, int k, int n) noexcept {
124-
amrex::Real det_j =
125-
mesh_mapping ? (detJ(i, j, k)) : 1.0;
126-
127-
fld(i, j, k, n) =
128-
det_j * fld_o(i, j, k, n) +
129-
static_cast<amrex::Real>(imask(i, j, k)) * dt *
130-
(ddt_o(i, j, k, n) +
131-
det_j * src(i, j, k, n) +
132-
factor * diff(i, j, k, n));
133-
134-
if (difftype == DiffusionType::Explicit) {
135-
fld(i, j, k, n) /= det_j;
136-
}
137-
});
138-
}
79+
const auto& fld_arrs = field(lev).arrays();
80+
const auto& fld_o_arrs = field_old(lev).const_arrays();
81+
const auto& rho_o_arrs = den_old(lev).const_arrays();
82+
const auto& rho_arrs = den_new(lev).const_arrays();
83+
const auto& src_arrs = src_term(lev).const_arrays();
84+
const auto& diff_arrs = diff_term(lev).const_arrays();
85+
const auto& ddt_o_arrs = conv_term(lev).const_arrays();
86+
const auto& imask_arrs = mask_cell(lev).const_arrays();
87+
const auto& detJ_arrs =
88+
mesh_mapping ? ((*mesh_detJ)(lev).const_arrays())
89+
: amrex::MultiArray4<amrex::Real const>();
90+
91+
if (PDE::multiply_rho) {
92+
// Remove multiplication by density as it will be added back
93+
// in solver
94+
amrex::ParallelFor(
95+
field(lev), amrex::IntVect(0), PDE::ndim,
96+
[=] AMREX_GPU_DEVICE(
97+
int nbx, int i, int j, int k, int n) noexcept {
98+
amrex::Real det_j =
99+
mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0;
100+
101+
fld_arrs[nbx](i, j, k, n) =
102+
rho_o_arrs[nbx](i, j, k) * det_j *
103+
fld_o_arrs[nbx](i, j, k, n) +
104+
static_cast<amrex::Real>(imask_arrs[nbx](i, j, k)) *
105+
dt *
106+
(ddt_o_arrs[nbx](i, j, k, n) +
107+
det_j * src_arrs[nbx](i, j, k, n) +
108+
factor * diff_arrs[nbx](i, j, k, n));
109+
110+
fld_arrs[nbx](i, j, k, n) /= rho_arrs[nbx](i, j, k);
111+
112+
if (difftype == DiffusionType::Explicit) {
113+
fld_arrs[nbx](i, j, k, n) /= det_j;
114+
}
115+
});
116+
} else {
117+
amrex::ParallelFor(
118+
field(lev), amrex::IntVect(0), PDE::ndim,
119+
[=] AMREX_GPU_DEVICE(
120+
int nbx, int i, int j, int k, int n) noexcept {
121+
amrex::Real det_j =
122+
mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0;
123+
124+
fld_arrs[nbx](i, j, k, n) =
125+
det_j * fld_o_arrs[nbx](i, j, k, n) +
126+
static_cast<amrex::Real>(imask_arrs[nbx](i, j, k)) *
127+
dt *
128+
(ddt_o_arrs[nbx](i, j, k, n) +
129+
det_j * src_arrs[nbx](i, j, k, n) +
130+
factor * diff_arrs[nbx](i, j, k, n));
131+
132+
if (difftype == DiffusionType::Explicit) {
133+
fld_arrs[nbx](i, j, k, n) /= det_j;
134+
}
135+
});
139136
}
140137
}
138+
amrex::Gpu::synchronize();
141139
}
142140

143141
/** Compute right-hand side for corrector steps
@@ -196,74 +194,72 @@ struct ComputeRHSOp
196194
: nullptr;
197195

198196
for (int lev = 0; lev < nlevels; ++lev) {
199-
#ifdef AMREX_USE_OMP
200-
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
201-
#endif
202-
for (amrex::MFIter mfi(field(lev)); mfi.isValid(); ++mfi) {
203-
const auto& bx = mfi.tilebox();
204-
auto fld = field(lev).array(mfi);
205-
const auto fld_o = field_old(lev).const_array(mfi);
206-
const auto rho_o = den_old(lev).const_array(mfi);
207-
const auto rho = den_new(lev).const_array(mfi);
208-
const auto src = src_term(lev).const_array(mfi);
209-
const auto diff = diff_term(lev).const_array(mfi);
210-
const auto ddt = conv_term(lev).const_array(mfi);
211-
const auto diff_o = diff_term_old(lev).const_array(mfi);
212-
const auto ddt_o = conv_term_old(lev).const_array(mfi);
213-
const auto imask = mask_cell(lev).const_array(mfi);
214-
amrex::Array4<amrex::Real const> detJ =
215-
mesh_mapping ? ((*mesh_detJ)(lev).const_array(mfi))
216-
: amrex::Array4<amrex::Real const>();
217-
218-
if (PDE::multiply_rho) {
219-
// Remove multiplication by density as it will be added back
220-
// in solver
221-
amrex::ParallelFor(
222-
bx, PDE::ndim,
223-
[=] AMREX_GPU_DEVICE(
224-
int i, int j, int k, int n) noexcept {
225-
amrex::Real det_j =
226-
mesh_mapping ? (detJ(i, j, k)) : 1.0;
227-
228-
fld(i, j, k, n) =
229-
rho_o(i, j, k) * det_j * fld_o(i, j, k, n) +
230-
static_cast<amrex::Real>(imask(i, j, k)) * dt *
231-
(0.5 *
232-
(ddt_o(i, j, k, n) + ddt(i, j, k, n)) +
233-
ofac * diff_o(i, j, k, n) +
234-
nfac * diff(i, j, k, n) +
235-
det_j * src(i, j, k, n));
236-
237-
fld(i, j, k, n) /= rho(i, j, k);
238-
239-
if (difftype == DiffusionType::Explicit) {
240-
fld(i, j, k, n) /= det_j;
241-
}
242-
});
243-
} else {
244-
amrex::ParallelFor(
245-
bx, PDE::ndim,
246-
[=] AMREX_GPU_DEVICE(
247-
int i, int j, int k, int n) noexcept {
248-
amrex::Real det_j =
249-
mesh_mapping ? (detJ(i, j, k)) : 1.0;
250-
251-
fld(i, j, k, n) =
252-
det_j * fld_o(i, j, k, n) +
253-
static_cast<amrex::Real>(imask(i, j, k)) * dt *
254-
(0.5 *
255-
(ddt_o(i, j, k, n) + ddt(i, j, k, n)) +
256-
ofac * diff_o(i, j, k, n) +
257-
nfac * diff(i, j, k, n) +
258-
det_j * src(i, j, k, n));
259-
260-
if (difftype == DiffusionType::Explicit) {
261-
fld(i, j, k, n) /= det_j;
262-
}
263-
});
264-
}
197+
const auto& fld_arrs = field(lev).arrays();
198+
const auto& fld_o_arrs = field_old(lev).const_arrays();
199+
const auto& rho_o_arrs = den_old(lev).const_arrays();
200+
const auto& rho_arrs = den_new(lev).const_arrays();
201+
const auto& src_arrs = src_term(lev).const_arrays();
202+
const auto& diff_arrs = diff_term(lev).const_arrays();
203+
const auto& ddt_arrs = conv_term(lev).const_arrays();
204+
const auto& diff_o_arrs = diff_term_old(lev).const_arrays();
205+
const auto& ddt_o_arrs = conv_term_old(lev).const_arrays();
206+
const auto& imask_arrs = mask_cell(lev).const_arrays();
207+
const auto& detJ_arrs =
208+
mesh_mapping ? ((*mesh_detJ)(lev).const_arrays())
209+
: amrex::MultiArray4<amrex::Real const>();
210+
211+
if (PDE::multiply_rho) {
212+
// Remove multiplication by density as it will be added back
213+
// in solver
214+
amrex::ParallelFor(
215+
field(lev), amrex::IntVect(0), PDE::ndim,
216+
[=] AMREX_GPU_DEVICE(
217+
int nbx, int i, int j, int k, int n) noexcept {
218+
amrex::Real det_j =
219+
mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0;
220+
221+
fld_arrs[nbx](i, j, k, n) =
222+
rho_o_arrs[nbx](i, j, k) * det_j *
223+
fld_o_arrs[nbx](i, j, k, n) +
224+
static_cast<amrex::Real>(imask_arrs[nbx](i, j, k)) *
225+
dt *
226+
(0.5 * (ddt_o_arrs[nbx](i, j, k, n) +
227+
ddt_arrs[nbx](i, j, k, n)) +
228+
ofac * diff_o_arrs[nbx](i, j, k, n) +
229+
nfac * diff_arrs[nbx](i, j, k, n) +
230+
det_j * src_arrs[nbx](i, j, k, n));
231+
232+
fld_arrs[nbx](i, j, k, n) /= rho_arrs[nbx](i, j, k);
233+
234+
if (difftype == DiffusionType::Explicit) {
235+
fld_arrs[nbx](i, j, k, n) /= det_j;
236+
}
237+
});
238+
} else {
239+
amrex::ParallelFor(
240+
field(lev), amrex::IntVect(0), PDE::ndim,
241+
[=] AMREX_GPU_DEVICE(
242+
int nbx, int i, int j, int k, int n) noexcept {
243+
amrex::Real det_j =
244+
mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0;
245+
246+
fld_arrs[nbx](i, j, k, n) =
247+
det_j * fld_o_arrs[nbx](i, j, k, n) +
248+
static_cast<amrex::Real>(imask_arrs[nbx](i, j, k)) *
249+
dt *
250+
(0.5 * (ddt_o_arrs[nbx](i, j, k, n) +
251+
ddt_arrs[nbx](i, j, k, n)) +
252+
ofac * diff_o_arrs[nbx](i, j, k, n) +
253+
nfac * diff_arrs[nbx](i, j, k, n) +
254+
det_j * src_arrs[nbx](i, j, k, n));
255+
256+
if (difftype == DiffusionType::Explicit) {
257+
fld_arrs[nbx](i, j, k, n) /= det_j;
258+
}
259+
});
265260
}
266261
}
262+
amrex::Gpu::synchronize();
267263
}
268264

269265
// data members

0 commit comments

Comments
 (0)