@@ -76,68 +76,66 @@ struct ComputeRHSOp
76
76
: nullptr ;
77
77
78
78
for (int lev = 0 ; lev < nlevels; ++lev) {
79
- #ifdef AMREX_USE_OMP
80
- #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
81
- #endif
82
- for (amrex::MFIter mfi (field (lev)); mfi.isValid (); ++mfi) {
83
- const auto & bx = mfi.tilebox ();
84
- auto fld = field (lev).array (mfi);
85
- const auto fld_o = field_old (lev).const_array (mfi);
86
- const auto rho_o = den_old (lev).const_array (mfi);
87
- const auto rho = den_new (lev).const_array (mfi);
88
- const auto src = src_term (lev).const_array (mfi);
89
- const auto diff = diff_term (lev).const_array (mfi);
90
- const auto ddt_o = conv_term (lev).const_array (mfi);
91
- const auto imask = mask_cell (lev).const_array (mfi);
92
- amrex::Array4<amrex::Real const > detJ =
93
- mesh_mapping ? ((*mesh_detJ)(lev).const_array (mfi))
94
- : amrex::Array4<amrex::Real const >();
95
-
96
- if (PDE::multiply_rho) {
97
- // Remove multiplication by density as it will be added back
98
- // in solver
99
- amrex::ParallelFor (
100
- bx, PDE::ndim,
101
- [=] AMREX_GPU_DEVICE (
102
- int i, int j, int k, int n) noexcept {
103
- amrex::Real det_j =
104
- mesh_mapping ? (detJ (i, j, k)) : 1.0 ;
105
-
106
- fld (i, j, k, n) =
107
- rho_o (i, j, k) * det_j * fld_o (i, j, k, n) +
108
- static_cast <amrex::Real>(imask (i, j, k)) * dt *
109
- (ddt_o (i, j, k, n) +
110
- det_j * src (i, j, k, n) +
111
- factor * diff (i, j, k, n));
112
-
113
- fld (i, j, k, n) /= rho (i, j, k);
114
-
115
- if (difftype == DiffusionType::Explicit) {
116
- fld (i, j, k, n) /= det_j;
117
- }
118
- });
119
- } else {
120
- amrex::ParallelFor (
121
- bx, PDE::ndim,
122
- [=] AMREX_GPU_DEVICE (
123
- int i, int j, int k, int n) noexcept {
124
- amrex::Real det_j =
125
- mesh_mapping ? (detJ (i, j, k)) : 1.0 ;
126
-
127
- fld (i, j, k, n) =
128
- det_j * fld_o (i, j, k, n) +
129
- static_cast <amrex::Real>(imask (i, j, k)) * dt *
130
- (ddt_o (i, j, k, n) +
131
- det_j * src (i, j, k, n) +
132
- factor * diff (i, j, k, n));
133
-
134
- if (difftype == DiffusionType::Explicit) {
135
- fld (i, j, k, n) /= det_j;
136
- }
137
- });
138
- }
79
+ const auto & fld_arrs = field (lev).arrays ();
80
+ const auto & fld_o_arrs = field_old (lev).const_arrays ();
81
+ const auto & rho_o_arrs = den_old (lev).const_arrays ();
82
+ const auto & rho_arrs = den_new (lev).const_arrays ();
83
+ const auto & src_arrs = src_term (lev).const_arrays ();
84
+ const auto & diff_arrs = diff_term (lev).const_arrays ();
85
+ const auto & ddt_o_arrs = conv_term (lev).const_arrays ();
86
+ const auto & imask_arrs = mask_cell (lev).const_arrays ();
87
+ const auto & detJ_arrs =
88
+ mesh_mapping ? ((*mesh_detJ)(lev).const_arrays ())
89
+ : amrex::MultiArray4<amrex::Real const >();
90
+
91
+ if (PDE::multiply_rho) {
92
+ // Remove multiplication by density as it will be added back
93
+ // in solver
94
+ amrex::ParallelFor (
95
+ field (lev), amrex::IntVect (0 ), PDE::ndim,
96
+ [=] AMREX_GPU_DEVICE (
97
+ int nbx, int i, int j, int k, int n) noexcept {
98
+ amrex::Real det_j =
99
+ mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0 ;
100
+
101
+ fld_arrs[nbx](i, j, k, n) =
102
+ rho_o_arrs[nbx](i, j, k) * det_j *
103
+ fld_o_arrs[nbx](i, j, k, n) +
104
+ static_cast <amrex::Real>(imask_arrs[nbx](i, j, k)) *
105
+ dt *
106
+ (ddt_o_arrs[nbx](i, j, k, n) +
107
+ det_j * src_arrs[nbx](i, j, k, n) +
108
+ factor * diff_arrs[nbx](i, j, k, n));
109
+
110
+ fld_arrs[nbx](i, j, k, n) /= rho_arrs[nbx](i, j, k);
111
+
112
+ if (difftype == DiffusionType::Explicit) {
113
+ fld_arrs[nbx](i, j, k, n) /= det_j;
114
+ }
115
+ });
116
+ } else {
117
+ amrex::ParallelFor (
118
+ field (lev), amrex::IntVect (0 ), PDE::ndim,
119
+ [=] AMREX_GPU_DEVICE (
120
+ int nbx, int i, int j, int k, int n) noexcept {
121
+ amrex::Real det_j =
122
+ mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0 ;
123
+
124
+ fld_arrs[nbx](i, j, k, n) =
125
+ det_j * fld_o_arrs[nbx](i, j, k, n) +
126
+ static_cast <amrex::Real>(imask_arrs[nbx](i, j, k)) *
127
+ dt *
128
+ (ddt_o_arrs[nbx](i, j, k, n) +
129
+ det_j * src_arrs[nbx](i, j, k, n) +
130
+ factor * diff_arrs[nbx](i, j, k, n));
131
+
132
+ if (difftype == DiffusionType::Explicit) {
133
+ fld_arrs[nbx](i, j, k, n) /= det_j;
134
+ }
135
+ });
139
136
}
140
137
}
138
+ amrex::Gpu::synchronize ();
141
139
}
142
140
143
141
/* * Compute right-hand side for corrector steps
@@ -196,74 +194,72 @@ struct ComputeRHSOp
196
194
: nullptr ;
197
195
198
196
for (int lev = 0 ; lev < nlevels; ++lev) {
199
- #ifdef AMREX_USE_OMP
200
- #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
201
- #endif
202
- for (amrex::MFIter mfi (field (lev)); mfi.isValid (); ++mfi) {
203
- const auto & bx = mfi.tilebox ();
204
- auto fld = field (lev).array (mfi);
205
- const auto fld_o = field_old (lev).const_array (mfi);
206
- const auto rho_o = den_old (lev).const_array (mfi);
207
- const auto rho = den_new (lev).const_array (mfi);
208
- const auto src = src_term (lev).const_array (mfi);
209
- const auto diff = diff_term (lev).const_array (mfi);
210
- const auto ddt = conv_term (lev).const_array (mfi);
211
- const auto diff_o = diff_term_old (lev).const_array (mfi);
212
- const auto ddt_o = conv_term_old (lev).const_array (mfi);
213
- const auto imask = mask_cell (lev).const_array (mfi);
214
- amrex::Array4<amrex::Real const > detJ =
215
- mesh_mapping ? ((*mesh_detJ)(lev).const_array (mfi))
216
- : amrex::Array4<amrex::Real const >();
217
-
218
- if (PDE::multiply_rho) {
219
- // Remove multiplication by density as it will be added back
220
- // in solver
221
- amrex::ParallelFor (
222
- bx, PDE::ndim,
223
- [=] AMREX_GPU_DEVICE (
224
- int i, int j, int k, int n) noexcept {
225
- amrex::Real det_j =
226
- mesh_mapping ? (detJ (i, j, k)) : 1.0 ;
227
-
228
- fld (i, j, k, n) =
229
- rho_o (i, j, k) * det_j * fld_o (i, j, k, n) +
230
- static_cast <amrex::Real>(imask (i, j, k)) * dt *
231
- (0.5 *
232
- (ddt_o (i, j, k, n) + ddt (i, j, k, n)) +
233
- ofac * diff_o (i, j, k, n) +
234
- nfac * diff (i, j, k, n) +
235
- det_j * src (i, j, k, n));
236
-
237
- fld (i, j, k, n) /= rho (i, j, k);
238
-
239
- if (difftype == DiffusionType::Explicit) {
240
- fld (i, j, k, n) /= det_j;
241
- }
242
- });
243
- } else {
244
- amrex::ParallelFor (
245
- bx, PDE::ndim,
246
- [=] AMREX_GPU_DEVICE (
247
- int i, int j, int k, int n) noexcept {
248
- amrex::Real det_j =
249
- mesh_mapping ? (detJ (i, j, k)) : 1.0 ;
250
-
251
- fld (i, j, k, n) =
252
- det_j * fld_o (i, j, k, n) +
253
- static_cast <amrex::Real>(imask (i, j, k)) * dt *
254
- (0.5 *
255
- (ddt_o (i, j, k, n) + ddt (i, j, k, n)) +
256
- ofac * diff_o (i, j, k, n) +
257
- nfac * diff (i, j, k, n) +
258
- det_j * src (i, j, k, n));
259
-
260
- if (difftype == DiffusionType::Explicit) {
261
- fld (i, j, k, n) /= det_j;
262
- }
263
- });
264
- }
197
+ const auto & fld_arrs = field (lev).arrays ();
198
+ const auto & fld_o_arrs = field_old (lev).const_arrays ();
199
+ const auto & rho_o_arrs = den_old (lev).const_arrays ();
200
+ const auto & rho_arrs = den_new (lev).const_arrays ();
201
+ const auto & src_arrs = src_term (lev).const_arrays ();
202
+ const auto & diff_arrs = diff_term (lev).const_arrays ();
203
+ const auto & ddt_arrs = conv_term (lev).const_arrays ();
204
+ const auto & diff_o_arrs = diff_term_old (lev).const_arrays ();
205
+ const auto & ddt_o_arrs = conv_term_old (lev).const_arrays ();
206
+ const auto & imask_arrs = mask_cell (lev).const_arrays ();
207
+ const auto & detJ_arrs =
208
+ mesh_mapping ? ((*mesh_detJ)(lev).const_arrays ())
209
+ : amrex::MultiArray4<amrex::Real const >();
210
+
211
+ if (PDE::multiply_rho) {
212
+ // Remove multiplication by density as it will be added back
213
+ // in solver
214
+ amrex::ParallelFor (
215
+ field (lev), amrex::IntVect (0 ), PDE::ndim,
216
+ [=] AMREX_GPU_DEVICE (
217
+ int nbx, int i, int j, int k, int n) noexcept {
218
+ amrex::Real det_j =
219
+ mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0 ;
220
+
221
+ fld_arrs[nbx](i, j, k, n) =
222
+ rho_o_arrs[nbx](i, j, k) * det_j *
223
+ fld_o_arrs[nbx](i, j, k, n) +
224
+ static_cast <amrex::Real>(imask_arrs[nbx](i, j, k)) *
225
+ dt *
226
+ (0.5 * (ddt_o_arrs[nbx](i, j, k, n) +
227
+ ddt_arrs[nbx](i, j, k, n)) +
228
+ ofac * diff_o_arrs[nbx](i, j, k, n) +
229
+ nfac * diff_arrs[nbx](i, j, k, n) +
230
+ det_j * src_arrs[nbx](i, j, k, n));
231
+
232
+ fld_arrs[nbx](i, j, k, n) /= rho_arrs[nbx](i, j, k);
233
+
234
+ if (difftype == DiffusionType::Explicit) {
235
+ fld_arrs[nbx](i, j, k, n) /= det_j;
236
+ }
237
+ });
238
+ } else {
239
+ amrex::ParallelFor (
240
+ field (lev), amrex::IntVect (0 ), PDE::ndim,
241
+ [=] AMREX_GPU_DEVICE (
242
+ int nbx, int i, int j, int k, int n) noexcept {
243
+ amrex::Real det_j =
244
+ mesh_mapping ? (detJ_arrs[nbx](i, j, k)) : 1.0 ;
245
+
246
+ fld_arrs[nbx](i, j, k, n) =
247
+ det_j * fld_o_arrs[nbx](i, j, k, n) +
248
+ static_cast <amrex::Real>(imask_arrs[nbx](i, j, k)) *
249
+ dt *
250
+ (0.5 * (ddt_o_arrs[nbx](i, j, k, n) +
251
+ ddt_arrs[nbx](i, j, k, n)) +
252
+ ofac * diff_o_arrs[nbx](i, j, k, n) +
253
+ nfac * diff_arrs[nbx](i, j, k, n) +
254
+ det_j * src_arrs[nbx](i, j, k, n));
255
+
256
+ if (difftype == DiffusionType::Explicit) {
257
+ fld_arrs[nbx](i, j, k, n) /= det_j;
258
+ }
259
+ });
265
260
}
266
261
}
262
+ amrex::Gpu::synchronize ();
267
263
}
268
264
269
265
// data members
0 commit comments