Commit bf4d1b8

Merge pull request #1792 from LLNL/feature/burmark1/param_rename
Rename param implementation
2 parents: df10eff + 9200287

27 files changed: +238 -201 lines

include/RAJA/pattern/params/forall.hpp (+34 -31)
@@ -43,45 +43,45 @@ struct ForallParamPack
 private:
   // Init
   template<typename EXEC_POL, camp::idx_t... Seq, typename... Args>
-  static constexpr void detail_init(EXEC_POL,
-                                    camp::idx_seq<Seq...>,
-                                    ForallParamPack& f_params,
-                                    Args&&... args)
+  static constexpr void parampack_init(EXEC_POL const& pol,
+                                       camp::idx_seq<Seq...>,
+                                       ForallParamPack& f_params,
+                                       Args&&... args)
   {
-    CAMP_EXPAND(expt::detail::init<EXEC_POL>(camp::get<Seq>(f_params.param_tup),
-                                             std::forward<Args>(args)...));
+    CAMP_EXPAND(param_init(pol, camp::get<Seq>(f_params.param_tup),
+                           std::forward<Args>(args)...));
   }

   // Combine
   template<typename EXEC_POL, camp::idx_t... Seq>
-  RAJA_HOST_DEVICE static constexpr void detail_combine(
-      EXEC_POL,
+  RAJA_HOST_DEVICE static constexpr void parampack_combine(
+      EXEC_POL const& pol,
       camp::idx_seq<Seq...>,
       ForallParamPack& out,
       const ForallParamPack& in)
   {
-    CAMP_EXPAND(detail::combine<EXEC_POL>(camp::get<Seq>(out.param_tup),
-                                          camp::get<Seq>(in.param_tup)));
+    CAMP_EXPAND(param_combine(pol, camp::get<Seq>(out.param_tup),
+                              camp::get<Seq>(in.param_tup)));
   }

   template<typename EXEC_POL, camp::idx_t... Seq>
-  RAJA_HOST_DEVICE static constexpr void detail_combine(
-      EXEC_POL,
+  RAJA_HOST_DEVICE static constexpr void parampack_combine(
+      EXEC_POL const& pol,
       camp::idx_seq<Seq...>,
       ForallParamPack& f_params)
   {
-    CAMP_EXPAND(detail::combine<EXEC_POL>(camp::get<Seq>(f_params.param_tup)));
+    CAMP_EXPAND(param_combine(pol, camp::get<Seq>(f_params.param_tup)));
   }

   // Resolve
   template<typename EXEC_POL, camp::idx_t... Seq, typename... Args>
-  static constexpr void detail_resolve(EXEC_POL,
-                                       camp::idx_seq<Seq...>,
-                                       ForallParamPack& f_params,
-                                       Args&&... args)
+  static constexpr void parampack_resolve(EXEC_POL const& pol,
+                                          camp::idx_seq<Seq...>,
+                                          ForallParamPack& f_params,
+                                          Args&&... args)
   {
-    CAMP_EXPAND(detail::resolve<EXEC_POL>(camp::get<Seq>(f_params.param_tup),
-                                          std::forward<Args>(args)...));
+    CAMP_EXPAND(param_resolve(pol, camp::get<Seq>(f_params.param_tup),
+                              std::forward<Args>(args)...));
   }

   // Used to construct the argument TYPES that will be invoked with the lambda.
@@ -155,33 +155,36 @@ struct ParamMultiplexer
            typename... Params,
            typename... Args,
            typename FP = ForallParamPack<Params...>>
-  static void constexpr init(ForallParamPack<Params...>& f_params,
-                             Args&&... args)
+  static void constexpr parampack_init(EXEC_POL const& pol,
+                                       ForallParamPack<Params...>& f_params,
+                                       Args&&... args)
   {
-    FP::detail_init(EXEC_POL(), typename FP::params_seq(), f_params,
-                    std::forward<Args>(args)...);
+    FP::parampack_init(pol, typename FP::params_seq(), f_params,
+                       std::forward<Args>(args)...);
   }

   template<typename EXEC_POL,
            typename... Params,
            typename... Args,
            typename FP = ForallParamPack<Params...>>
-  static void constexpr combine(ForallParamPack<Params...>& f_params,
-                                Args&&... args)
+  static void constexpr parampack_combine(EXEC_POL const& pol,
+                                          ForallParamPack<Params...>& f_params,
+                                          Args&&... args)
   {
-    FP::detail_combine(EXEC_POL(), typename FP::params_seq(), f_params,
-                       std::forward<Args>(args)...);
+    FP::parampack_combine(pol, typename FP::params_seq(), f_params,
+                          std::forward<Args>(args)...);
   }

   template<typename EXEC_POL,
            typename... Params,
            typename... Args,
            typename FP = ForallParamPack<Params...>>
-  static void constexpr resolve(ForallParamPack<Params...>& f_params,
-                                Args&&... args)
+  static void constexpr parampack_resolve(EXEC_POL const& pol,
+                                          ForallParamPack<Params...>& f_params,
+                                          Args&&... args)
   {
-    FP::detail_resolve(EXEC_POL(), typename FP::params_seq(), f_params,
-                       std::forward<Args>(args)...);
+    FP::parampack_resolve(pol, typename FP::params_seq(), f_params,
+                          std::forward<Args>(args)...);
   }
 };
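Net effect at call sites: the execution policy moves from an explicit template argument (init<EXEC_POL>(...)) to a tag object passed by const reference (parampack_init(EXEC_POL {}, ...)), so the matching param_* overload is selected by ordinary argument deduction and overload resolution. A minimal sketch of that convention, using hypothetical stand-ins (seq_policy, SumParam, ParamMux) rather than RAJA's real types:

#include <utility>

namespace sketch
{
struct seq_policy
{};  // hypothetical policy tag

struct SumParam
{
  int value = 0;
};

// Overloads selected when a seq_policy tag is passed; other backends would
// contribute their own param_init/param_combine overloads on their own tags.
inline void param_init(seq_policy const&, SumParam& p) { p.value = 0; }

inline void param_combine(seq_policy const&, SumParam& out, SumParam const& in)
{
  out.value += in.value;
}

struct ParamMux
{
  // The policy arrives as a function argument and is forwarded through,
  // mirroring ParamMultiplexer::parampack_* above.
  template<typename EXEC_POL, typename... Args>
  static void parampack_init(EXEC_POL const& pol, Args&&... args)
  {
    param_init(pol, std::forward<Args>(args)...);
  }

  template<typename EXEC_POL, typename... Args>
  static void parampack_combine(EXEC_POL const& pol, Args&&... args)
  {
    param_combine(pol, std::forward<Args>(args)...);
  }
};
}  // namespace sketch

int main()
{
  sketch::SumParam a, b;
  sketch::ParamMux::parampack_init(sketch::seq_policy {}, a);
  b.value = 5;
  sketch::ParamMux::parampack_combine(sketch::seq_policy {}, a, b);
  return a.value;  // 5
}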
include/RAJA/pattern/params/reducer.hpp (+2 -2)
@@ -89,7 +89,7 @@ struct Reducer : public ForallParamBase
   value_type* target = nullptr;

   // combineTarget() performs the final op on the target data and location in
-  // resolve()
+  // param_resolve()
   RAJA_HOST_DEVICE void combineTarget(value_type in)
   {
     value_type temp = op {}(*target, in);

@@ -173,7 +173,7 @@ struct Reducer<Op<ValLoc<T, I>, ValLoc<T, I>, ValLoc<T, I>>,
   target_index_type* target_index = nullptr;

   // combineTarget() performs the final op on the target data and location in
-  // resolve()
+  // param_resolve()
   RAJA_HOST_DEVICE void combineTarget(value_type in)
   {
     // Create a different temp ValLoc solely for combining

include/RAJA/policy/cuda/forall.hpp (+8 -7)
@@ -445,7 +445,7 @@ __launch_bounds__(BlockSize, BlocksPerSM) __global__
   {
     RAJA::expt::invoke_body(f_params, body, idx[ii]);
   }
-  RAJA::expt::ParamMultiplexer::combine<EXEC_POL>(f_params);
+  RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params);
 }

 ///

@@ -474,7 +474,7 @@ __global__ void forallp_cuda_kernel(LOOP_BODY loop_body,
   {
     RAJA::expt::invoke_body(f_params, body, idx[ii]);
   }
-  RAJA::expt::ParamMultiplexer::combine<EXEC_POL>(f_params);
+  RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params);
 }

 template<

@@ -565,7 +565,7 @@ __launch_bounds__(BlockSize, BlocksPerSM) __global__
   {
     RAJA::expt::invoke_body(f_params, body, idx[ii]);
   }
-  RAJA::expt::ParamMultiplexer::combine<EXEC_POL>(f_params);
+  RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params);
 }

 ///

@@ -597,7 +597,7 @@ __global__ void forallp_cuda_kernel(LOOP_BODY loop_body,
   {
     RAJA::expt::invoke_body(f_params, body, idx[ii]);
   }
-  RAJA::expt::ParamMultiplexer::combine<EXEC_POL>(f_params);
+  RAJA::expt::ParamMultiplexer::parampack_combine(EXEC_POL {}, f_params);
 }

 }  // namespace impl

@@ -712,7 +712,7 @@ forall_impl(resources::Cuda cuda_res,
                 IterationGetter,
                 Concretizer,
                 BlocksPerSM,
-                Async> const&,
+                Async> const& pol,
             Iterable&& iter,
             LoopBody&& loop_body,
             ForallParam f_params)

@@ -764,7 +764,7 @@ forall_impl(resources::Cuda cuda_res,
   launch_info.res = cuda_res;

   {
-    RAJA::expt::ParamMultiplexer::init<EXEC_POL>(f_params, launch_info);
+    RAJA::expt::ParamMultiplexer::parampack_init(pol, f_params, launch_info);

     //
     // Privatize the loop_body, using make_launch_body to setup reductions

@@ -781,7 +781,8 @@ forall_impl(resources::Cuda cuda_res,
     RAJA::cuda::launch(func, dims.blocks, dims.threads, args, shmem, cuda_res,
                        Async);

-    RAJA::expt::ParamMultiplexer::resolve<EXEC_POL>(f_params, launch_info);
+    RAJA::expt::ParamMultiplexer::parampack_resolve(pol, f_params,
+                                                    launch_info);
   }

   RAJA_FT_END;

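The renamed calls above keep the same per-launch lifecycle: parampack_init on the host before the kernel launch, parampack_combine inside the kernel, and parampack_resolve on the host after the launch. A host-only sketch of that sequence, with hypothetical stand-ins (cuda_policy_tag, LaunchInfo, SumParam) in place of RAJA's types:

#include <cstdio>

// Hypothetical stand-ins; RAJA's real types differ.
struct cuda_policy_tag {};
struct LaunchInfo { int stream = 0; };
struct SumParam { int device_copy = 0; int value = 0; };

void param_init(cuda_policy_tag const&, SumParam& p, LaunchInfo const&)
{
  p.device_copy = 0;  // stage per-launch state before the kernel runs
}

void param_combine(cuda_policy_tag const&, SumParam& p)
{
  p.device_copy += 1;  // stand-in for the device-side partial combine
}

void param_resolve(cuda_policy_tag const&, SumParam& p, LaunchInfo const&)
{
  p.value = p.device_copy;  // publish the final result after the launch
}

int main()
{
  cuda_policy_tag pol;
  LaunchInfo info;
  SumParam p;
  param_init(pol, p, info);     // before RAJA::cuda::launch in the real code
  param_combine(pol, p);        // inside the kernel in the real code
  param_resolve(pol, p, info);  // after the launch completes
  std::printf("%d\n", p.value);
}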
include/RAJA/policy/cuda/launch.hpp (+16 -15)
@@ -61,8 +61,8 @@ __global__ void launch_new_reduce_global_fcn(BODY body_in,
   RAJA::expt::invoke_body(reduce_params, body, ctx);

   // Using a flatten global policy as we may use all dimensions
-  RAJA::expt::ParamMultiplexer::combine<RAJA::cuda_flatten_global_xyz_direct>(
-      reduce_params);
+  RAJA::expt::ParamMultiplexer::parampack_combine(
+      RAJA::cuda_flatten_global_xyz_direct {}, reduce_params);
 }

 template<bool async>

@@ -186,8 +186,8 @@ struct LaunchExecute<
   {
     using EXEC_POL = RAJA::policy::cuda::cuda_launch_explicit_t<
         async, named_usage::unspecified, named_usage::unspecified>;
-    RAJA::expt::ParamMultiplexer::init<EXEC_POL>(launch_reducers,
-                                                 launch_info);
+    RAJA::expt::ParamMultiplexer::parampack_init(
+        EXEC_POL {}, launch_reducers, launch_info);

     //
     // Privatize the loop_body, using make_launch_body to setup reductions

@@ -203,8 +203,8 @@ struct LaunchExecute<
     RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size,
                        cuda_res, async, kernel_name);

-    RAJA::expt::ParamMultiplexer::resolve<EXEC_POL>(launch_reducers,
-                                                    launch_info);
+    RAJA::expt::ParamMultiplexer::parampack_resolve(
+        EXEC_POL {}, launch_reducers, launch_info);
   }

   RAJA_FT_END;

@@ -252,8 +252,8 @@ __launch_bounds__(num_threads, BLOCKS_PER_SM) __global__
   RAJA::expt::invoke_body(reduce_params, body, ctx);

   // Using a flatten global policy as we may use all dimensions
-  RAJA::expt::ParamMultiplexer::combine<RAJA::cuda_flatten_global_xyz_direct>(
-      reduce_params);
+  RAJA::expt::ParamMultiplexer::parampack_combine(
+      RAJA::cuda_flatten_global_xyz_direct {}, reduce_params);
 }

 template<bool async, int nthreads, size_t BLOCKS_PER_SM>

@@ -375,11 +375,12 @@ struct LaunchExecute<
   launch_info.res = cuda_res;

   {
-    using EXEC_POL =
-        RAJA::policy::cuda::cuda_launch_explicit_t<async, nthreads,
-                                                   BLOCKS_PER_SM>;
-    RAJA::expt::ParamMultiplexer::init<EXEC_POL>(launch_reducers,
-                                                 launch_info);
+    // Use a generic block size policy here to match that used in
+    // parampack_combine
+    using EXEC_POL = RAJA::policy::cuda::cuda_launch_explicit_t<
+        async, named_usage::unspecified, named_usage::unspecified>;
+    RAJA::expt::ParamMultiplexer::parampack_init(
+        EXEC_POL {}, launch_reducers, launch_info);

     //
     // Privatize the loop_body, using make_launch_body to setup reductions

@@ -395,8 +396,8 @@ struct LaunchExecute<
     RAJA::cuda::launch(func, gridSize, blockSize, args, shared_mem_size,
                        cuda_res, async, kernel_name);

-    RAJA::expt::ParamMultiplexer::resolve<EXEC_POL>(launch_reducers,
-                                                    launch_info);
+    RAJA::expt::ParamMultiplexer::parampack_resolve(
+        EXEC_POL {}, launch_reducers, launch_info);
   }

   RAJA_FT_END;

include/RAJA/policy/cuda/params/kernel_name.hpp (+5 -3)
@@ -16,7 +16,8 @@ namespace detail

 // Init
 template<typename EXEC_POL>
-camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> init(
+camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> param_init(
+    EXEC_POL const&,
     KernelName& kn,
     const RAJA::cuda::detail::cudaInfo&)
 {

@@ -31,12 +32,13 @@ camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> init(
 template<typename EXEC_POL>
 RAJA_HOST_DEVICE camp::concepts::enable_if<
     type_traits::is_cuda_policy<EXEC_POL>>
-combine(KernelName&)
+param_combine(EXEC_POL const&, KernelName&)
 {}

 // Resolve
 template<typename EXEC_POL>
-camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> resolve(
+camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> param_resolve(
+    EXEC_POL const&,
     KernelName&,
     const RAJA::cuda::detail::cudaInfo&)
 {

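Each backend constrains its param_* overloads with an enable_if on the policy type, so only the CUDA overloads participate when a CUDA policy is deduced; the new leading EXEC_POL const& parameter is what makes that deduction possible from the tag argument. A standard-library-only sketch of the same pattern, with hypothetical cuda_tag/seq_tag types and an is_cuda_policy trait standing in for RAJA's:

#include <cstdio>
#include <type_traits>

struct cuda_tag {};  // hypothetical policy tags
struct seq_tag {};

template<typename P>
struct is_cuda_policy : std::is_same<P, cuda_tag> {};

struct KernelName { const char* name; };

// Participates in overload resolution only for CUDA-like policies.
template<typename EXEC_POL>
std::enable_if_t<is_cuda_policy<EXEC_POL>::value>
param_init(EXEC_POL const&, KernelName& kn)
{
  std::printf("cuda init: %s\n", kn.name);
}

// Fallback for everything else.
template<typename EXEC_POL>
std::enable_if_t<!is_cuda_policy<EXEC_POL>::value>
param_init(EXEC_POL const&, KernelName& kn)
{
  std::printf("host init: %s\n", kn.name);
}

int main()
{
  KernelName kn {"my_kernel"};
  param_init(cuda_tag {}, kn);  // picks the CUDA-constrained overload
  param_init(seq_tag {}, kn);   // picks the fallback
}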
include/RAJA/policy/cuda/params/reduce.hpp (+5 -3)
@@ -19,7 +19,8 @@ namespace detail

 // Init
 template<typename EXEC_POL, typename OP, typename T, typename VOp>
-camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> init(
+camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> param_init(
+    EXEC_POL const&,
     Reducer<OP, T, VOp>& red,
     RAJA::cuda::detail::cudaInfo& ci)
 {

@@ -34,15 +35,16 @@ camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> init(
 template<typename EXEC_POL, typename OP, typename T, typename VOp>
 RAJA_HOST_DEVICE camp::concepts::enable_if<
     type_traits::is_cuda_policy<EXEC_POL>>
-combine(Reducer<OP, T, VOp>& red)
+param_combine(EXEC_POL const&, Reducer<OP, T, VOp>& red)
 {
   RAJA::cuda::impl::expt::grid_reduce<typename EXEC_POL::IterationGetter, OP>(
       red.devicetarget, red.getVal(), red.device_mem, red.device_count);
 }

 // Resolve
 template<typename EXEC_POL, typename OP, typename T, typename VOp>
-camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> resolve(
+camp::concepts::enable_if<type_traits::is_cuda_policy<EXEC_POL>> param_resolve(
+    EXEC_POL const&,
     Reducer<OP, T, VOp>& red,
     RAJA::cuda::detail::cudaInfo& ci)
 {

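Note that param_combine still reads compile-time configuration out of the policy type (EXEC_POL::IterationGetter in the grid_reduce call) even though the policy now also arrives as a function argument: the object is only a deduction tag, and the information lives in its type. A minimal sketch of that property, using hypothetical stand-ins (block_256_getter, my_cuda_policy):

#include <cstdio>

// Hypothetical stand-ins: the policy object is an empty tag whose *type*
// carries the compile-time configuration, as with EXEC_POL::IterationGetter
// in the grid_reduce call above.
struct block_256_getter { static constexpr int block_size = 256; };

struct my_cuda_policy { using IterationGetter = block_256_getter; };

template<typename EXEC_POL>
void param_combine(EXEC_POL const&)  // tag is passed but never read
{
  // Recover type-level info from the deduced policy type.
  std::printf("block size: %d\n", EXEC_POL::IterationGetter::block_size);
}

int main() { param_combine(my_cuda_policy {}); }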