Skip to content

Commit abfae20

Browse files
authored
Merge pull request #967 from parthenon-hpc-lab/jdolence/force_inline_inner
Force inline par_for_inner
2 parents ae0632a + 790cebb commit abfae20

File tree

2 files changed

+30
-29
lines changed

2 files changed

+30
-29
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
- [[PR 890]](https://github.com/parthenon-hpc-lab/parthenon/pull/890) Fix bugs in sparse communication and prolongation
3737

3838
### Infrastructure (changes irrelevant to downstream codes)
39+
- [[PR 967]](https://github.com/parthenon-hpc-lab/parthenon/pull/967) Change INLINE to FORCEINLINE on par_for_inner overloads
3940
- [[PR 938]](https://github.com/parthenon-hpc-lab/parthenon/pull/938) Restructure buffer packing/unpacking kernel hierarchical parallelism
4041
- [[PR 944]](https://github.com/parthenon-hpc-lab/parthenon/pull/944) Move sparse pack identifier creation to descriptor
4142
- [[PR 904]](https://github.com/parthenon-hpc-lab/parthenon/pull/904) Move to prolongation/restriction in one for AMR and communicate non-cell centered fields

src/kokkos_abstraction.hpp

+29-29
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
//========================================================================================
22
// Parthenon performance portable AMR framework
3-
// Copyright(C) 2020-2022 The Parthenon collaboration
3+
// Copyright(C) 2020-2023 The Parthenon collaboration
44
// Licensed under the 3-clause BSD License, see LICENSE file for details
55
//========================================================================================
6-
// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved.
6+
// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved.
77
//
88
// This program was produced under U.S. Government contract 89233218CNA000001
99
// for Los Alamos National Laboratory (LANL), which is operated by Triad
@@ -707,7 +707,7 @@ inline void par_for_outer(OuterLoopPatternTeams, const std::string &name,
707707

708708
// Inner parallel loop using TeamThreadRange
709709
template <typename Function>
710-
KOKKOS_INLINE_FUNCTION void
710+
KOKKOS_FORCEINLINE_FUNCTION void
711711
par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ll, const int lu,
712712
const int ml, const int mu, const int nl, const int nu, const int kl,
713713
const int ku, const int jl, const int ju, const int il, const int iu,
@@ -741,7 +741,7 @@ par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ll, const i
741741
});
742742
}
743743
template <typename Function>
744-
KOKKOS_INLINE_FUNCTION void
744+
KOKKOS_FORCEINLINE_FUNCTION void
745745
par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ml, const int mu,
746746
const int nl, const int nu, const int kl, const int ku, const int jl,
747747
const int ju, const int il, const int iu, const Function &function) {
@@ -770,7 +770,7 @@ par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ml, const i
770770
});
771771
}
772772
template <typename Function>
773-
KOKKOS_INLINE_FUNCTION void
773+
KOKKOS_FORCEINLINE_FUNCTION void
774774
par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int nl, const int nu,
775775
const int kl, const int ku, const int jl, const int ju, const int il,
776776
const int iu, const Function &function) {
@@ -795,10 +795,10 @@ par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int nl, const i
795795
});
796796
}
797797
template <typename Function>
798-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member,
799-
const int kl, const int ku, const int jl,
800-
const int ju, const int il, const int iu,
801-
const Function &function) {
798+
KOKKOS_FORCEINLINE_FUNCTION void
799+
par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int kl, const int ku,
800+
const int jl, const int ju, const int il, const int iu,
801+
const Function &function) {
802802
const int Nk = ku - kl + 1;
803803
const int Nj = ju - jl + 1;
804804
const int Ni = iu - il + 1;
@@ -815,9 +815,9 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_m
815815
});
816816
}
817817
template <typename Function>
818-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member,
819-
const int jl, const int ju, const int il,
820-
const int iu, const Function &function) {
818+
KOKKOS_FORCEINLINE_FUNCTION void
819+
par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int jl, const int ju,
820+
const int il, const int iu, const Function &function) {
821821
const int Nj = ju - jl + 1;
822822
const int Ni = iu - il + 1;
823823
const int NjNi = Nj * Ni;
@@ -828,22 +828,22 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_m
828828
});
829829
}
830830
template <typename Function>
831-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member,
832-
const int il, const int iu,
833-
const Function &function) {
831+
KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR,
832+
team_mbr_t team_member, const int il,
833+
const int iu, const Function &function) {
834834
Kokkos::parallel_for(Kokkos::TeamThreadRange(team_member, il, iu + 1), function);
835835
}
836836
// Inner parallel loop using TeamVectorRange
837837
template <typename Function>
838-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTVR, team_mbr_t team_member,
839-
const int il, const int iu,
840-
const Function &function) {
838+
KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTVR,
839+
team_mbr_t team_member, const int il,
840+
const int iu, const Function &function) {
841841
Kokkos::parallel_for(Kokkos::TeamVectorRange(team_member, il, iu + 1), function);
842842
}
843843

844844
// Inner parallel loop using FOR SIMD
845845
template <typename Function>
846-
KOKKOS_INLINE_FUNCTION void
846+
KOKKOS_FORCEINLINE_FUNCTION void
847847
par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int nl, const int nu,
848848
const int kl, const int ku, const int jl, const int ju, const int il,
849849
const int iu, const Function &function) {
@@ -859,10 +859,10 @@ par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int nl, con
859859
}
860860
}
861861
template <typename Function>
862-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member,
863-
const int kl, const int ku, const int jl,
864-
const int ju, const int il, const int iu,
865-
const Function &function) {
862+
KOKKOS_FORCEINLINE_FUNCTION void
863+
par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int kl, const int ku,
864+
const int jl, const int ju, const int il, const int iu,
865+
const Function &function) {
866866
for (int k = kl; k <= ku; ++k) {
867867
for (int j = jl; j <= ju; ++j) {
868868
#pragma omp simd
@@ -873,9 +873,9 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t te
873873
}
874874
}
875875
template <typename Function>
876-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member,
877-
const int jl, const int ju, const int il,
878-
const int iu, const Function &function) {
876+
KOKKOS_FORCEINLINE_FUNCTION void
877+
par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int jl, const int ju,
878+
const int il, const int iu, const Function &function) {
879879
for (int j = jl; j <= ju; ++j) {
880880
#pragma omp simd
881881
for (int i = il; i <= iu; i++) {
@@ -884,9 +884,9 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t te
884884
}
885885
}
886886
template <typename Function>
887-
KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member,
888-
const int il, const int iu,
889-
const Function &function) {
887+
KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor,
888+
team_mbr_t team_member, const int il,
889+
const int iu, const Function &function) {
890890
#pragma omp simd
891891
for (int i = il; i <= iu; i++) {
892892
function(i);

0 commit comments

Comments
 (0)