Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A few Kokkos quality of life improvements #1078

Merged
merged 3 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- [[PR 1019]](https://github.com/parthenon-hpc-lab/parthenon/pull/1019) Enable output for non-cell-centered variables

### Changed (changing behavior/API/variables/...)
- [[PR 1078]](https://github.com/parthenon-hpc-lab/parthenon/pull/1078) Add reduction fallback in 1D. Add IndexRange overload for 1D par loops
- [[PR 1024]](https://github.com/parthenon-hpc-lab/parthenon/pull/1024) Add .outN. to history output filenames
- [[PR 1004]](https://github.com/parthenon-hpc-lab/parthenon/pull/1004) Allow parameter modification from an input file for restarts

Expand Down
5 changes: 5 additions & 0 deletions src/basic_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ using Real = double;
#endif
#endif

/// Pair of integer indices describing a closed index interval [s, e].
/// Both members default to 0.
struct IndexRange {
  /// Starting index (inclusive)
  int s{0};
  /// Ending index (inclusive)
  int e{0};
};

// Enum specifying whether or not you requested a flux variable in
// GetVariablesByFlag type methods
// TODO(JMM): Is this the right place for this?
Expand Down
47 changes: 29 additions & 18 deletions src/kokkos_abstraction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,31 +212,35 @@ template <class... Args>
inline void kokkos_dispatch(ParallelScanDispatch, Args &&...args) {
Kokkos::parallel_scan(std::forward<Args>(args)...);
}

} // namespace dispatch_impl

// this pattern does not support reductions yet
template <typename Tag, typename Function>
inline void par_dispatch(LoopPatternSimdFor, const std::string &name,
DevExecSpace exec_space, const int &il, const int &iu,
const Function &function) {
// 1D loop using RangePolicy loops
template <typename Tag, typename Pattern, typename Function, class... Args>
inline typename std::enable_if<sizeof...(Args) <= 1, void>::type
par_dispatch(Pattern, const std::string &name, DevExecSpace exec_space, const int &il,
const int &iu, const Function &function, Args &&...args) {
PARTHENON_INSTRUMENT_REGION(name)
if constexpr (std::is_same<Pattern, LoopPatternSimdFor>::value &&
std::is_same<Tag, dispatch_impl::ParallelForDispatch>::value) {
#pragma omp simd
for (auto i = il; i <= iu; i++)
function(i);
for (auto i = il; i <= iu; i++) {
function(i);
}
} else {
Tag tag;
kokkos_dispatch(tag, name,
Kokkos::Experimental::require(
Kokkos::RangePolicy<>(exec_space, il, iu + 1),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
function, std::forward<Args>(args)...);
}
}

// 1D loop using RangePolicy loops
template <typename Tag, typename Function, class... Args>
template <typename Tag, typename Pattern, typename Function, class... Args>
inline typename std::enable_if<sizeof...(Args) <= 1, void>::type
par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_space,
const int &il, const int &iu, const Function &function, Args &&...args) {
Tag tag;
kokkos_dispatch(tag, name,
Kokkos::Experimental::require(
Kokkos::RangePolicy<>(exec_space, il, iu + 1),
Kokkos::Experimental::WorkItemProperty::HintLightWeight),
function, std::forward<Args>(args)...);
par_dispatch(Pattern p, const std::string &name, DevExecSpace exec_space,
const IndexRange &r, const Function &function, Args &&...args) {
par_dispatch<Tag>(p, name, exec_space, r.s, r.e, function, std::forward<Args>(args)...);
}

// 2D loop using MDRange loops
Expand Down Expand Up @@ -927,6 +931,13 @@ KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor,
}
}

// Convenience overload of par_for_inner that accepts an IndexRange.
// Unpacks the range's start (r.s) and end (r.e) indices and hands them, with
// the same tag, team member and functor, to the par_for_inner overload that
// takes the two bounds as separate arguments.
template <typename Tag, typename Function>
KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(const Tag &t, team_mbr_t member,
                                               const IndexRange r,
                                               const Function &function) {
  const int first = r.s;
  const int last = r.e;
  par_for_inner(t, member, first, last, function);
}

template <typename... Args>
KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(team_mbr_t team_member, Args &&...args) {
par_for_inner(DEFAULT_INNER_LOOP_PATTERN, team_member, std::forward<Args>(args)...);
Expand Down
7 changes: 2 additions & 5 deletions src/mesh/domain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,13 @@
#include <type_traits>
#include <vector>

#include <Kokkos_Core.hpp>

#include "basic_types.hpp"
#include "defs.hpp"

namespace parthenon {

/// Pair of integer indices describing a closed index interval [s, e].
/// Both members default to 0.
struct IndexRange {
  /// Starting index (inclusive)
  int s{0};
  /// Ending index (inclusive)
  int e{0};
};

// Assuming we have a block
//
// - - - - - - - - - - ^
Expand Down
3 changes: 1 addition & 2 deletions src/prolong_restrict/pr_loops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,7 @@ InnerHostProlongationRestrictionLoop(std::size_t buf, const ProResInfoArrHost_t
auto coarse = info(buf).coarse;
auto fine = info(buf).fine;
par_for(
PARTHENON_AUTO_LABEL, 0, 0, 0, 0, 0, idxer.size() - 1,
KOKKOS_LAMBDA(const int, const int, const int ii) {
PARTHENON_AUTO_LABEL, 0, idxer.size() - 1, KOKKOS_LAMBDA(const int ii) {
Yurlungur marked this conversation as resolved.
Show resolved Hide resolved
const auto [t, u, v, k, j, i] = idxer(ii);
if (idxer.IsActive(k, j, i)) {
Stencil::template Do<DIM, FEL, CEL>(t, u, v, k, j, i, ckb, cjb, cib, kb, jb, ib,
Expand Down
31 changes: 31 additions & 0 deletions tst/unit/kokkos_abstraction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <catch2/catch.hpp>

#include "basic_types.hpp"
#include "kokkos_abstraction.hpp"

using parthenon::DevExecSpace;
Expand Down Expand Up @@ -478,3 +479,33 @@ TEST_CASE("Parallel scan", "[par_scan]") {
default_exec_space) == true);
}
}

// Exercises the IndexRange-based 1D par_for and par_reduce wrappers with the
// given loop pattern on the given execution space.
//
// A buffer of N ints is filled via par_for over the range [0, N - 1]; then
// par_reduce sums the loop indices over the same range using Kokkos::Sum.
// Returns true when the reduced value equals 0 + 1 + ... + (N - 1).
//
// NOTE(review): the reduction adds the loop index itself, not buffer(i), so
// the values written by the par_for are never read back here.
template <class T>
bool test_wrapper_reduce_1d(T loop_pattern, DevExecSpace exec_space) {
  constexpr int N = 10;
  // Closed form of the sum 0 + 1 + ... + (N - 1)
  constexpr int expected = N * (N - 1) / 2;

  parthenon::IndexRange r{0, N - 1};
  parthenon::ParArray1D<int> buffer("Testing buffer", N);

  // Fill the buffer so that entry idx holds idx
  parthenon::par_for(
      loop_pattern, "Initialize parallel reduce array", exec_space, r,
      KOKKOS_LAMBDA(const int idx) { buffer(idx) = idx; });

  // Sum the indices of the range on the device
  int reduced = 0;
  parthenon::par_reduce(
      loop_pattern, "Sum via par reduce", exec_space, r,
      KOKKOS_LAMBDA(const int idx, int &acc) { acc += idx; },
      Kokkos::Sum<int>(reduced));

  return expected == reduced;
}

// Runs the 1D reduce wrapper check with the flat-range loop pattern, and
// additionally with the simd-for loop pattern when DevExecSpace is
// Kokkos::Serial.
TEST_CASE("Parallel reduce", "[par_reduce]") {
  constexpr bool device_is_serial =
      std::is_same<DevExecSpace, Kokkos::Serial>::value;
  DevExecSpace exec_space{};

  REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag, exec_space) ==
          true);
  if constexpr (device_is_serial) {
    REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag, exec_space) ==
            true);
  }
}
Loading