
WIP MeshBlock buffer packing in one kernel #302


Merged · 27 commits · Nov 5, 2020
Commits (27)
57fd293
Rename MakeTasks to MakeTaskCollection
pgrete Sep 4, 2020
9f472ea
Use TaskCollection in Advection example
pgrete Sep 4, 2020
f032651
Use MeshTask for FluxDivergence
pgrete Sep 4, 2020
cd61c7d
Fix format and sparse fix
pgrete Sep 4, 2020
9ca7943
Pass blocks as reference
pgrete Sep 4, 2020
9f674c8
Update and Average to MeshBlockPacks
pgrete Sep 6, 2020
7c97d53
WIP pack-in-one buffers
pgrete Sep 10, 2020
4b049fa
SetBoundaries pack in one
pgrete Sep 11, 2020
6f3379a
Fix ptr access
pgrete Sep 11, 2020
6a0d8b5
Merge branch 'develop' into pgrete/meshblock-pack-in-one
pgrete Sep 17, 2020
f5f0b2f
Update interfaces
pgrete Sep 17, 2020
6f3c07f
Merge branch 'jmm/meshblockpack-cache' into pgrete/meshblock-pack-in-one
pgrete Sep 28, 2020
985c958
Fix interfaces from merge
pgrete Sep 28, 2020
328652f
Fix kernel naming in advection package
pgrete Sep 29, 2020
23a1856
Move buffer pack in one to separate file
pgrete Oct 2, 2020
3afdc10
Add profile markers to boundary funcs
pgrete Oct 3, 2020
9f8fc14
Merge remote-tracking branch 'origin/jmm/meshblockpack-cache' into pg…
pgrete Oct 3, 2020
80e21e5
Reintroduce container based update func
pgrete Oct 3, 2020
f347072
FluxDiv uses BlockPacks
pgrete Oct 4, 2020
20253e2
Add ndim_ to MeshBlockPack to make avail on host
pgrete Oct 5, 2020
7126d6f
Update and Average use MeshBlockPacks
pgrete Oct 5, 2020
c34d7ff
Name refactor
pgrete Oct 5, 2020
ae6aa03
Reuse packs in Send/SetBoundaries
pgrete Oct 5, 2020
d586a9b
Merge branch 'develop' into pgrete/meshblock-pack-in-one
pgrete Oct 19, 2020
0fb9b23
Use pack caching in Advection example
pgrete Oct 19, 2020
90ef006
Fix task dependency
pgrete Oct 19, 2020
c48455e
fix package name
pgrete Oct 19, 2020
220 changes: 139 additions & 81 deletions example/advection/advection_driver.cpp
@@ -18,6 +18,10 @@
 // Local Includes
 #include "advection_driver.hpp"
 #include "advection_package.hpp"
+#include "bvals/cc/bvals_cc_in_one.hpp"
+#include "interface/metadata.hpp"
+#include "mesh/meshblock_pack.hpp"
+#include "parthenon/driver.hpp"
 
 using namespace parthenon::driver::prelude;
 
@@ -44,106 +48,160 @@ AdvectionDriver::AdvectionDriver(ParameterInput *pin, ApplicationInput *app_in,
   pin->CheckDesired("Advection", "refine_tol");
   pin->CheckDesired("Advection", "derefine_tol");
 }
 
 // first some helper tasks
-TaskStatus UpdateContainer(MeshBlock *pmb, int stage,
-                           std::vector<std::string> &stage_name, Integrator *integrator) {
-  // const Real beta = stage_wghts[stage-1].beta;
+auto UpdateContainer(const int stage, Integrator *integrator,
+                     MeshBlockVarPack<Real> &in_pack,
+                     const MeshBlockVarPack<Real> &base_pack,
+                     const MeshBlockVarPack<Real> &dudt_pack,
+                     MeshBlockVarPack<Real> &out_pack) -> TaskStatus {
   const Real beta = integrator->beta[stage - 1];
   const Real dt = integrator->dt;
-  auto &base = pmb->real_containers.Get();
-  auto &cin = pmb->real_containers.Get(stage_name[stage - 1]);
-  auto &cout = pmb->real_containers.Get(stage_name[stage]);
-  auto &dudt = pmb->real_containers.Get("dUdt");
-  parthenon::Update::AverageContainers(cin, base, beta);
-  parthenon::Update::UpdateContainer(cin, dudt, beta * dt, cout);
+  parthenon::Update::AverageContainers(in_pack, base_pack, beta);
+  parthenon::Update::UpdateContainer(in_pack, dudt_pack, beta * dt, out_pack);
   return TaskStatus::complete;
 }
 
 // See the advection.hpp declaration for a description of how this function gets called.
-TaskList AdvectionDriver::MakeTaskList(MeshBlock *pmb, int stage) {
-  TaskList tl;
+TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const int stage) {
+  TaskCollection tc;
 
   TaskID none(0);
-  // first make other useful containers
-  if (stage == 1) {
-    auto &base = pmb->real_containers.Get();
-    pmb->real_containers.Add("dUdt", base);
-    for (int i = 1; i < integrator->nstages; i++)
-      pmb->real_containers.Add(stage_name[i], base);
-  }
-
-  // pull out the container we'll use to get fluxes and/or compute RHSs
-  auto &sc0 = pmb->real_containers.Get(stage_name[stage - 1]);
-  // pull out a container we'll use to store dU/dt.
-  // This is just -flux_divergence in this example
-  auto &dudt = pmb->real_containers.Get("dUdt");
-  // pull out the container that will hold the updated state
-  // effectively, sc1 = sc0 + dudt*dt
-  auto &sc1 = pmb->real_containers.Get(stage_name[stage]);
-
-  auto start_recv = tl.AddTask(none, &Container<Real>::StartReceiving, sc1.get(),
-                               BoundaryCommSubset::all);
-
-  auto advect_flux = tl.AddTask(none, advection_package::CalculateFluxes, sc0);
-
-  auto send_flux =
-      tl.AddTask(advect_flux, &Container<Real>::SendFluxCorrection, sc0.get());
-  auto recv_flux =
-      tl.AddTask(advect_flux, &Container<Real>::ReceiveFluxCorrection, sc0.get());
-
-  // compute the divergence of fluxes of conserved variables
-  auto flux_div = tl.AddTask(recv_flux, parthenon::Update::FluxDivergence, sc0, dudt);
-
-  // apply du/dt to all independent fields in the container
-  auto update_container =
-      tl.AddTask(flux_div, UpdateContainer, pmb, stage, stage_name, integrator);
-
-  // update ghost cells
-  auto send =
-      tl.AddTask(update_container, &Container<Real>::SendBoundaryBuffers, sc1.get());
-  auto recv = tl.AddTask(send, &Container<Real>::ReceiveBoundaryBuffers, sc1.get());
-  auto fill_from_bufs = tl.AddTask(recv, &Container<Real>::SetBoundaries, sc1.get());
-  auto clear_comm_flags = tl.AddTask(fill_from_bufs, &Container<Real>::ClearBoundary,
-                                     sc1.get(), BoundaryCommSubset::all);
-
-  auto prolong_bound = tl.AddTask(
-      fill_from_bufs,
-      [](MeshBlock *pmb) {
-        pmb->pbval->ProlongateBoundaries(0.0, 0.0);
-        return TaskStatus::complete;
-      },
-      pmb);
-
-  // set physical boundaries
-  auto set_bc = tl.AddTask(prolong_bound, parthenon::ApplyBoundaryConditions, sc1);
-
-  // fill in derived fields
-  auto fill_derived =
-      tl.AddTask(set_bc, parthenon::FillDerivedVariables::FillDerived, sc1);
-
-  // estimate next time step
-  if (stage == integrator->nstages) {
-    auto new_dt = tl.AddTask(
-        fill_derived,
-        [](std::shared_ptr<Container<Real>> &rc) {
-          auto pmb = rc->GetBlockPointer();
-          pmb->SetBlockTimestep(parthenon::Update::EstimateTimestep(rc));
-          return TaskStatus::complete;
-        },
-        sc1);
-
-    // Update refinement
-    if (pmesh->adaptive) {
-      auto tag_refine = tl.AddTask(
-          fill_derived,
-          [](MeshBlock *pmb) {
-            pmb->pmr->CheckRefinementCondition();
-            return TaskStatus::complete;
-          },
-          pmb);
-    }
-  }
-  return tl;
+
+  // Number of task lists that can be executed independently and thus *may*
+  // be executed in parallel and asynchronously.
+  // Being extra verbose here in this example to highlight that this is not
+  // required to be 1 or blocks.size() but could also only apply to a subset of blocks.
+  auto num_task_lists_executed_independently = blocks.size();
+  TaskRegion &async_region1 = tc.AddRegion(num_task_lists_executed_independently);
+
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &pmb = blocks[i];
+    auto &tl = async_region1[i];
+    // first make other useful containers
+    if (stage == 1) {
+      auto &base = pmb->real_containers.Get();
+      pmb->real_containers.Add("dUdt", base);
+      for (int i = 1; i < integrator->nstages; i++)
+        pmb->real_containers.Add(stage_name[i], base);
+    }
+
+    // pull out the container we'll use to get fluxes and/or compute RHSs
+    auto &sc0 = pmb->real_containers.Get(stage_name[stage - 1]);
+    // pull out a container we'll use to store dU/dt.
+    // This is just -flux_divergence in this example
+    auto &dudt = pmb->real_containers.Get("dUdt");
+    // pull out the container that will hold the updated state
+    // effectively, sc1 = sc0 + dudt*dt
+    auto &sc1 = pmb->real_containers.Get(stage_name[stage]);
+
+    auto start_recv = tl.AddTask(none, &Container<Real>::StartReceiving, sc1.get(),
+                                 BoundaryCommSubset::all);
+
+    auto advect_flux = tl.AddTask(none, advection_package::CalculateFluxes, sc0);
+
+    auto send_flux =
+        tl.AddTask(advect_flux, &Container<Real>::SendFluxCorrection, sc0.get());
+    auto recv_flux =
+        tl.AddTask(advect_flux, &Container<Real>::ReceiveFluxCorrection, sc0.get());
+  }
+
+  auto &sc0flux_packs =
+      pmesh->real_fluxpacks["advection_package"][stage_name[stage - 1] + "_varflux"];
+  auto &sc0_packs =
+      pmesh->real_varpacks["advection_package"][stage_name[stage - 1] + "_var"];
+  auto &sc1_packs = pmesh->real_varpacks["advection_package"][stage_name[stage] + "_var"];
+  auto &dudt_packs = pmesh->real_varpacks["advection_package"]["dudt_var"];
+  auto &base_packs = pmesh->real_varpacks["advection_package"]["base_var"];
+
+  const auto use_pack_in_one =
+      blocks[0]->packages["advection_package"]->Param<bool>("use_pack_in_one");
+
+  // note that tasks within this region (which contains one task list per pack)
+  // could still be executed in parallel
+  TaskRegion &single_tasklist_per_pack_region = tc.AddRegion(base_packs.size());
+  for (int i = 0; i < base_packs.size(); i++) {
+    auto &tl = single_tasklist_per_pack_region[i];
+
+    // compute the divergence of fluxes of conserved variables
+    auto flux_div = tl.AddTask(none, parthenon::Update::FluxDivergenceMesh,
+                               sc0flux_packs[i], dudt_packs[i]);
+    // apply du/dt to all independent fields in the container
+    auto update_container =
+        tl.AddTask(flux_div, UpdateContainer, stage, integrator, sc0_packs[i],
+                   base_packs[i], dudt_packs[i], sc1_packs[i]);
+
+    if (use_pack_in_one) {
+      // update ghost cells
+      auto send = tl.AddTask(update_container,
+                             parthenon::cell_centered_bvars::SendBoundaryBuffers, blocks,
+                             stage_name[stage], sc1_packs[i]);
+
+      auto recv = tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers,
+                             blocks, stage_name[stage]);
+      auto fill_from_bufs =
+          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, blocks,
+                     stage_name[stage], sc1_packs[i]);
+    }
+  }
+  TaskRegion &async_region2 = tc.AddRegion(num_task_lists_executed_independently);
+
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &pmb = blocks[i];
+    auto &tl = async_region2[i];
+    auto &sc1 = pmb->real_containers.Get(stage_name[stage]);
+
+    auto prev_task = none;
+    if (!use_pack_in_one) {
+      // update ghost cells
+      auto send = tl.AddTask(none, &Container<Real>::SendBoundaryBuffers, sc1.get());
+      auto recv = tl.AddTask(send, &Container<Real>::ReceiveBoundaryBuffers, sc1.get());
+      auto fill_from_bufs = tl.AddTask(recv, &Container<Real>::SetBoundaries, sc1.get());
+      prev_task = fill_from_bufs;
+    }
+
+    auto clear_comm_flags = tl.AddTask(prev_task, &Container<Real>::ClearBoundary,
+                                       sc1.get(), BoundaryCommSubset::all);
+
+    auto prolongBound = tl.AddTask(
+        prev_task,
+        [](std::shared_ptr<MeshBlock> pmb) {
+          pmb->pbval->ProlongateBoundaries(0.0, 0.0);
+          return TaskStatus::complete;
+        },
+        pmb);
+
+    // set physical boundaries
+    auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, sc1);
+
+    // fill in derived fields
+    auto fill_derived =
+        tl.AddTask(set_bc, parthenon::FillDerivedVariables::FillDerived, sc1);
+
+    // estimate next time step
+    if (stage == integrator->nstages) {
+      auto new_dt = tl.AddTask(
+          fill_derived,
+          [](std::shared_ptr<Container<Real>> &rc) {
+            auto pmb = rc->GetBlockPointer();
+            pmb->SetBlockTimestep(parthenon::Update::EstimateTimestep(rc));
+            return TaskStatus::complete;
+          },
+          sc1);
+
+      // Update refinement
+      if (pmesh->adaptive) {
+        auto tag_refine = tl.AddTask(
+            fill_derived,
+            [](std::shared_ptr<MeshBlock> pmb) {
+              pmb->pmr->CheckRefinementCondition();
+              return TaskStatus::complete;
+            },
+            pmb);
+      }
+    }
+  }
+  return tc;
 }
 
 } // namespace advection_example
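
Context for the "packing in one kernel" title: instead of launching one small copy kernel per (block, neighbor) boundary buffer, which is latency-bound when there are many small blocks, all buffer descriptions are gathered first and a single kernel packs everything. A minimal sketch of the batching idea, using hypothetical BufferInfo and PackAllInOne stand-ins (not the actual parthenon::cell_centered_bvars implementation; the plain loops stand in for one Kokkos hierarchical parallel_for):

#include <cstddef>
#include <vector>

struct BufferInfo {    // hypothetical descriptor for one (block, neighbor) buffer
  const double *src;   // first source cell to pack (offsets/strides precomputed)
  double *buf;         // destination communication buffer
  std::size_t ncells;  // number of cells this buffer covers
};

// One launch covering all buffers of all blocks instead of one launch each.
// On device, the outer loop would map to teams and the inner one to threads.
void PackAllInOne(const std::vector<BufferInfo> &infos) {
  for (const auto &info : infos) {
    for (std::size_t i = 0; i < info.ncells; ++i) {
      info.buf[i] = info.src[i];
    }
  }
}
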
5 changes: 3 additions & 2 deletions example/advection/advection_driver.hpp
@@ -15,6 +15,7 @@
 #define EXAMPLE_ADVECTION_ADVECTION_DRIVER_HPP_
 
 #include <memory>
+#include <vector>
 
 #include <parthenon/driver.hpp>
 #include <parthenon/package.hpp>
@@ -31,8 +32,8 @@ class AdvectionDriver : public MultiStageBlockTaskDriver {
   // EvolutionDriver::Execute (driver.cpp)
   // MultiStageBlockTaskDriver::Step (multistage.cpp)
   // DriverUtils::ConstructAndExecuteBlockTasks (driver.hpp)
-  // AdvectionDriver::MakeTaskList (advection.cpp)
-  TaskList MakeTaskList(MeshBlock *pmb, int stage);
+  // AdvectionDriver::MakeTaskCollection (advection.cpp)
+  TaskCollection MakeTaskCollection(BlockList_t &blocks, int stage);
 };
 
 void ProblemGenerator(MeshBlock *pmb, parthenon::ParameterInput *pin);
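
For downstream codes, the visible interface change is in this header: a driver now builds one TaskCollection for the whole local block list instead of one TaskList per MeshBlock, which is what allows tasks to operate on packs spanning several blocks. A minimal sketch of the new override, assuming a hypothetical MyDriver with a hypothetical per-container task DoSomething (the TaskCollection/TaskRegion calls and the stage_name member from the multi-stage driver base class are the ones used in the driver diff above):

TaskCollection MyDriver::MakeTaskCollection(BlockList_t &blocks, const int stage) {
  TaskCollection tc;
  TaskID none(0);
  // One independently executable task list per block, as in async_region1 above.
  TaskRegion &region = tc.AddRegion(blocks.size());
  for (int i = 0; i < blocks.size(); i++) {
    auto &tl = region[i];
    auto &sc = blocks[i]->real_containers.Get(stage_name[stage]);
    auto work = tl.AddTask(none, DoSomething, sc); // hypothetical task
  }
  return tc;
}
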
88 changes: 86 additions & 2 deletions example/advection/advection_package.cpp
@@ -48,6 +48,9 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
   pkg->AddParam<>("refine_tol", refine_tol);
   Real derefine_tol = pin->GetOrAddReal("Advection", "derefine_tol", 0.03);
   pkg->AddParam<>("derefine_tol", derefine_tol);
+  // temporarily used in the package until the interface is sorted out
+  bool use_pack_in_one = pin->GetOrAddBoolean("Advection", "use_pack_in_one", false);
+  pkg->AddParam<>("use_pack_in_one", use_pack_in_one);
 
   auto profile_str = pin->GetOrAddString("Advection", "profile", "wave");
   if (!((profile_str.compare("wave") == 0) ||
@@ -156,6 +159,87 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
   );
   pkg->AddField(field_name, m);
 
+  // all of the following will go away / be streamlined with future caching
+  pkg->AddMeshBlockPack("base_var", [](Mesh *pmesh) {
+    int pack_size = pmesh->DefaultPackSize();
+    std::vector<MeshBlockVarPack<Real>> packs;
+    auto partitions = partition::ToSizeN(pmesh->block_list, pack_size);
+    packs.resize(partitions.size());
+    for (int i = 0; i < partitions.size(); i++) {
+      packs[i] = PackVariablesOnMesh(
+          partitions[i], "base",
+          std::vector<parthenon::MetadataFlag>{parthenon::Metadata::Independent});
+    }
+    return packs;
+  });
+
+  pkg->AddMeshBlockPack("base_varflux", [](Mesh *pmesh) {
+    int pack_size = pmesh->DefaultPackSize();
+    std::vector<MeshBlockVarFluxPack<Real>> packs;
+    auto partitions = partition::ToSizeN(pmesh->block_list, pack_size);
+    packs.resize(partitions.size());
+    for (int i = 0; i < partitions.size(); i++) {
+      packs[i] = PackVariablesAndFluxesOnMesh(
+          partitions[i], "base",
+          std::vector<parthenon::MetadataFlag>{parthenon::Metadata::Independent});
+    }
+    return packs;
+  });
+
+  pkg->AddMeshBlockPack("1_var", [](Mesh *pmesh) {
+    // Add containers if not already present
+    for (auto &pmb : pmesh->block_list) {
+      auto &base = pmb->real_containers.Get();
+      pmb->real_containers.Add("1", base);
+    }
+    int pack_size = pmesh->DefaultPackSize();
+    std::vector<MeshBlockVarPack<Real>> packs;
+    auto partitions = partition::ToSizeN(pmesh->block_list, pack_size);
+    packs.resize(partitions.size());
+    for (int i = 0; i < partitions.size(); i++) {
+      packs[i] = PackVariablesOnMesh(
+          partitions[i], "1",
+          std::vector<parthenon::MetadataFlag>{parthenon::Metadata::Independent});
+    }
+    return packs;
+  });
+
+  pkg->AddMeshBlockPack("1_varflux", [](Mesh *pmesh) {
+    // Add containers if not already present
+    for (auto &pmb : pmesh->block_list) {
+      auto &base = pmb->real_containers.Get();
+      pmb->real_containers.Add("1", base);
+    }
+    int pack_size = pmesh->DefaultPackSize();
+    std::vector<MeshBlockVarFluxPack<Real>> packs;
+    auto partitions = partition::ToSizeN(pmesh->block_list, pack_size);
+    packs.resize(partitions.size());
+    for (int i = 0; i < partitions.size(); i++) {
+      packs[i] = PackVariablesAndFluxesOnMesh(
+          partitions[i], "1",
+          std::vector<parthenon::MetadataFlag>{parthenon::Metadata::Independent});
+    }
+    return packs;
+  });
+
+  pkg->AddMeshBlockPack("dudt_var", [](Mesh *pmesh) {
+    // Add containers if not already present
+    for (auto &pmb : pmesh->block_list) {
+      auto &base = pmb->real_containers.Get();
+      pmb->real_containers.Add("dUdt", base);
+    }
+    int pack_size = pmesh->DefaultPackSize();
+    std::vector<MeshBlockVarPack<Real>> packs;
+    auto partitions = partition::ToSizeN(pmesh->block_list, pack_size);
+    packs.resize(partitions.size());
+    for (int i = 0; i < partitions.size(); i++) {
+      packs[i] = PackVariablesOnMesh(
+          partitions[i], "dUdt",
+          std::vector<parthenon::MetadataFlag>{Metadata::Independent});
+    }
+    return packs;
+  });
+
   pkg->FillDerived = SquareIt;
   pkg->CheckRefinement = CheckRefinement;
   pkg->EstimateTimestep = EstimateTimestep;
@@ -225,7 +309,7 @@ void SquareIt(std::shared_ptr<Container<Real>> &rc) {
   const int in = imap["one_minus_advected"].first;
   const int out = imap["one_minus_advected_sq"].first;
   pmb->par_for(
-      "advection_package::PreFill", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+      "advection_package::SquareIt", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
       KOKKOS_LAMBDA(const int k, const int j, const int i) {
         v(out, k, j, i) = v(in, k, j, i) * v(in, k, j, i);
       });
@@ -247,7 +331,7 @@ void PostFill(std::shared_ptr<Container<Real>> &rc) {
   const int out12 = imap["one_minus_sqrt_one_minus_advected_sq_12"].first;
   const int out37 = imap["one_minus_sqrt_one_minus_advected_sq_37"].first;
   pmb->par_for(
-      "advection_package::PreFill", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+      "advection_package::PostFill", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
       KOKKOS_LAMBDA(const int k, const int j, const int i) {
         v(out12, k, j, i) = 1.0 - sqrt(v(in, k, j, i));
         v(out37, k, j, i) = 1.0 - v(out12, k, j, i);
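
All five pack builders above rely on partition::ToSizeN to split the block list into chunks of at most DefaultPackSize() blocks, so each pack spans a bounded number of blocks. A sketch of that splitting behavior as a generic reimplementation (assuming a straightforward greedy split; parthenon's actual implementation may balance chunk sizes differently):

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical reimplementation illustrating partition::ToSizeN semantics:
// split `in` into consecutive chunks of at most `n` elements each.
template <typename T>
std::vector<std::vector<T>> ToSizeN(const std::vector<T> &in, const std::size_t n) {
  std::vector<std::vector<T>> out;
  for (std::size_t i = 0; i < in.size(); i += n) {
    out.emplace_back(in.begin() + i, in.begin() + std::min(i + n, in.size()));
  }
  return out;
}

// e.g., 10 blocks with DefaultPackSize() == 4 yields chunks of sizes 4, 4, 2.
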
1 change: 1 addition & 0 deletions example/advection/parthinput.advection
@@ -60,6 +60,7 @@ profile = hard_sphere
 refine_tol = 0.3  # control the package specific refinement tagging function
 derefine_tol = 0.03
 compute_error = false
+use_pack_in_one = false
 
 <parthenon/output1>
 file_type = rst
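
The new option defaults to false, so the packed boundary-communication path stays opt-in. A run exercising the new path would set, in the same block of the input file:

<Advection>
use_pack_in_one = true
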