diff --git a/external/parthenon b/external/parthenon index a92df6e4..437e02bf 160000 --- a/external/parthenon +++ b/external/parthenon @@ -1 +1 @@ -Subproject commit a92df6e4163291963b5d84136362782d56427484 +Subproject commit 437e02bf62734f7a7962b9e5d0fae0ab36a34dfc diff --git a/external/patches/parthenon-use-gr-coordinates.patch b/external/patches/parthenon-use-gr-coordinates.patch index 3b4b816c..fbb4bb3b 100644 --- a/external/patches/parthenon-use-gr-coordinates.patch +++ b/external/patches/parthenon-use-gr-coordinates.patch @@ -32,29 +32,4 @@ index d1290dee..50bfc840 100644 namespace parthenon { -diff --git a/src/interface/data_collection.cpp b/src/interface/data_collection.cpp -index 6a1d72c9..b5ba609b 100644 ---- a/src/interface/data_collection.cpp -+++ b/src/interface/data_collection.cpp -@@ -48,7 +48,7 @@ std::shared_ptr DataCollection::Add(const std::string &name, - if (it != containers_.end()) { - // check to make sure they are the same - if (!(*src == *(it->second))) { -- PARTHENON_THROW("Error attempting to add a Container to a Collection"); -+ //PARTHENON_THROW("Error attempting to add a Container to a Collection"); - } - return it->second; - } -diff --git a/src/interface/meshblock_data.cpp b/src/interface/meshblock_data.cpp -index 8d5dca57..0ab7dad8 100644 ---- a/src/interface/meshblock_data.cpp -+++ b/src/interface/meshblock_data.cpp -@@ -440,7 +440,7 @@ MeshBlockData::GetVariablesByUid(const std::vector &uids) { - - template - void MeshBlockData::Remove(const std::string &label) { -- throw std::runtime_error("MeshBlockData::Remove not yet implemented"); -+ varMap_.erase(label); - } - - template + diff --git a/kharma/b_cd/b_cd.cpp b/kharma/b_cd/b_cd.cpp index a89a0a6d..bb7d8f7e 100644 --- a/kharma/b_cd/b_cd.cpp +++ b/kharma/b_cd/b_cd.cpp @@ -223,20 +223,7 @@ Real MaxDivB(MeshData *md) TaskStatus PostStepDiagnostics(const SimTime& tm, MeshData *md) { - auto pmesh = md->GetMeshPointer(); - - // Print this unless we quash everything - int verbose = pmesh->packages.Get("Globals")->Param("verbose"); - if (verbose >= 0) { - static Reduce max_divb; - max_divb.val = B_CD::MaxDivB(md); - max_divb.StartReduce(0, MPI_MAX); - while (max_divb.CheckReduce() == TaskStatus::incomplete); - - if(MPIRank0()) { - std::cout << "Max DivB: " << max_divb.val << std::endl; - } - } + // TODO. Unify w/other B? return TaskStatus::complete; } @@ -280,16 +267,17 @@ void FillOutput(MeshBlock *pmb, ParameterInput *pin) void UpdateCtopMax(Mesh *pmesh, ParameterInput *pin, const SimTime &tm) { + // TODO use new Reductions stuff for this // Reduce and record the maximum sound speed on the grid, to propagate // phi at that speed next step. // Just needs to run after every step, so we use the KHARMA callback at that point. - auto& params = pmesh->packages.Get("B_CD")->AllParams(); - static AllReduce ctop_max_last_r; - ctop_max_last_r.val = params.Get("ctop_max"); - ctop_max_last_r.StartReduce(MPI_MAX); - while (ctop_max_last_r.CheckReduce() == TaskStatus::incomplete); - params.Update("ctop_max_last", ctop_max_last_r.val); - params.Update("ctop_max", 0.0); // Reset for next max calculation + // auto& params = pmesh->packages.Get("B_CD")->AllParams(); + // static AllReduce ctop_max_last_r; + // ctop_max_last_r.val = params.Get("ctop_max"); + // ctop_max_last_r.StartReduce(MPI_MAX); + // while (ctop_max_last_r.CheckReduce() == TaskStatus::incomplete); + // params.Update("ctop_max_last", ctop_max_last_r.val); + // params.Update("ctop_max", 0.0); // Reset for next max calculation } } // namespace B_CD diff --git a/kharma/b_cleanup/b_cleanup.cpp b/kharma/b_cleanup/b_cleanup.cpp index e9d445c3..23de26ef 100644 --- a/kharma/b_cleanup/b_cleanup.cpp +++ b/kharma/b_cleanup/b_cleanup.cpp @@ -38,6 +38,7 @@ #include "boundaries.hpp" #include "decs.hpp" +#include "kharma.hpp" #include "kharma_driver.hpp" #include "grmhd.hpp" #include "kharma.hpp" @@ -217,7 +218,7 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr>& md) } // Calculate/print inital max divB exactly as we would during run - const double divb_start = B_FluxCT::GlobalMaxDivB(md.get()); + const double divb_start = B_FluxCT::GlobalMaxDivB(md.get(), true); if (divb_start < rel_tolerance && !always_solve) { // If divB is "pretty good" and we allow not solving... if (MPIRank0()) @@ -236,22 +237,8 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr>& md) KHARMADriver::SyncAllBounds(md); // Add a solver container and associated MeshData - for (auto& pmb : pmesh->block_list) { - auto &base = pmb->meshblock_data.Get(); - pmb->meshblock_data.Add("solve", base); - } - // The "solve" container really only needs the RHS, the solution, and the scratch array dB - // This does not affect the main container, but saves a *lot* of time not syncing - // static variables. - // There's no MeshData-wide 'Remove' so we go block-by-block - for (auto& pmb : pmesh->block_list) { - auto rc_s = pmb->meshblock_data.Get("solve"); - auto vars = rc_s->GetVariablesByFlag({Metadata::GetUserFlag("MHD")}).vars(); - for (auto var : vars) { - rc_s->Remove(var->label()); - } - } - auto &msolve = pmesh->mesh_data.GetOrAdd("solve", 0); + std::vector names = KHARMA::GetVariableNames(&pmesh->packages, Metadata::GetUserFlag("B_Cleanup")); + auto &msolve = pmesh->mesh_data.Add("solve", names); // Create a TaskCollection of just the solve, // execute it to perform BiCGStab iteration @@ -285,24 +272,6 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr>& md) return TaskStatus::complete; } -TaskStatus B_Cleanup::RemoveExtraFields(BlockList_t &blocks) -{ - // If we aren't needed to clean anything... - if (! (blocks[0]->packages.Get("B_Cleanup")->Param("cleanup_interval") > 0)) { - // remove the internal BiCGStab variables by name, - // to prevent them weighing down MPI exchanges - // TODO anything FillGhost & not Conserved or Primitive - for (auto& pmb : blocks) { - auto rc_s = pmb->meshblock_data.Get(); - for (auto varlabel : {"pk0", "res0", "temp0", "divB_RHS", "p"}) { - if (rc_s->HasVariable(varlabel)) - rc_s->Remove(varlabel); - } - } - } - return TaskStatus::complete; -} - TaskStatus B_Cleanup::ApplyP(MeshData *msolve, MeshData *md) { // Apply on physical zones only, we'll be syncing/updating ghosts diff --git a/kharma/b_cleanup/b_cleanup.hpp b/kharma/b_cleanup/b_cleanup.hpp index ebb4037f..b75652d8 100644 --- a/kharma/b_cleanup/b_cleanup.hpp +++ b/kharma/b_cleanup/b_cleanup.hpp @@ -67,14 +67,8 @@ TaskStatus CleanupDivergence(std::shared_ptr>& md); bool CleanupThisStep(Mesh* pmesh, int nstep); /** - * Remove the extra solver fields which B_Cleanup added during initialization. - * Must be run before every step as the meshblocks are reconstructed per-step from - * package variable lists. - */ -TaskStatus RemoveExtraFields(BlockList_t &blocks); - -/** - * Calculate the laplacian using divergence at corners + * Calculate the laplacian using divergence at corners. + * Extra MeshData arg is just to satisfy Parthenon solver calling convention */ TaskStatus CornerLaplacian(MeshData* md, const std::string& p_var, MeshData* md_again, const std::string& lap_var); diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp index 2b91ed72..2440bfd6 100644 --- a/kharma/b_ct/b_ct.cpp +++ b/kharma/b_ct/b_ct.cpp @@ -383,13 +383,15 @@ double B_CT::BlockMaxDivB(MeshBlockData *rc) return max_divb; } -double B_CT::GlobalMaxDivB(MeshData *md) +double B_CT::GlobalMaxDivB(MeshData *md, bool all_reduce) { - static AllReduce max_divb; - max_divb.val = MaxDivB(md); - max_divb.StartReduce(MPI_MAX); - while (max_divb.CheckReduce() == TaskStatus::incomplete); - return max_divb.val; + if (all_reduce) { + Reductions::StartToAll(md, 2, MaxDivB(md), MPI_MAX); + return Reductions::CheckOnAll(md, 2); + } else { + Reductions::Start(md, 2, MaxDivB(md), MPI_MAX); + return Reductions::Check(md, 2); + } } TaskStatus B_CT::PrintGlobalMaxDivB(MeshData *md, bool kill_on_large_divb) diff --git a/kharma/b_ct/b_ct.hpp b/kharma/b_ct/b_ct.hpp index b5b51dbb..0d45a5d3 100644 --- a/kharma/b_ct/b_ct.hpp +++ b/kharma/b_ct/b_ct.hpp @@ -98,7 +98,7 @@ double BlockMaxDivB(MeshBlockData *rc); /** * Returns the global maximum value, rather than the maximum over this rank's MeshData */ -double GlobalMaxDivB(MeshData *md); +double GlobalMaxDivB(MeshData *md, bool all_reduce=false); /** * Diagnostics printed/computed after each step @@ -124,22 +124,6 @@ void FillOutput(MeshBlock *pmb, ParameterInput *pin); */ void CalcDivB(MeshData *md, std::string divb_field_name="divB"); -// Reductions: FOR LATER -// KOKKOS_INLINE_FUNCTION Real phi(REDUCE_FUNCTION_ARGS_EH) -// { -// // \Phi == \int |*F^1^0| * gdet * dx2 * dx3 == \int |B1| * gdet * dx2 * dx3 -// return 0.5 * m::abs(U(m_u.B1, k, j, i)); // factor of gdet already in cons.B -// } - -// inline Real ReducePhi0(MeshData *md) -// { -// return Reductions::EHReduction(md, UserHistoryOperation::sum, phi, 0); -// } -// inline Real ReducePhi5(MeshData *md) -// { -// return Reductions::EHReduction(md, UserHistoryOperation::sum, phi, 5); -// } - // Device functions template KOKKOS_INLINE_FUNCTION Real face_div(const GRCoordinates &G, Global &v, const int &ndim, const int &k, const int &j, const int &i) diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp index af2f93e6..6f846c02 100644 --- a/kharma/b_flux_ct/b_flux_ct.cpp +++ b/kharma/b_flux_ct/b_flux_ct.cpp @@ -478,7 +478,8 @@ double MaxDivB(MeshData *md) const IndexRange kb = IndexRange{kbl.s, kbl.e + (ndim > 2)}; const IndexRange block = IndexRange{0, B_U.GetDim(5)-1}; - // TODO Keep zone of max! Also applies to ctop. + // TODO Keep zone of max! See timestep calc + // Will need to translate them back to KS to make them useful though // This is one kernel call per block, because each block will have different bounds. // Could consolidate at the cost of lots of bounds checking. @@ -504,13 +505,15 @@ double MaxDivB(MeshData *md) return max_divb; } -double GlobalMaxDivB(MeshData *md) +double GlobalMaxDivB(MeshData *md, bool all_reduce) { - static AllReduce max_divb; - max_divb.val = MaxDivB(md); - max_divb.StartReduce(MPI_MAX); - while (max_divb.CheckReduce() == TaskStatus::incomplete); - return max_divb.val; + if (all_reduce) { + Reductions::StartToAll(md, 2, MaxDivB(md), MPI_MAX); + return Reductions::CheckOnAll(md, 2); + } else { + Reductions::Start(md, 2, MaxDivB(md), MPI_MAX); + return Reductions::Check(md, 2); + } } TaskStatus PrintGlobalMaxDivB(MeshData *md, bool kill_on_large_divb) diff --git a/kharma/b_flux_ct/b_flux_ct.hpp b/kharma/b_flux_ct/b_flux_ct.hpp index 0fa89728..ffdd4c3f 100644 --- a/kharma/b_flux_ct/b_flux_ct.hpp +++ b/kharma/b_flux_ct/b_flux_ct.hpp @@ -110,8 +110,10 @@ double MaxDivB(MeshData *md); /** * Returns the global maximum value, rather than the maximum over this rank's MeshData + * + * By default, only returns the correct value on rank 0 for printing */ -double GlobalMaxDivB(MeshData *md); +double GlobalMaxDivB(MeshData *md, bool all_reduce=false); /** * Diagnostics printed/computed after each step @@ -124,7 +126,7 @@ TaskStatus PrintGlobalMaxDivB(MeshData *md, bool kill_on_large_divb=false) */ inline TaskStatus PostStepDiagnostics(const SimTime& tm, MeshData *md) { - auto& params = md->GetMeshPointer()->block_list[0]->packages.Get("B_FluxCT")->AllParams(); + auto& params = md->GetMeshPointer()->packages.Get("B_FluxCT")->AllParams(); return PrintGlobalMaxDivB(md, params.Get("kill_on_large_divb")); } @@ -133,25 +135,17 @@ inline TaskStatus PostStepDiagnostics(const SimTime& tm, MeshData *md) */ void FillOutput(MeshBlock *pmb, ParameterInput *pin); /** - * Fill field "name" with divB + * Fill the field 'divb_field_name' with divB */ void CalcDivB(MeshData *md, std::string divb_field_name="divB"); -// Reductions: phi uses global machinery, but divB is too -// Can also sum the hemispheres independently to be fancy (TODO?) -KOKKOS_INLINE_FUNCTION Real phi(REDUCE_FUNCTION_ARGS_EH) -{ - // \Phi == \int |*F^1^0| * gdet * dx2 * dx3 == \int |B1| * gdet * dx2 * dx3 - return 0.5 * m::abs(U(m_u.B1, k, j, i)); // factor of gdet already in cons.B -} - inline Real ReducePhi0(MeshData *md) { - return Reductions::EHReduction(md, UserHistoryOperation::sum, phi, 0); + return Reductions::EHReduction(md, UserHistoryOperation::sum, 0); } inline Real ReducePhi5(MeshData *md) { - return Reductions::EHReduction(md, UserHistoryOperation::sum, phi, 5); + return Reductions::EHReduction(md, UserHistoryOperation::sum, 5); } /** diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp index 20561573..e3ac6c15 100644 --- a/kharma/boundaries/boundaries.cpp +++ b/kharma/boundaries/boundaries.cpp @@ -76,8 +76,8 @@ std::shared_ptr KBoundaries::Initialize(ParameterInput *pin, std: Metadata m_x1, m_x2, m_x3; { - // We can't use GetVariablesByFlag yet, so walk through and count manually - int nvar = KHARMA::CountVars(packages.get(), Metadata::FillGhost); + // We can't use GetVariablesByFlag yet, so ask the packages + int nvar = KHARMA::PackDimension(packages.get(), Metadata::FillGhost); // We also don't know the mesh size, since it's not constructed. We infer. const int ng = pin->GetInteger("parthenon/mesh", "nghost"); diff --git a/kharma/coordinates/coordinate_utils.hpp b/kharma/coordinates/coordinate_utils.hpp index 98e6a54e..79e95fec 100644 --- a/kharma/coordinates/coordinate_utils.hpp +++ b/kharma/coordinates/coordinate_utils.hpp @@ -34,6 +34,7 @@ #pragma once #include "decs.hpp" +#include "matrix.hpp" /** * Rotate a set of coordinates 'Xin' by 'angle' about the *y-axis* diff --git a/kharma/debug.cpp b/kharma/debug.cpp deleted file mode 100644 index be3aa6e7..00000000 --- a/kharma/debug.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - * File: debug.cpp - * - * BSD 3-Clause License - * - * Copyright (c) 2020, AFD Group at UIUC - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "debug.hpp" - -#include "decs.hpp" - -#include "floors.hpp" -#include "grmhd_functions.hpp" -#include "types.hpp" - -// TODO make this a DomainReduce, and add better verbosity options -// TODO - -TaskStatus CheckNaN(MeshData *md, int dir, IndexDomain domain) -{ - Flag("CheckNaN"); - auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); - - // Pack variables - auto& cmax = md->PackVariables(std::vector{"Flux.cmax"}); - auto& cmin = md->PackVariables(std::vector{"Flux.cmin"}); - - // Get sizes - IndexRange ib = md->GetBoundsI(IndexDomain::interior); - IndexRange jb = md->GetBoundsJ(IndexDomain::interior); - IndexRange kb = md->GetBoundsK(IndexDomain::interior); - IndexRange block = IndexRange{0, cmax.GetDim(5) - 1}; - - // TODO these two kernels can be one with some Kokkos magic - int nzero = 0, nnan = 0; - Kokkos::Sum zero_reducer(nzero); - Kokkos::Sum nan_reducer(nnan); - pmb0->par_reduce("ctop_zeros", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, int &local_result) { - if (m::max(cmax(b, dir-1, k, j, i), cmin(b, dir-1, k, j, i)) <= 0.) { - ++local_result; - } - } - , zero_reducer); - pmb0->par_reduce("ctop_nans", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, int &local_result) { - if (m::isnan(m::max(cmax(b, dir-1, k, j, i), cmin(b, dir-1, k, j, i)))) { - ++local_result; - printf("ctop NaN at %d %d %d along dir %d\n", i, j, k, dir); // EDIT - } - } - , nan_reducer); - - // Reductions in parallel - // Only need to reduce to head node, saves time - static Reduce nzero_tot, nnan_tot; - nzero_tot.val = nzero; - nnan_tot.val = nnan; - nzero_tot.StartReduce(0, MPI_SUM); - nnan_tot.StartReduce(0, MPI_SUM); - while (nzero_tot.CheckReduce() == TaskStatus::incomplete); - while (nnan_tot.CheckReduce() == TaskStatus::incomplete); - nzero = nzero_tot.val; - nnan = nnan_tot.val; - - if (MPIRank0() && (nzero > 0 || nnan > 0)) { - // TODO string formatting in C++ that doesn't suck - fprintf(stderr, "Max signal speed ctop was 0 or NaN, direction %d (%d zero, %d NaN)", dir, nzero, nnan); - throw std::runtime_error("Bad ctop!"); - } - - // TODO reimplement printing *where* these values were hit? - // May not even be that useful, as the cause is usually much earlier - - EndFlag(); - return TaskStatus::complete; -} - -TaskStatus CheckNegative(MeshData *md, IndexDomain domain) -{ - Flag("CheckNegative"); - auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); - // Pack variables - auto rho_p = md->PackVariables(std::vector{"prims.rho"}); - auto u_p = md->PackVariables(std::vector{"prims.u"}); - auto rho_c = md->PackVariables(std::vector{"cons.rho"}); - // Get sizes - IndexRange ib = md->GetBoundsI(domain); - IndexRange jb = md->GetBoundsJ(domain); - IndexRange kb = md->GetBoundsK(domain); - IndexRange block = IndexRange{0, rho_p.GetDim(5)-1}; - - // Check for negative values in the conserved vars - int nless = 0; - Kokkos::Sum sum_reducer(nless); - pmb0->par_reduce("count_negative_U", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, int &local_result) { - if (rho_c(b, 0, k, j, i) < 0.) ++local_result; - } - , sum_reducer); - - int nless_rho = 0, nless_u = 0; - Kokkos::Sum sum_reducer_rho(nless_rho); - Kokkos::Sum sum_reducer_u(nless_u); - pmb0->par_reduce("count_negative_RHO", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, int &local_result) { - if (rho_p(b, 0, k, j, i) < 0.) ++local_result; - } - , sum_reducer_rho); - pmb0->par_reduce("count_negative_UU", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, int &local_result) { - if (u_p(b, 0, k, j, i) < 0.) ++local_result; - } - , sum_reducer_u); - - // Reductions in parallel - static Reduce nless_tot, nless_rho_tot, nless_u_tot; - nless_tot.val = nless; - nless_rho_tot.val = nless_rho; - nless_u_tot.val = nless_u; - nless_tot.StartReduce(0, MPI_SUM); - nless_rho_tot.StartReduce(0, MPI_SUM); - nless_u_tot.StartReduce(0, MPI_SUM); - while (nless_tot.CheckReduce() == TaskStatus::incomplete); - while (nless_rho_tot.CheckReduce() == TaskStatus::incomplete); - while (nless_u_tot.CheckReduce() == TaskStatus::incomplete); - nless = nless_tot.val; - nless_rho = nless_rho_tot.val; - nless_u = nless_u_tot.val; - - if (MPIRank0() && nless > 0) { - std::cout << "Number of negative conserved rho: " << nless << std::endl; - } - if (MPIRank0() && (nless_rho > 0 || nless_u > 0)) { - std::cout << "Number of negative primitive rho, u: " << nless_rho << "," << nless_u << std::endl; - } - - EndFlag(); - return TaskStatus::complete; -} diff --git a/kharma/debug.hpp b/kharma/debug.hpp deleted file mode 100644 index ec792836..00000000 --- a/kharma/debug.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * File: debug.hpp - * - * BSD 3-Clause License - * - * Copyright (c) 2020, AFD Group at UIUC - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#pragma once - -#include "decs.hpp" -#include "types.hpp" - -// TODO TODO Namespace - -/** - * Check the max signal speed (ctop) for 0-values or NaNs. - * This is a final warning that something is very wrong and we should crash. - */ -TaskStatus CheckNaN(MeshData *md, int dir, IndexDomain domain=IndexDomain::interior); - -/** - * Check the primitive and conserved variables for negative values that definitely shouldn't be negative - * That is: primitive rho, u, conserved rho*u^t - */ -TaskStatus CheckNegative(MeshData *md, IndexDomain domain=IndexDomain::interior); diff --git a/kharma/driver/imex_step.cpp b/kharma/driver/imex_step.cpp index 8d86489a..3928a2c5 100644 --- a/kharma/driver/imex_step.cpp +++ b/kharma/driver/imex_step.cpp @@ -44,7 +44,6 @@ #include "wind.hpp" // Other headers #include "boundaries.hpp" -#include "debug.hpp" #include "flux.hpp" #include "resize_restart.hpp" #include "implicit.hpp" @@ -70,37 +69,28 @@ TaskCollection KHARMADriver::MakeImExTaskCollection(BlockList_t &blocks, int sta const bool use_jcon = pkgs.count("Current"); const bool use_linesearch = (use_implicit) ? pkgs.at("Implicit")->Param("linesearch") : false; - // If we cleaned up, this added other fields marked FillDerived - // Remove them before we allocate the space - if (use_b_cleanup) { - B_Cleanup::RemoveExtraFields(blocks); - } - - // Allocate the fluid states ("containers") we need for each block - for (auto& pmb : blocks) { - // first make other useful containers - auto &base = pmb->meshblock_data.Get(); - if (stage == 1) { - pmb->meshblock_data.Add("dUdt", base); - for (int i = 1; i < integrator->nstages; i++) - pmb->meshblock_data.Add(integrator->stage_name[i], base); - - if (use_jcon) { - // At the end of the step, updating "mbd_sub_step_final" updates the base - // So we have to keep a copy at the beginning to calculate jcon - pmb->meshblock_data.Add("preserve", base); - // Above only copies on allocate -- ensure we copy every step - Copy>({}, base.get(), pmb->meshblock_data.Get("preserve").get()); - } - - if (use_implicit) { - // When solving, we need a temporary copy with any explicit updates, - // but not overwriting the beginning- or mid-step values - pmb->meshblock_data.Add("solver", base); - if (use_linesearch) { - // Need an additional state for linesearch - pmb->meshblock_data.Add("linesearch", base); - } + // Allocate/copy the things we need + // TODO these can now be reduced by including the var lists/flags which actually need to be allocated + // TODO except the Copy they can be run on step 1 only + if (stage == 1) { + auto &base = pmesh->mesh_data.Get(); + // Fluxes + pmesh->mesh_data.Add("dUdt"); + for (int i = 1; i < integrator->nstages; i++) + pmesh->mesh_data.Add(integrator->stage_name[i]); + // Preserve state for time derivatives if we need to output current + if (use_jcon) { + pmesh->mesh_data.Add("preserve"); + // Above only copies on allocate -- ensure we copy every step + Copy>({}, base.get(), pmesh->mesh_data.Get("preserve").get()); + } + if (use_implicit) { + // When solving, we need a temporary copy with any explicit updates, + // but not overwriting the beginning- or mid-step values + pmesh->mesh_data.Add("solver"); + if (use_linesearch) { + // Need an additional state for linesearch + pmesh->mesh_data.Add("linesearch"); } } } diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp index d60ed54d..37a28dec 100644 --- a/kharma/driver/kharma_driver.cpp +++ b/kharma/driver/kharma_driver.cpp @@ -266,5 +266,35 @@ TaskID KHARMADriver::AddFluxCalculations(TaskID& t_start, TaskList& tl, KReconst << "donor_cell, linear_mc, weno5" << std::endl; throw std::invalid_argument("Unsupported reconstruction algorithm!"); } - return t_calculate_flux1 | t_calculate_flux2 | t_calculate_flux3; + auto t_calc_fluxes = t_calculate_flux1 | t_calculate_flux2 | t_calculate_flux3; + + auto t_ctop = t_calc_fluxes; + if (md->GetMeshPointer()->packages.Get("Globals")->Param("extra_checks") > 0) { + auto t_ctop = tl.AddTask(t_calc_fluxes, Flux::CheckCtop, md); + } + + return t_ctop; +} + +void KHARMADriver::SetGlobalTimeStep() +{ + // TODO TODO apply the limits from GRMHD package here + if (tm.dt < 0.1 * std::numeric_limits::max()) { + tm.dt *= 2.0; + } + Real big = std::numeric_limits::max(); + for (auto const &pmb : pmesh->block_list) { + tm.dt = std::min(tm.dt, pmb->NewDt()); + pmb->SetAllowedDt(big); + } + + // TODO start reduce at the end of the per-meshblock stuff, check here +#ifdef MPI_PARALLEL + PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &tm.dt, 1, MPI_PARTHENON_REAL, MPI_MIN, + MPI_COMM_WORLD)); +#endif + + if (tm.time < tm.tlim && + (tm.tlim - tm.time) < tm.dt) // timestep would take us past desired endpoint + tm.dt = tm.tlim - tm.time; } diff --git a/kharma/driver/kharma_driver.hpp b/kharma/driver/kharma_driver.hpp index e5a2c64e..6c695ea4 100644 --- a/kharma/driver/kharma_driver.hpp +++ b/kharma/driver/kharma_driver.hpp @@ -59,6 +59,9 @@ class KHARMADriver : public MultiStageDriver { // Eliminate Parthenon's print statements when starting up the driver, we have a bunch of our own void PreExecute() { timer_main.reset(); } + // Also override the timestep calculation, so we can start moving options etc out of GRMHD package + void SetGlobalTimeStep(); + /** * A Driver object orchestrates everything that has to be done to a mesh to take a step. * The function MakeTaskCollection outlines everything to be done in one sub-step, @@ -74,19 +77,22 @@ class KHARMADriver : public MultiStageDriver { * 4. Recover primtive variables * 4a. Apply any stability limits (floors) * 4b. Fix any errors in recovering the primitives, re-apply floors - * 5. Apply any source terms (KEL), or calculate outputs (jcon) which require the change in primitive values + * 5. Apply any source terms (KEL), or calculate outputs (jcon) which use the primitive variables * * This is before any synchronization between different blocks, etc, etc. - * Both task lists proceed roughly in this order, and you'll see the same broad outlines in both. + * All task lists proceed roughly in this order, but differ in which variables they synchronize via MPI, + * or whether they synchronize at all. */ TaskCollection MakeTaskCollection(BlockList_t &blocks, int stage); + + /** + * The default step, synchronizing conserved variables and then recovering primitive variables in the ghost zones. + */ TaskCollection MakeDefaultTaskCollection(BlockList_t &blocks, int stage); /** - * This "TaskCollection" (step) - * ImexDriver syncs primitive variables and treats them as fundamental, whereas HARMDriver syncs conserved variables. - * This allows ImexDriver to optionally use a semi-implicit step, adding a per-zone implicit solve via the 'Implicit' - * package, instead of just explicit RK2 time-stepping. This driver also allows explicit-only RK2 operation + * This step syncs primitive variables and treats them as fundamental + * This accommodates semi-implicit stepping, allowing evolving theories with implicit source terms such as extended MHD */ TaskCollection MakeImExTaskCollection(BlockList_t &blocks, int stage); @@ -95,6 +101,8 @@ class KHARMADriver : public MultiStageDriver { */ TaskCollection MakeSimpleTaskCollection(BlockList_t &blocks, int stage); + // The different drivers share substantially similar portions of the full task list, which we gather into + /** * Add the flux calculations in each direction. Since the flux functions are templated on which * reconstruction is being used, this amounts to a lot of shared lines. @@ -102,10 +110,10 @@ class KHARMADriver : public MultiStageDriver { static TaskID AddFluxCalculations(TaskID& t_start, TaskList& tl, KReconstruction::Type recon, MeshData *md); /** - * Add just the synchronization step to a task list tl, dependent upon taskID t_start, syncing mesh mc1 + * Add a synchronization retion to an existing TaskCollection tc. + * Since the region is self-contained, does not return a TaskID * - * This sequence is used identically in several places, so it makes sense - * to define once and use elsewhere. + * This function polls the 'integrator' member or it would be static too */ void AddFullSyncRegion(Mesh* pmesh, TaskCollection& tc, int stage); diff --git a/kharma/driver/kharma_step.cpp b/kharma/driver/kharma_step.cpp index 8bb2591b..19a6992c 100644 --- a/kharma/driver/kharma_step.cpp +++ b/kharma/driver/kharma_step.cpp @@ -44,7 +44,6 @@ #include "wind.hpp" // Other headers #include "boundaries.hpp" -#include "debug.hpp" #include "flux.hpp" #include "resize_restart.hpp" #include "implicit.hpp" @@ -88,12 +87,6 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int const bool use_electrons = pkgs.count("Electrons"); const bool use_jcon = pkgs.count("Current"); - // If we cleaned up, this added other fields marked FillDerived - // Remove them before we allocate the space - if (use_b_cleanup) { - B_Cleanup::RemoveExtraFields(blocks); - } - // Allocate the fluid states ("containers") we need for each block for (auto& pmb : blocks) { // first make other useful containers diff --git a/kharma/emhd/emhd.cpp b/kharma/emhd/emhd.cpp index 34153ce0..3150a198 100644 --- a/kharma/emhd/emhd.cpp +++ b/kharma/emhd/emhd.cpp @@ -122,15 +122,16 @@ std::shared_ptr Initialize(ParameterInput *pin, std::shared_ptr

Initialize(ParameterInput *pin, std::shared_ptr

GetBlockPointer(); // PackIndexMap prims_map, cons_map; -// auto U_E = rc->PackVariables(std::vector{Metadata::GetUserFlag("EMHD"), Metadata::Conserved}, cons_map); +// auto U_E = rc->PackVariables(std::vector{Metadata::GetUserFlag("EMHDVar"), Metadata::Conserved}, cons_map); // auto P = rc->PackVariables(std::vector{Metadata::GetUserFlag("Primitive")}, prims_map); // const VarMap m_p(prims_map, false), m_u(cons_map, true); @@ -216,7 +217,7 @@ void BlockPtoU(MeshBlockData *rc, IndexDomain domain, bool coarse) auto pmb = rc->GetBlockPointer(); PackIndexMap prims_map, cons_map; - auto U_E = rc->PackVariables(std::vector{Metadata::GetUserFlag("EMHD"), Metadata::Conserved}, cons_map); + auto U_E = rc->PackVariables(std::vector{Metadata::GetUserFlag("EMHDVar"), Metadata::Conserved}, cons_map); auto P = rc->PackVariables(std::vector{Metadata::GetUserFlag("Primitive")}, prims_map); const VarMap m_p(prims_map, false), m_u(cons_map, true); diff --git a/kharma/floors/floors.cpp b/kharma/floors/floors.cpp index 97f2412c..1dd4e557 100644 --- a/kharma/floors/floors.cpp +++ b/kharma/floors/floors.cpp @@ -34,7 +34,6 @@ #include "floors.hpp" #include "floors_functions.hpp" -#include "debug.hpp" #include "grmhd.hpp" #include "grmhd_functions.hpp" #include "pack.hpp" @@ -43,7 +42,7 @@ int CountFFlags(MeshData *md) { - return Reductions::CountFlags(md, "fflag", FFlag::flag_names, IndexDomain::interior, 0, true); + return Reductions::CountFlags(md, "fflag", FFlag::flag_names, IndexDomain::interior, true)[0]; } std::shared_ptr Floors::Initialize(ParameterInput *pin, std::shared_ptr& packages) @@ -225,7 +224,7 @@ TaskStatus Floors::ApplyInitialFloors(ParameterInput *pin, MeshBlockData * TaskStatus Floors::ApplyGRMHDFloors(MeshBlockData *mbd, IndexDomain domain) { - auto pmb = mbd->GetBlockPointer(); + auto pmb = mbd->GetBlockPointer(); PackIndexMap prims_map, cons_map; auto P = mbd->PackVariables({Metadata::GetUserFlag("Primitive")}, prims_map); @@ -286,6 +285,9 @@ TaskStatus Floors::ApplyGRMHDFloors(MeshBlockData *mbd, IndexDomain domain } ); + //if (flag_verbose) + //Reductions::StartFlagReduce(md, "fflag", FFlag::flag_names, IndexDomain::interior, true, 0); + return TaskStatus::complete; } @@ -297,9 +299,15 @@ TaskStatus Floors::PostStepDiagnostics(const SimTime& tm, MeshData *md) const auto& pars = pmesh->packages.Get("Globals")->AllParams(); const int flag_verbose = pars.Get("flag_verbose"); - // Debugging/diagnostic info about floor and inversion flags - if (flag_verbose >= 1) { - Reductions::CountFlags(md, "fflag", FFlag::flag_names, IndexDomain::interior, flag_verbose, true); + // Debugging/diagnostic info about floor flags + if (flag_verbose > 0) { + // TODO this should move to ApplyGRMHDFloors when everything goes MeshData + Reductions::StartFlagReduce(md, "fflag", FFlag::flag_names, IndexDomain::interior, true, 0); + // Debugging/diagnostic info about floor and inversion flags + Reductions::CheckFlagReduceAndPrintHits(md, "fflag", FFlag::flag_names, IndexDomain::interior, true, 0); } + + // Anything else (energy conservation? Added material stats?) + return TaskStatus::complete; } diff --git a/kharma/floors/floors.hpp b/kharma/floors/floors.hpp index 3e3d9edc..4325680c 100644 --- a/kharma/floors/floors.hpp +++ b/kharma/floors/floors.hpp @@ -80,7 +80,8 @@ static const std::map flag_names = { {TEMP, "TEMPERATURE"}, {KTOT, "ENTROPY"}, {GEOM_RHO_FLUX, "GEOM_RHO_ON_RECON"}, - {GEOM_U_FLUX, "GEOM_U_ON_RECON"}}; + {GEOM_U_FLUX, "GEOM_U_ON_RECON"} +}; } namespace Floors { diff --git a/kharma/flux/flux.cpp b/kharma/flux/flux.cpp index 68bc6196..a49d704a 100644 --- a/kharma/flux/flux.cpp +++ b/kharma/flux/flux.cpp @@ -50,7 +50,7 @@ std::shared_ptr Flux::Initialize(ParameterInput *pin, std::shared // We can't just use GetVariables or something since there's no mesh yet. // That's what this function is for. - int nvar = KHARMA::CountVars(packages.get(), Metadata::WithFluxes); + int nvar = KHARMA::PackDimension(packages.get(), Metadata::WithFluxes); std::cout << "Allocating fluxes with nvar: " << nvar << std::endl; std::vector s_flux({nvar}); // TODO optionally move all these to faces? Not important yet, no output, more memory @@ -266,3 +266,36 @@ void Flux::AddGeoSource(MeshData *md, MeshData *mdudt) } ); } + +TaskStatus Flux::CheckCtop(MeshData *md) +{ + Reductions::DomainReduction(md, UserHistoryOperation::sum, 0); + Reductions::DomainReduction(md, UserHistoryOperation::sum, 1); + return TaskStatus::complete; +} + +TaskStatus Flux::PostStepDiagnostics(const SimTime& tm, MeshData *md) +{ + auto pmesh = md->GetMeshPointer(); + auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); + // Options + const auto& pars = pmesh->packages.Get("Globals")->AllParams(); + const int extra_checks = pars.Get("extra_checks"); + + // Check for a soundspeed (ctop) of 0 or NaN + // This functions as a "last resort" check to stop a + // simulation on obviously bad data + if (extra_checks >= 1) { + int nnan = Reductions::Check(md, 0); + int nzero = Reductions::Check(md, 1); + + if (MPIRank0() && (nzero > 0 || nnan > 0)) { + // TODO string formatting in C++ that doesn't suck + fprintf(stderr, "Max signal speed ctop of 0 or NaN (%d zero, %d NaN)", nzero, nnan); + throw std::runtime_error("Bad ctop!"); + } + + } + + return TaskStatus::complete; +} diff --git a/kharma/flux/flux.hpp b/kharma/flux/flux.hpp index 289aa43d..d8907c8c 100644 --- a/kharma/flux/flux.hpp +++ b/kharma/flux/flux.hpp @@ -37,7 +37,6 @@ #include -#include "debug.hpp" #include "floors.hpp" #include "flux_functions.hpp" #include "pack.hpp" @@ -48,6 +47,10 @@ namespace Flux { std::shared_ptr Initialize(ParameterInput *pin, std::shared_ptr& packages); +TaskStatus CheckCtop(MeshData *md); + +TaskStatus PostStepDiagnostics(const SimTime& tm, MeshData *md); + /** * Add the geometric source term present in the covariant derivative of the stress-energy tensor, * S_nu = sqrt(-g) T^kap_lam Gamma^lam_nu_kap diff --git a/kharma/grmhd/grmhd.cpp b/kharma/grmhd/grmhd.cpp index 82b464d9..9c9f1c41 100644 --- a/kharma/grmhd/grmhd.cpp +++ b/kharma/grmhd/grmhd.cpp @@ -41,7 +41,6 @@ #include "boundaries.hpp" #include "current.hpp" -#include "debug.hpp" #include "floors.hpp" #include "flux.hpp" #include "gr_coordinates.hpp" @@ -50,7 +49,6 @@ #include - /** * GRMHD package. Global operations on General Relativistic Magnetohydrodynamic systems. */ @@ -258,26 +256,32 @@ Real EstimateTimestep(MeshBlockData *rc) return globals.Get("dt_light"); } - typename Kokkos::MinMax::value_type minmax; + Reductions::Reduce3v minmax; pmb->par_reduce("ndt_min", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA (const int k, const int j, const int i, - typename Kokkos::MinMax::value_type &lminmax) { + Reductions::Reduce3v &lminmax) { double ndt_zone = 1 / (1 / (G.Dxc<1>(i) / m::max(cmax(0, k, j, i), cmin(0, k, j, i))) + 1 / (G.Dxc<2>(j) / m::max(cmax(1, k, j, i), cmin(1, k, j, i))) + 1 / (G.Dxc<3>(k) / m::max(cmax(2, k, j, i), cmin(2, k, j, i)))); // Effective "max speed" used for the timestep double ctop_max_zone = m::min(G.Dxc<1>(i), m::min(G.Dxc<2>(j), G.Dxc<3>(k))) / ndt_zone; - if (!m::isnan(ndt_zone) && (ndt_zone < lminmax.min_val)) + if (!m::isnan(ndt_zone) && (ndt_zone < lminmax.min_val)) { lminmax.min_val = ndt_zone; - if (!m::isnan(ctop_max_zone) && (ctop_max_zone > lminmax.max_val)) + lminmax.min_loc = std::tuple{i, j, k}; + } + if (!m::isnan(ctop_max_zone) && (ctop_max_zone > lminmax.max_val)) { lminmax.max_val = ctop_max_zone; + lminmax.max_loc = std::tuple{i, j, k}; + } } - , Kokkos::MinMax(minmax)); + , Reductions::Reduce3(minmax)); // Keep dt to do some checks below const double min_ndt = minmax.min_val; const double nctop = minmax.max_val; + // TODO print tuples + // Apply limits const double cfl = grmhd_pars.Get("cfl"); const double dt_min = grmhd_pars.Get("dt_min"); @@ -401,19 +405,27 @@ TaskStatus PostStepDiagnostics(const SimTime& tm, MeshData *md) const auto& pars = pmesh->packages.Get("Globals")->AllParams(); const int extra_checks = pars.Get("extra_checks"); - // Check for a soundspeed (ctop) of 0 or NaN - // This functions as a "last resort" check to stop a - // simulation on obviously bad data - if (extra_checks >= 1) { - CheckNaN(md, X1DIR); - if (pmesh->ndim > 1) CheckNaN(md, X2DIR); - if (pmesh->ndim > 2) CheckNaN(md, X3DIR); - } - - // Further checking for any negative values. Floors should + // Checking for any negative values. Floors should // prevent this, so we save it for dire debugging if (extra_checks >= 2) { - CheckNegative(md, IndexDomain::interior); + // Not sure when I'd do the check to hide latency, it's a step-end sort of deal + // Just as well it's behind extra_checks 2 + // This may happen while ch0-1 are in flight from floors, but ch2-4 are now reusable + Reductions::DomainReduction(md, UserHistoryOperation::sum, 2); + Reductions::DomainReduction(md, UserHistoryOperation::sum, 3); + Reductions::DomainReduction(md, UserHistoryOperation::sum, 4); + int nless_rho = Reductions::Check(md, 2); + int nless_u = Reductions::Check(md, 3); + int nless_rhout = Reductions::Check(md, 4); + + if (MPIRank0()) { + if (nless_rhout > 0) { + std::cout << "Number of negative conserved rho: " << nless_rhout << std::endl; + } + if (nless_rho > 0 || nless_u > 0) { + std::cout << "Number of negative primitive rho, u: " << nless_rho << "," << nless_u << std::endl; + } + } } return TaskStatus::complete; diff --git a/kharma/grmhd/grmhd.hpp b/kharma/grmhd/grmhd.hpp index 51736a2f..53878118 100644 --- a/kharma/grmhd/grmhd.hpp +++ b/kharma/grmhd/grmhd.hpp @@ -74,7 +74,7 @@ void FillOutput(MeshBlock *pmb, ParameterInput *pin); /** * Diagnostics performed after each step. - * Currently finds any negative flags or 0/NaN values in ctop + * Currently just looks for negative density/internal energy */ TaskStatus PostStepDiagnostics(const SimTime& tm, MeshData *rc); } diff --git a/kharma/grmhd/grmhd_reductions.hpp b/kharma/grmhd/grmhd_reductions.hpp index 2db2cad8..a98f40a3 100644 --- a/kharma/grmhd/grmhd_reductions.hpp +++ b/kharma/grmhd/grmhd_reductions.hpp @@ -59,7 +59,7 @@ KOKKOS_INLINE_FUNCTION Real edot(REDUCE_FUNCTION_ARGS_EH) FourVectors Dtmp; Real T1[GR_DIM]; GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); - Flux::calc_tensor(P, m_p, Dtmp, gam, k, j, i, X1DIR, T1); + Flux::calc_tensor(G, P, m_p, Dtmp, gam, k, j, i, X1DIR, T1); // \dot{E} == \int - T^1_0 * gdet * dx2 * dx3 return -T1[X0DIR] * G.gdet(Loci::center, j, i); } @@ -68,7 +68,7 @@ KOKKOS_INLINE_FUNCTION Real ldot(REDUCE_FUNCTION_ARGS_EH) FourVectors Dtmp; Real T1[GR_DIM]; GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); - Flux::calc_tensor(P, m_p, Dtmp, gam, k, j, i, X1DIR, T1); + Flux::calc_tensor(G, P, m_p, Dtmp, gam, k, j, i, X1DIR, T1); // \dot{L} == \int T^1_3 * gdet * dx2 * dx3 return T1[X3DIR] * G.gdet(Loci::center, j, i); } @@ -121,7 +121,7 @@ KOKKOS_INLINE_FUNCTION Real jet_lum(REDUCE_FUNCTION_ARGS_MESH) FourVectors Dtmp; Real T1[GR_DIM]; GRMHD::calc_4vecs(G, P(b), m_p, k, j, i, Loci::center, Dtmp); - Flux::calc_tensor(P(b), m_p, Dtmp, gam, k, j, i, X1DIR, T1); + Flux::calc_tensor(G, P(b), m_p, Dtmp, gam, k, j, i, X1DIR, T1); // If sigma > 1... if ((dot(Dtmp.bcon, Dtmp.bcov) / P(b, m_p.RHO, k, j, i)) > 1.) { // Energy flux, like at EH. 2D integral jacobian, so we have to take X1 off of auto-applied dV diff --git a/kharma/implicit/fixup.cpp b/kharma/implicit/fix_solve.cpp similarity index 96% rename from kharma/implicit/fixup.cpp rename to kharma/implicit/fix_solve.cpp index 81871696..9c9d7104 100644 --- a/kharma/implicit/fixup.cpp +++ b/kharma/implicit/fix_solve.cpp @@ -1,5 +1,5 @@ /* - * File: fixup.cpp + * File: fix_solve.cpp * * BSD 3-Clause License * @@ -86,7 +86,7 @@ TaskStatus Implicit::FixSolve(MeshBlockData *mbd) { sum_x(ip, k, j, i) = 0.; } // Fix only bad zones - if ((solve_fail(k, j, i)) == SolverStatus::fail) { + if (failed(solve_fail(k, j, i))) { //printf("Fixing zone %d %d %d!\n", i, j, k); double wsum = 0., wsum_x = 0.; // double sum[nfvar] = {0.}, sum_x[nfvar] = {0.}; @@ -102,7 +102,7 @@ TaskStatus Implicit::FixSolve(MeshBlockData *mbd) { double w = 1./(m::abs(l) + m::abs(m) + m::abs(n) + 1); // Count only the good cells, if we can - if ((solve_fail(kk, jj, ii)) != SolverStatus::fail) { + if (!failed(solve_fail(kk, jj, ii))) { // Weight by distance. Note interpolated "fixed" cells stay flagged wsum += w; FLOOP sum(ip, k, j, i) += w * P(ip, kk, jj, ii); @@ -140,7 +140,7 @@ TaskStatus Implicit::FixSolve(MeshBlockData *mbd) { pmb->par_for("fix_solver_failures_PtoU", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA (const int& k, const int& j, const int& i) { - if (solve_fail(k, j, i) == SolverStatus::fail) + if (failed(solve_fail(k, j, i))) Flux::p_to_u(G, P_all, m_p, emhd_params, gam, k, j, i, U_all, m_u); } ); diff --git a/kharma/implicit/implicit.cpp b/kharma/implicit/implicit.cpp index 6254055a..826a4665 100644 --- a/kharma/implicit/implicit.cpp +++ b/kharma/implicit/implicit.cpp @@ -34,7 +34,6 @@ #include "implicit.hpp" -#include "debug.hpp" #include "grmhd.hpp" #include "grmhd_functions.hpp" #include "kharma.hpp" @@ -125,7 +124,7 @@ std::shared_ptr Implicit::Initialize(ParameterInput *pin, std::sh bool save_residual = pin->GetOrAddBoolean("implicit", "save_residual", false); params.Add("save_residual", save_residual); if (save_residual) { - int nvars_implicit = KHARMA::CountVars(packages.get(), Metadata::GetUserFlag("Implicit")); + int nvars_implicit = KHARMA::PackDimension(packages.get(), Metadata::GetUserFlag("Implicit")); std::vector s_vars_implicit({nvars_implicit}); Metadata m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, s_vars_implicit); @@ -153,7 +152,9 @@ TaskStatus Implicit::Step(MeshData *md_full_step_init, MeshData *md_ const Real delta = implicit_par.Get("jacobian_delta"); const Real rootfind_tol = implicit_par.Get("rootfind_tol"); const bool use_qr = implicit_par.Get("use_qr"); - const int verbose = pmb_full_step_init->packages.Get("Globals")->Param("verbose"); + const auto& globals = pmb_full_step_init->packages.Get("Globals")->AllParams(); + const int verbose = globals.Get("verbose"); + const int flag_verbose = globals.Get("flag_verbose"); const Real gam = pmb_full_step_init->packages.Get("GRMHD")->Param("gamma"); const bool linesearch = implicit_par.Get("linesearch"); @@ -296,7 +297,7 @@ TaskStatus Implicit::Step(MeshData *md_full_step_init, MeshData *md_ ScratchPad2D P_linesearch_s(member.team_scratch(scratch_level), n1, nvar); // Scratchpads for solver performance diagnostics ScratchPad1D solve_norm_s(member.team_scratch(scratch_level), n1); - ScratchPad1D solve_fail_s(member.team_scratch(scratch_level), n1); + ScratchPad1D solve_fail_s(member.team_scratch(scratch_level), n1); // Copy some file contents to scratchpads, so we can slice them for(int ip=0; ip < nvar; ++ip) { @@ -321,7 +322,7 @@ TaskStatus Implicit::Step(MeshData *md_full_step_init, MeshData *md_ } else { // Need this to check if the zone had failed in any of the previous iterations. // If so, we don't attempt to update it again in the implicit solver. - solve_fail_s(i) = solve_fail_all(b, 0, k, j, i); + solve_fail_s(i) = (SolverStatus) solve_fail_all(b, 0, k, j, i); } } ); @@ -534,7 +535,7 @@ TaskStatus Implicit::Step(MeshData *md_full_step_init, MeshData *md_ // Did we converge to required tolerance? If not, update solve_fail accordingly if (solve_norm() > rootfind_tol) { - solve_fail() += SolverStatus::beyond_tol; + solve_fail() = SolverStatus::beyond_tol; // TODO was changed from +=. Valid? } } } @@ -555,7 +556,7 @@ TaskStatus Implicit::Step(MeshData *md_full_step_init, MeshData *md_ parthenon::par_for_inner(member, ib.s, ib.e, [&](const int& i) { solve_norm_all(b, 0, k, j, i) = solve_norm_s(i); - solve_fail_all(b, 0, k, j, i) = solve_fail_s(i); + solve_fail_all(b, 0, k, j, i) = (Real) solve_fail_s(i); } ); } @@ -564,42 +565,44 @@ TaskStatus Implicit::Step(MeshData *md_full_step_init, MeshData *md_ // If we need to print or exit on the max norm... if (iter >= iter_min || verbose >= 1) { // Take the maximum L2 norm on this rank - static AllReduce max_norm; - Kokkos::Max norm_max(max_norm.val); + Real lmax_norm = 0.0; pmb_sub_step_init->par_reduce("max_norm", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, KOKKOS_LAMBDA (const int& b, const int& k, const int& j, const int& i, Real& local_result) { if (solve_norm_all(b, 0, k, j, i) > local_result) local_result = solve_norm_all(b, 0, k, j, i); } - , norm_max); - // Then MPI reduce AllReduce to copy the global max to every rank - max_norm.StartReduce(MPI_MAX); - while (max_norm.CheckReduce() == TaskStatus::incomplete); - if (verbose >= 1 && MPIRank0()) printf("Iteration %d max L2 norm: %g\n", iter, max_norm.val); - - // Count total number of solver fails - // TODO move reductions like this to PostStep - int nfails = 0; - Kokkos::Sum sum_reducer(nfails); - pmb_sub_step_init->par_reduce("count_solver_fails", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int& b, const int& k, const int& j, const int& i, int& local_result) { - if (solve_fail_all(b, 0, k, j, i) == SolverStatus::fail) ++local_result; + , Kokkos::Max(lmax_norm)); + // Then MPI AllReduce to copy the global max to every rank + Reductions::StartToAll(md_solver, 4, lmax_norm, MPI_MAX); + Real max_norm = Reductions::CheckOnAll(md_solver, 4); + + if (verbose >= 1) { + // Count total number of solver fails + int lnfails = 0; + pmb_sub_step_init->par_reduce("count_solver_fails", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA (const int& b, const int& k, const int& j, const int& i, int& local_result) { + if ((SolverStatus) solve_fail_all(b, 0, k, j, i) == SolverStatus::fail) ++local_result; + } + , Kokkos::Sum(lnfails)); + // Then reduce to rank 0 to print the iteration by iteration + Reductions::Start(md_solver, 5, lnfails, MPI_SUM); + int nfails = Reductions::Check(md_solver, 5); + if (MPIRank0()) { + printf("Iteration %d max L2 norm: %g, failed zones: %d\n", iter, max_norm, nfails); } - , sum_reducer); - // Then MPI reduce AllReduce to copy the global max to every rank - static AllReduce nfails_tot; - nfails_tot.val = nfails; - nfails_tot.StartReduce(MPI_SUM); - while (nfails_tot.CheckReduce() == TaskStatus::incomplete); - if (verbose >= 1 && MPIRank0()) printf("Number of failed zones: %d\n", nfails_tot.val); - - // Break if max_norm is less than the total tolerance we set. TODO per-zone version of this? - if (iter >= iter_min && max_norm.val < rootfind_tol) break; + } + + // Finally, break if max_norm is less than the total tolerance we set + // TODO per-zone tolerance with masks? + if (iter >= iter_min && max_norm < rootfind_tol) break; } EndFlag(); } - EndFlag(); + if (flag_verbose > 0) { + Reductions::CheckFlagReduceAndPrintHits(md_solver, "solve_fail", Implicit::status_names, IndexDomain::interior, false, 2); + } + EndFlag(); return TaskStatus::complete; } @@ -613,11 +616,9 @@ TaskStatus Implicit::PostStepDiagnostics(const SimTime& tm, MeshData *md) const int flag_verbose = pars.Get("flag_verbose"); // Debugging/diagnostic info about implicit solver - // TODO status names - // if (flag_verbose >= 1) { - // int nflags = Reductions::CountFlags(md, "solve_fail", Implicit::status_names, IndexDomain::interior, flag_verbose, false); - // // TODO TODO yell here if there are too many flags - // } + if (flag_verbose > 0) { + Reductions::CheckFlagReduceAndPrintHits(md, "solve_fail", Implicit::status_names, IndexDomain::interior, false, 2); + } return TaskStatus::complete; } diff --git a/kharma/implicit/implicit.hpp b/kharma/implicit/implicit.hpp index 7004cd9f..91b4413b 100644 --- a/kharma/implicit/implicit.hpp +++ b/kharma/implicit/implicit.hpp @@ -65,7 +65,20 @@ namespace Implicit // `fail`: manual backtracking wasn't good enough. FixSolve will be called // `beyond_tol`: solver didn't converge to prescribed tolerance but didn't fail // `backtrack`: step length of 1 gave negative rho/uu, but manual backtracking (0.1) sufficed -enum SolverStatus{converged=0, fail, beyond_tol, backtrack}; +enum class SolverStatus{converged=0, fail, beyond_tol, backtrack}; + +static const std::map status_names = { + {(int) SolverStatus::fail, "failed"}, + {(int) SolverStatus::beyond_tol, "beyond tolerance"}, + {(int) SolverStatus::backtrack, "backtrack"} +}; + +template +KOKKOS_INLINE_FUNCTION bool failed(T status_flag) +{ + // Return only zones which outright failed + return static_cast(status_flag) == static_cast(SolverStatus::fail); +} /** * Initialization. Set parameters. diff --git a/kharma/inverter/invert_template.hpp b/kharma/inverter/invert_template.hpp index 38a99239..163ee8d7 100644 --- a/kharma/inverter/invert_template.hpp +++ b/kharma/inverter/invert_template.hpp @@ -58,7 +58,9 @@ static const std::map status_names = { {(int) Status::bad_gamma, "Gamma invalid"}, {(int) Status::neg_rho, "Negative rho"}, {(int) Status::neg_u, "Negative U"}, - {(int) Status::neg_rhou, "Negative rho & U"}}; + {(int) Status::neg_rhou, "Negative rho & U"} +}; + template KOKKOS_INLINE_FUNCTION bool failed(T status_flag) { diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp index c120d448..62f70e84 100644 --- a/kharma/inverter/inverter.cpp +++ b/kharma/inverter/inverter.cpp @@ -39,45 +39,6 @@ #include "domain.hpp" #include "reductions.hpp" -/** - * Internal inversion fn, templated on inverter type. Calls through to templated u_to_p - * This is called with the correct template argument from BlockUtoP - */ -template -inline void BlockPerformInversion(MeshBlockData *rc, IndexDomain domain, bool coarse) -{ - auto pmb = rc->GetBlockPointer(); - const auto& G = pmb->coords; - - PackIndexMap prims_map, cons_map; - auto U = GRMHD::PackMHDCons(rc, cons_map); - auto P = GRMHD::PackHDPrims(rc, prims_map); - const VarMap m_u(cons_map, true), m_p(prims_map, false); - - GridScalar pflag = rc->Get("pflag").data; - - const Real gam = pmb->packages.Get("GRMHD")->Param("gamma"); - - const Real err_tol = pmb->packages.Get("Inverter")->Param("err_tol"); - const int iter_max = pmb->packages.Get("Inverter")->Param("iter_max"); - const Real stepsize = pmb->packages.Get("Inverter")->Param("stepsize"); - - // Get the primitives from our conserved versions - // Notice we recover variables for only the physical (interior or MPI-boundary) - // zones! These are the only ones which are filled at our point in the step - auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds; - const IndexRange3 b = KDomain::GetPhysicalRange(rc); - - pmb->par_for("U_to_P", b.ks, b.ke, b.js, b.je, b.is, b.ie, - KOKKOS_LAMBDA (const int &k, const int &j, const int &i) { - if (KDomain::inside(k, j, i, b)) { - // Run over all interior zones and any initialized ghosts - pflag(k, j, i) = static_cast(Inverter::u_to_p(G, U, m_u, gam, k, j, i, P, m_p, Loci::center)); - } - } - ); -} - std::shared_ptr Inverter::Initialize(ParameterInput *pin, std::shared_ptr& packages) { auto pkg = std::make_shared("Inverter"); @@ -126,6 +87,45 @@ std::shared_ptr Inverter::Initialize(ParameterInput *pin, std::sh return pkg; } +/** + * Internal inversion fn, templated on inverter type. Calls through to templated u_to_p + * This is called with the correct template argument from BlockUtoP + */ +template +inline void BlockPerformInversion(MeshBlockData *rc, IndexDomain domain, bool coarse) +{ + auto pmb = rc->GetBlockPointer(); + const auto& G = pmb->coords; + + PackIndexMap prims_map, cons_map; + auto U = GRMHD::PackMHDCons(rc, cons_map); + auto P = GRMHD::PackHDPrims(rc, prims_map); + const VarMap m_u(cons_map, true), m_p(prims_map, false); + + GridScalar pflag = rc->Get("pflag").data; + + const Real gam = pmb->packages.Get("GRMHD")->Param("gamma"); + + const Real err_tol = pmb->packages.Get("Inverter")->Param("err_tol"); + const int iter_max = pmb->packages.Get("Inverter")->Param("iter_max"); + const Real stepsize = pmb->packages.Get("Inverter")->Param("stepsize"); + + // Get the primitives from our conserved versions + // Notice we recover variables for only the physical (interior or MPI-boundary) + // zones! These are the only ones which are filled at our point in the step + auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds; + const IndexRange3 b = KDomain::GetPhysicalRange(rc); + + pmb->par_for("U_to_P", b.ks, b.ke, b.js, b.je, b.is, b.ie, + KOKKOS_LAMBDA (const int &k, const int &j, const int &i) { + if (KDomain::inside(k, j, i, b)) { + // Run over all interior zones and any initialized ghosts + pflag(k, j, i) = static_cast(Inverter::u_to_p(G, U, m_u, gam, k, j, i, P, m_p, Loci::center)); + } + } + ); +} + void Inverter::BlockUtoP(MeshBlockData *rc, IndexDomain domain, bool coarse) { // This only chooses an implementation. See BlockPerformInversion and implementations e.g. onedw.hpp @@ -137,6 +137,7 @@ void Inverter::BlockUtoP(MeshBlockData *rc, IndexDomain domain, bool coars case Type::none: break; } + //Reductions::StartFlagReduce(md, "pflag", Inverter::status_names, IndexDomain::interior, false, 1); } TaskStatus Inverter::PostStepDiagnostics(const SimTime& tm, MeshData *md) @@ -148,9 +149,11 @@ TaskStatus Inverter::PostStepDiagnostics(const SimTime& tm, MeshData *md) const int flag_verbose = pars.Get("flag_verbose"); // Debugging/diagnostic info about floor and inversion flags + // TODO grab the total and die on too many if (flag_verbose >= 1) { - int nflags = Reductions::CountFlags(md, "pflag", Inverter::status_names, IndexDomain::interior, flag_verbose, false); - // TODO TODO yell here if there are too many flags + // TODO this should move into BlockUtoP when everything goes MeshData + Reductions::StartFlagReduce(md, "pflag", Inverter::status_names, IndexDomain::interior, false, 1); + Reductions::CheckFlagReduceAndPrintHits(md, "pflag", Inverter::status_names, IndexDomain::interior, false, 1); } return TaskStatus::complete; diff --git a/kharma/kharma.cpp b/kharma/kharma.cpp index d7fc2ffe..6bc4d950 100644 --- a/kharma/kharma.cpp +++ b/kharma/kharma.cpp @@ -301,6 +301,8 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr &pin) auto t_grmhd = tl.AddTask(t_globals | t_driver, KHARMA::AddPackage, packages, GRMHD::Initialize, pin.get()); // Inverter (TODO: split out fixups, then don't load this when GRMHD isn't loaded) auto t_inverter = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Inverter::Initialize, pin.get()); + // Reductions, needed for most other packages + auto t_reductions = tl.AddTask(t_none, KHARMA::AddPackage, packages, Reductions::Initialize, pin.get()); // B field solvers, to ensure divB ~= 0. // Bunch of logic here: basically we want to load <=1 solver with an encoded order of preference @@ -335,12 +337,8 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr &pin) if (t_b_field == t_none) t_b_field = t_b_cleanup; } - // Enable calculating jcon iff it is in any list of outputs (and there's even B to calculate it). - // Since it is never required to restart, this is the only time we'd write (hence, need) it - if (FieldIsOutput(pin.get(), "jcon") && t_b_field != t_none) { - auto t_current = tl.AddTask(t_b_field, KHARMA::AddPackage, packages, Current::Initialize, pin.get()); - } - // Electrons are usually boring but not impossible without a B field (TODO add a test?) + // Optional standalone packages + // Electrons are boring but not impossible without a B field (TODO add a test?) if (pin->GetOrAddBoolean("electrons", "on", false)) { auto t_electrons = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Electrons::Initialize, pin.get()); } @@ -350,6 +348,11 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr &pin) if (pin->GetOrAddBoolean("wind", "on", false)) { auto t_electrons = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Wind::Initialize, pin.get()); } + // Enable calculating jcon iff it is in any list of outputs (and there's even B to calculate it). + // Since it is never required to restart, this is the only time we'd write (hence, need) it + if (FieldIsOutput(pin.get(), "jcon") && t_b_field != t_none) { + auto t_current = tl.AddTask(t_b_field, KHARMA::AddPackage, packages, Current::Initialize, pin.get()); + } // Execute the whole collection (just in case we do something fancy?) while (!tr.Execute()); // TODO this will inf-loop on error @@ -365,7 +368,8 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr &pin) KHARMA::AddPackage(packages, KBoundaries::Initialize, pin.get()); // Load the implicit package last, and only if there are any variables which need implicit evolution - int n_implicit = CountVars(packages.get(), Metadata::GetUserFlag("Implicit")); + auto all_implicit = Metadata::FlagCollection(Metadata::GetUserFlag("Implicit")); + int n_implicit = PackDimension(packages.get(), Metadata::GetUserFlag("Implicit")); if (n_implicit > 0) { KHARMA::AddPackage(packages, Implicit::Initialize, pin.get()); } diff --git a/kharma/kharma.hpp b/kharma/kharma.hpp index 5928ec3a..3e30e7a2 100644 --- a/kharma/kharma.hpp +++ b/kharma/kharma.hpp @@ -31,6 +31,7 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#pragma once #include "decs.hpp" #include "types.hpp" @@ -109,32 +110,36 @@ inline bool FieldIsOutput(ParameterInput *pin, std::string name) * This fn calculates the size a VariablePack *would* be, without making one -- * it uses only the package list, and counts through each variable in each package. * Mostly useful for initialization. - * TODO can this take flagcollections? Move to Parthenon... */ -inline int CountVars(Packages_t* packages, MetadataFlag flag) +inline int PackDimension(Packages_t* packages, Metadata::FlagCollection fc) { + // We want to exclude anything specific to B field cleanup & not used elsewhere + // (confusingly, this isn't *necessarily* everything in the B_Cleanup package) + if (packages->AllPackages().count("B_Cleanup")) + fc = fc - Metadata::GetUserFlag("B_Cleanup"); + + // Count dimensions (1 for scalars + vector lengths) of each package's variables int nvar = 0; for (auto pkg : packages->AllPackages()) { - for (auto field : pkg.second->AllFields()) { - // Specifically ignore the B_Cleanup variables, we'll never want them separately like this - bool is_not_cleanup = packages->AllPackages().count("B_Cleanup") - ? !field.second.IsSet(Metadata::GetUserFlag("B_Cleanup")) - : true; - if (field.second.IsSet(flag) && is_not_cleanup) { - int var_len = 0; - if (field.second.IsSet(Metadata::Face)) { - var_len = 3; // TODO non-scalar face fields? - } else if (field.second.Shape().size() < 1) { - var_len = 1; - } else { - var_len = field.second.Shape()[0]; - } - //std::cout << "flag: " << flag << " var: " << field.first.label() << " size: " << var_len << std::endl; - nvar += var_len; - } - } + nvar += pkg.second->GetPackDimension(fc); } return nvar; } +/** + * This fn calculates the size a VariablePack *would* be, without making one -- + * it uses only the package list, and counts through each variable in each package. + * Mostly useful for initialization. + */ +inline std::vector GetVariableNames(Packages_t* packages, Metadata::FlagCollection fc) +{ + // Count dimensions (1 for scalars + vector lengths) of each package's variables + std::vector names; + for (auto pkg : packages->AllPackages()) { + std::vector pnames = pkg.second->GetVariableNames(fc); + names.insert(names.end(), pnames.begin(), pnames.end()); + } + return names; +} + } diff --git a/kharma/prob/post_initialize.cpp b/kharma/prob/post_initialize.cpp index 7a09c3dc..5944bbe1 100644 --- a/kharma/prob/post_initialize.cpp +++ b/kharma/prob/post_initialize.cpp @@ -41,7 +41,6 @@ #include "b_field_tools.hpp" #include "blob.hpp" #include "boundaries.hpp" -#include "debug.hpp" #include "floors.hpp" #include "flux.hpp" #include "gr_coordinates.hpp" @@ -56,6 +55,7 @@ * Should only be used in initialization code, as the * reducer object & MPI comm are created on entry & * cleaned on exit + * TODO use Reductions stuff? */ template inline T MPIReduce_once(T f, MPI_Op O) @@ -69,35 +69,18 @@ inline T MPIReduce_once(T f, MPI_Op O) return reduction.val; } -// Define reductions we need just for PostInitialize code. -// TODO namespace... -KOKKOS_INLINE_FUNCTION Real bsq(REDUCE_FUNCTION_ARGS_MESH) -{ - FourVectors Dtmp; - GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); - return dot(Dtmp.bcon, Dtmp.bcov); -} -KOKKOS_INLINE_FUNCTION Real gas_pres(REDUCE_FUNCTION_ARGS_MESH) -{ - return (gam - 1) * P(m_p.UU, k, j, i); -} -KOKKOS_INLINE_FUNCTION Real gas_beta(REDUCE_FUNCTION_ARGS_MESH) -{ - FourVectors Dtmp; - GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); - return ((gam - 1) * P(m_p.UU, k, j, i))/(0.5*(dot(Dtmp.bcon, Dtmp.bcov) + SMALL)); -} +// Shorter names for the reductions we use here Real MaxBsq(MeshData *md) { - return Reductions::DomainReduction(md, UserHistoryOperation::max, bsq, 0.0); + return Reductions::DomainReduction(md, UserHistoryOperation::max); } Real MaxPressure(MeshData *md) { - return Reductions::DomainReduction(md, UserHistoryOperation::max, gas_pres, 0.0); + return Reductions::DomainReduction(md, UserHistoryOperation::max); } Real MinBeta(MeshData *md) { - return Reductions::DomainReduction(md, UserHistoryOperation::min, gas_beta, 0.0); + return Reductions::DomainReduction(md, UserHistoryOperation::min); } void KHARMA::SeedAndNormalizeB(ParameterInput *pin, std::shared_ptr> md) @@ -109,8 +92,6 @@ void KHARMA::SeedAndNormalizeB(ParameterInput *pin, std::shared_ptrpackages.AllPackages().count("B_CD"); const int verbose = pmesh->packages.Get("Globals")->Param("verbose"); - // TODO this should be restructured... - Flag("SeedBField"); // Seed the magnetic field on each block for (auto &pmb : pmesh->block_list) { @@ -142,7 +123,6 @@ void KHARMA::SeedAndNormalizeB(ParameterInput *pin, std::shared_ptrmesh_data.GetOrAdd("base", 0); + auto &md = pmesh->mesh_data.Get(); auto& pkgs = pmesh->packages.AllPackages(); @@ -269,8 +248,6 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart) // This does its own MPI syncs B_Cleanup::CleanupDivergence(md); - - B_Cleanup::RemoveExtraFields(pmesh->block_list); } // Finally, synchronize boundary values. diff --git a/kharma/prob/problem.cpp b/kharma/prob/problem.cpp index db19c618..b6e57c6e 100644 --- a/kharma/prob/problem.cpp +++ b/kharma/prob/problem.cpp @@ -36,7 +36,6 @@ #include "b_field_tools.hpp" #include "boundaries.hpp" -#include "debug.hpp" #include "electrons.hpp" #include "floors.hpp" #include "flux.hpp" diff --git a/kharma/prob/resize_restart.cpp b/kharma/prob/resize_restart.cpp index 0a60caaf..07add691 100644 --- a/kharma/prob/resize_restart.cpp +++ b/kharma/prob/resize_restart.cpp @@ -35,7 +35,6 @@ #include "resize_restart.hpp" #include "b_flux_ct.hpp" -#include "debug.hpp" #include "hdf5_utils.h" #include "kharma_utils.hpp" #include "interpolation.hpp" diff --git a/kharma/reductions/reductions.cpp b/kharma/reductions/reductions.cpp index 07f7f1f7..ebf754b8 100644 --- a/kharma/reductions/reductions.cpp +++ b/kharma/reductions/reductions.cpp @@ -36,138 +36,39 @@ #include +// TODO none of this machinery preserves zone locations, +// which we pretty often would like... -#pragma hd_warning_disable -Real Reductions::EHReduction(MeshData *md, UserHistoryOperation op, std::function fn, int zone) +std::shared_ptr Reductions::Initialize(ParameterInput *pin, std::shared_ptr& packages) { - Flag("EHReduction"); - auto pmesh = md->GetMeshPointer(); - - Real result = 0.; - for (auto &pmb : pmesh->block_list) { - // If we're on the inner edge - if (pmb->boundary_flag[parthenon::BoundaryFace::inner_x1] == BoundaryFlag::user) { - const auto& pars = pmb->packages.Get("GRMHD")->AllParams(); - const Real gam = pars.Get("gamma"); - - auto& rc = pmb->meshblock_data.Get(); - PackIndexMap prims_map, cons_map; - const auto& P = rc->PackVariables(std::vector{Metadata::GetUserFlag("Primitive")}, prims_map); - const auto& U = rc->PackVariablesAndFluxes(std::vector{Metadata::Conserved}, cons_map); - const VarMap m_u(cons_map, true), m_p(prims_map, false); - - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior); - const auto& G = pmb->coords; - - Real block_result; - switch(op) { - case UserHistoryOperation::sum: { - Kokkos::Sum sum_reducer(block_result); - pmb->par_reduce("accretion_sum", kb.s, kb.e, jb.s, jb.e, ib.s+zone, ib.s+zone, - KOKKOS_LAMBDA (const int &k, const int &j, const int &i, double &local_result) { - local_result += fn(G, P, m_p, U, m_u, gam, k, j, i) * G.Dxc<3>(k) * G.Dxc<2>(j); - } - , sum_reducer); - result += block_result; - break; - } - case UserHistoryOperation::max: { - Kokkos::Max max_reducer(block_result); - pmb->par_reduce("accretion_sum", kb.s, kb.e, jb.s, jb.e, ib.s+zone, ib.s+zone, - KOKKOS_LAMBDA (const int &k, const int &j, const int &i, double &local_result) { - const Real val = fn(G, P, m_p, U, m_u, gam, k, j, i) * G.Dxc<3>(k) * G.Dxc<2>(j); - if (val > local_result) local_result = val; - } - , max_reducer); - if (block_result > result) result = block_result; - break; - } - case UserHistoryOperation::min: { - Kokkos::Min min_reducer(block_result); - pmb->par_reduce("accretion_sum", kb.s, kb.e, jb.s, jb.e, ib.s+zone, ib.s+zone, - KOKKOS_LAMBDA (const int &k, const int &j, const int &i, double &local_result) { - const Real val = fn(G, P, m_p, U, m_u, gam, k, j, i) * G.Dxc<3>(k) * G.Dxc<2>(j); - if (val < local_result) local_result = val; - } - , min_reducer); - if (block_result < result) result = block_result; - break; - } - } - } - } - - EndFlag(); - return result; -} - -#pragma hd_warning_disable -Real Reductions::DomainReduction(MeshData *md, UserHistoryOperation op, std::function fn, Real arg) -{ - Flag("DomainReduction"); - auto pmesh = md->GetMeshPointer(); - - // TODO TODO MESHDATA THIS - Real result = 0.; - const auto& pars = pmesh->packages.Get("GRMHD")->AllParams(); - const Real gam = pars.Get("gamma"); - - PackIndexMap prims_map, cons_map; - const auto& P = md->PackVariables(std::vector{Metadata::GetUserFlag("Primitive")}, prims_map); - const auto& U = md->PackVariablesAndFluxes(std::vector{Metadata::Conserved}, cons_map); - const VarMap m_u(cons_map, true), m_p(prims_map, false); - - auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); - IndexRange ib = pmb0->cellbounds.GetBoundsI(IndexDomain::interior); - IndexRange jb = pmb0->cellbounds.GetBoundsJ(IndexDomain::interior); - IndexRange kb = pmb0->cellbounds.GetBoundsK(IndexDomain::interior); - IndexRange block = IndexRange{0, U.GetDim(5) - 1}; - - switch(op) { - case UserHistoryOperation::sum: { - Kokkos::Sum sum_reducer(result); - pmb0->par_reduce("domain_sum", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, double &local_result) { - const auto& G = U.GetCoords(b); - local_result += fn(G, P(b), m_p, U(b), m_u, gam, k, j, i, arg) * G.Dxc<3>(k) * G.Dxc<2>(j) * G.Dxc<1>(i); - } - , sum_reducer); - break; - } - case UserHistoryOperation::max: { - Kokkos::Max max_reducer(result); - pmb0->par_reduce("domain_max", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, double &local_result) { - const auto& G = U.GetCoords(b); - const Real val = fn(G, P(b), m_p, U(b), m_u, gam, k, j, i, arg) * G.Dxc<3>(k) * G.Dxc<2>(j) * G.Dxc<1>(i); - if (val > local_result) local_result = val; - } - , max_reducer); - break; - } - case UserHistoryOperation::min: { - Kokkos::Min min_reducer(result); - pmb0->par_reduce("domain_min", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, double &local_result) { - const auto& G = U.GetCoords(b); - const Real val = fn(G, P(b), m_p, U(b), m_u, gam, k, j, i, arg) * G.Dxc<3>(k) * G.Dxc<2>(j) * G.Dxc<1>(i); - if (val < local_result) local_result = val; - } - , min_reducer); - break; - } - } - - EndFlag(); - return result; + auto pkg = std::make_shared("Reductions"); + Params ¶ms = pkg->AllParams(); + + // These pools are vectors of Reducers which operate on vectors (or scalars) + // They exist to allow several reductions to be in-flight at once to hide latency + // (even reductions over vectors, as with the different flags) + std::vector>> vector_int_reduce_pool; + params.Add("vector_int_reduce_pool", vector_int_reduce_pool, true); + std::vector>> vector_reduce_pool; + params.Add("vector_reduce_pool", vector_reduce_pool, true); + std::vector> int_reduce_pool; + params.Add("int_reduce_pool", int_reduce_pool, true); + std::vector> reduce_pool; + params.Add("reduce_pool", reduce_pool, true); + + std::vector>> vector_int_allreduce_pool; + params.Add("vector_int_allreduce_pool", vector_int_allreduce_pool, true); + std::vector>> vector_allreduce_pool; + params.Add("vector_allreduce_pool", vector_allreduce_pool, true); + std::vector> int_allreduce_pool; + params.Add("int_allreduce_pool", int_allreduce_pool, true); + std::vector> allreduce_pool; + params.Add("allreduce_pool", allreduce_pool, true); + + return pkg; } -/** - * Counts occurrences of a particular flag value - * - */ +// Flag reductions: local int Reductions::CountFlag(MeshData *md, std::string field_name, const int& flag_val, IndexDomain domain, bool is_bitflag) { auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); @@ -193,10 +94,11 @@ int Reductions::CountFlag(MeshData *md, std::string field_name, const int& return n_flag; } -int Reductions::CountFlags(MeshData *md, std::string field_name, std::map flag_values, IndexDomain domain, int verbose, bool is_bitflag) +#define MAX_NFLAGS 20 + +std::vector Reductions::CountFlags(MeshData *md, std::string field_name, const std::map &flag_values, IndexDomain domain, bool is_bitflag) { Flag("CountFlags_"+field_name); - int nflags = 0; auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); // Pack variables @@ -209,85 +111,82 @@ int Reductions::CountFlags(MeshData *md, std::string field_name, std::map< IndexRange kb = md->GetBoundsK(domain); IndexRange block = IndexRange{0, flag.GetDim(5) - 1}; - // Count all nonzero (technically, >0) values + const int n_of_flags = flag_values.size(); + int flag_val_list[MAX_NFLAGS]; + int f=0; + for (auto &flag : flag_values) { + flag_val_list[f] = flag.first; + f++; + } + + // Count all nonzero (technically, >0) values, + // and all values of each // This works for pflags or fflags, so long as they're separate // We don't count negative pflags as they denote zones that shouldn't be fixed - Kokkos::Sum sum_reducer(nflags); + Reductions::array_type flag_reducer; pmb0->par_reduce("count_flags", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, int &local_result) { - if ((int) flag(b, 0, k, j, i) > 0) ++local_result; + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, + Reductions::array_type &local_result) { + if ((int) flag(b, 0, k, j, i) > 0) ++local_result.my_array[0]; + for (int f=0; f(flag(b, 0, k, j, i)) & flag_val_list[f]) || + (!is_bitflag && static_cast(flag(b, 0, k, j, i)) == flag_val_list[f])) + ++local_result.my_array[f+1]; } - , sum_reducer); - - // TODO TODO REPLACE ABOVE WITH SOMETHING LIKE: - // array_sum::array_type res; - // parthenon::par_reduce(parthenon::loop_pattern_mdrange_tag, "RadiationResidual1", - // DevExecSpace(), 0, mout->NumBlocks()-1, - // 0, nang1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - // KOKKOS_LAMBDA(const int b, const int n, const int k, const int j, const int i, - // array_sum::array_type& dsum) { - // dsum.my_array[0] += m::abs(iiter(b,n,k,j,i) - iout(b,n,k,j,i)); - // dsum.my_array[1] += iout(b,n,k,j,i); - // }, array_sum::GlobalSum(res)); + , Reductions::ArraySum(flag_reducer)); + + std::vector n_each_flag; + for (int f=0; f n_tot; - n_tot.val = nflags; - n_tot.StartReduce(MPI_SUM); - while (n_tot.CheckReduce() == TaskStatus::incomplete); - nflags = n_tot.val; +// Flag reductions: global +void Reductions::StartFlagReduce(MeshData *md, std::string field_name, const std::map &flag_values, IndexDomain domain, bool is_bitflag, int channel) +{ + Start>(md, channel, CountFlags(md, field_name, flag_values, domain, is_bitflag), MPI_SUM); +} - // If necessary, count each flag - // This is slow, but it can be slow: it's not called for normal operation - if (verbose > 0 && nflags > 0) { - // Overlap reductions to save time - // ...at the cost of considerable complexity... +std::vector Reductions::CheckFlagReduceAndPrintHits(MeshData *md, std::string field_name, const std::map &flag_values, + IndexDomain domain, bool is_bitflag, int channel) +{ + Flag("CheckFlagReduce"); + const auto& pmesh = md->GetMeshPointer(); + const auto& verbose = pmesh->packages.Get("Globals")->Param("flag_verbose"); - // TODO TODO eliminate static reducers, they crash the program after it finishes - static Reduce n_cells_r; - n_cells_r.val = (block.e - block.s + 1) * (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); - n_cells_r.StartReduce(0, MPI_SUM); + // Get the relevant reducer and result + auto& pars = md->GetMeshPointer()->packages.Get("Reductions")->AllParams(); + auto *vector_int_reduce_pool = pars.GetMutable>>>("vector_int_reduce_pool"); + auto& vector_int_reduce = (*vector_int_reduce_pool)[channel]; - static std::vector>> reducers; - // Initialize reducers if they haven't been - if (reducers.size() == 0) { - for (auto& status : flag_values) { - std::shared_ptr> reducer = std::make_shared>(); - reducers.push_back(reducer); - } - } - // Count occurrences of each flag value, assign to a reducer in order - int i = 0; - for (auto& status : flag_values) { - reducers[i]->val = CountFlag(md, field_name, (int) status.first, domain, is_bitflag); - reducers[i]->StartReduce(0, MPI_SUM); - ++i; - } - while (n_cells_r.CheckReduce() == TaskStatus::incomplete); - const int n_cells = n_cells_r.val; - // Check each reducer in order, add to a vector - std::vector n_status_present; - for (std::shared_ptr> reducer : reducers) { - while (reducer->CheckReduce() == TaskStatus::incomplete); - n_status_present.push_back(reducer->val); - } + while (vector_int_reduce.CheckReduce() == TaskStatus::incomplete); + const std::vector &total_flag_counts = vector_int_reduce.val; + // Print flags + if (total_flag_counts[0] > 0 && verbose > 0) { if (MPIRank0()) { + // Always our domain size times total number of blocks + IndexRange ib = md->GetBoundsI(domain); + IndexRange jb = md->GetBoundsJ(domain); + IndexRange kb = md->GetBoundsK(domain); + int n_cells = pmesh->nbtotal * (kb.e - kb.s + 1) * (jb.e - jb.s + 1) * (ib.e - ib.s + 1); + + int nflags = total_flag_counts[0]; std::cout << field_name << ": " << nflags << " (" << (int)(((double) nflags )/n_cells * 100) << "% of all cells)" << std::endl; if (verbose > 1) { // Print nonzero vector contents against flag names in order - int i = 0; + int i = 1; for (auto& status : flag_values) { - if (n_status_present[i] > 0) std::cout << status.second << ": " << n_status_present[i] << std::endl; + if (total_flag_counts[i] > 0) std::cout << status.second << ": " << total_flag_counts[i] << std::endl; ++i; } std::cout << std::endl; } } - - // TODO Print zone locations of bad inversions } EndFlag(); - return nflags; + return total_flag_counts; } diff --git a/kharma/reductions/reductions.hpp b/kharma/reductions/reductions.hpp index 11dd709f..b84a8518 100644 --- a/kharma/reductions/reductions.hpp +++ b/kharma/reductions/reductions.hpp @@ -33,26 +33,22 @@ */ #pragma once -#include "debug.hpp" +#include "reductions_variables.hpp" #include "flux_functions.hpp" #include "grmhd_functions.hpp" #include "types.hpp" -// This is for flux/accretion rate -#define REDUCE_FUNCTION_ARGS_EH const GRCoordinates& G, const VariablePack& P, const VarMap& m_p, \ - const VariableFluxPack& U, const VarMap& m_u, const Real& gam, \ - const int& k, const int& j, const int& i +namespace Reductions { -// Notice this list also includes a generic Real-type argument: this is for denoting a radius or placement. -// Provided as argument in case reductions at/within/etc multiple places are desired -// (e.g., disk and jet, inner & outer, multiple radii) -// TODO take off 'b' from arg list and pass block contents? -#define REDUCE_FUNCTION_ARGS_MESH const GRCoordinates& G, const VariablePack& P, const VarMap& m_p, \ - const VariableFluxPack& U, const VarMap& m_u, const Real& gam, \ - const int& k, const int& j, const int& i, const Real& arg +// Think about how to do channels as not ints +//constexpr enum class Channel{fflag, pflag, iflag, }; -namespace Reductions { +/** + * These, too, are a package. + * Mostly it exists to keep track of Reducers, so we can clean them up to keep MPI happy. + */ +std::shared_ptr Initialize(ParameterInput *pin, std::shared_ptr& packages); /** * Perform a reduction using operation 'op' over a spherical shell at the given zone, measured from left side of @@ -60,15 +56,48 @@ namespace Reductions { * As this only runs on innermost blocks, this is intended for accretion/event horizon * measurements in black hole simulations. */ -Real EHReduction(MeshData *md, UserHistoryOperation op, std::function fn, int zone); +template +T EHReduction(MeshData *md, UserHistoryOperation op, int zone); /** - * Perform a reduction using operation 'op' over all zones. - * The extra 'arg' is passed as the last argument to the device-side function. - * It is generally used to denote a radius inside, outside, or at which the reduction should be taken. - * This should be used for 2D shell sums not at the EH: just divide the function result by the zone spacing dx1. + * Perform a reduction using operation 'op' over a given domain + * This should be used for all 2D shell sums not around the EH: + * Just set equal min/max, 2D slices are detected */ -Real DomainReduction(MeshData *md, UserHistoryOperation op, std::function fn, Real arg); +template +T DomainReduction(MeshData *md, UserHistoryOperation op, const GReal startx[3], const GReal stopx[3], int channel=-1); +template +T DomainReduction(MeshData *md, UserHistoryOperation op, int channel=-1) { + const GReal startx[3] = {std::numeric_limits::min(), std::numeric_limits::min(), std::numeric_limits::min()}; + const GReal stopx[3] = {std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max()}; + return DomainReduction(md, op, startx, stopx, channel); +} + +/** + * Start reductions with a value you have on hand + */ +template +void Start(MeshData *md, int channel, T val, MPI_Op op); +template +void StartToAll(MeshData *md, int channel, T val, MPI_Op op); + +/** + * Check the results of reductions that have been started. + * Remember channels are COUNTED SEPARATELY between the 4 lists: + * Real/default, int, vector and vector (i.e. Flags) + */ +template +T Check(MeshData *md, int channel); +template +T CheckOnAll(MeshData *md, int channel); + +/** + * Check the results of reductions that have been started. + * Remember channels are COUNTED SEPARATELY between the 4 lists: + * Real/default, int, vector and vector (i.e. Flags) + */ +template +T Check(MeshData *md, int channel); /** * Count instances of a particular flag value in the named field. @@ -78,11 +107,24 @@ Real DomainReduction(MeshData *md, UserHistoryOperation op, std::function< int CountFlag(MeshData *md, std::string field_name, const int& flag_val, IndexDomain domain, bool is_bitflag); /** - * Count instances of a particular flag value in the named field. + * Count instances of all flags in the named field. * is_bitflag specifies whether multiple flags may be present and will be orthogonal (e.g. FFlag), * or whether flags receive consecutive integer values. - * TODO could return numbers for all flags instead of just printing */ -int CountFlags(MeshData *md, std::string field_name, std::map flag_values, IndexDomain domain, int verbose, bool is_bitflag); +std::vector CountFlags(MeshData *md, std::string field_name, const std::map &flag_values, IndexDomain domain, bool is_bitflag); + +/** + * Determine number of local flags hit with CountFlags, and send the value over MPI reducer 'channel' + */ +void StartFlagReduce(MeshData *md, std::string field_name, const std::map &flag_values, IndexDomain domain, bool is_bitflag, int channel); + +/** + * Check a flag's MPI reduction and print any flags hit + */ +std::vector CheckFlagReduceAndPrintHits(MeshData *md, std::string field_name, const std::map &flag_values, + IndexDomain domain, bool is_bitflag, int channel); } // namespace Reductions + +// See the file for why we do this +#include "reductions_impl.hpp" diff --git a/kharma/reductions/reductions_impl.hpp b/kharma/reductions/reductions_impl.hpp new file mode 100644 index 00000000..df42b1b8 --- /dev/null +++ b/kharma/reductions/reductions_impl.hpp @@ -0,0 +1,296 @@ +/* + * File: reductions_variables.hpp + * + * BSD 3-Clause License + * + * Copyright (c) 2020, AFD Group at UIUC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#pragma once + +// This is a weird header. It's included at the end of reductions.hpp, in order to provide +// the source of the below templates to other files to instantiate. +// Otherwise, it operates like a normal cpp (NOT hpp) file. + +// Satisfy IDE parsers who aren't wise to our schemes +#include "reductions.hpp" + +template +inline std::string GetPoolName() +{ + if constexpr (all_reduce) { + if constexpr (std::is_same::value) + return "allreduce_pool"; + if constexpr (std::is_same::value) + return "int_allreduce_pool"; + if constexpr (std::is_same>::value) + return "vector_allreduce_pool"; + if constexpr (std::is_same>::value) + return "vector_int_allreduce_pool"; + } else { + if constexpr (std::is_same::value) + return "reduce_pool"; + if constexpr (std::is_same::value) + return "int_reduce_pool"; + if constexpr (std::is_same>::value) + return "vector_reduce_pool"; + if constexpr (std::is_same>::value) + return "vector_int_reduce_pool"; + } +} + +// MPI reduction starts +template +void Reductions::Start(MeshData *md, int channel, T val, MPI_Op op) +{ + // Get the relevant reducer + const std::string pool_name = GetPoolName(); + auto& pars = md->GetMeshPointer()->packages.Get("Reductions")->AllParams(); + auto *reduce_pool = pars.GetMutable>>(pool_name); + while (reduce_pool->size() <= channel) reduce_pool->push_back(Reduce()); + auto& reduce = (*reduce_pool)[channel]; + // Fill with flags + reduce.val = val; + reduce.StartReduce(0, op); +} +template +void Reductions::StartToAll(MeshData *md, int channel, T val, MPI_Op op) +{ + // Get the relevant reducer + const std::string pool_name = GetPoolName(); + auto& pars = md->GetMeshPointer()->packages.Get("Reductions")->AllParams(); + auto *allreduce_pool = pars.GetMutable>>(pool_name); + while (allreduce_pool->size() <= channel) allreduce_pool->push_back(AllReduce()); + auto& reduce = (*allreduce_pool)[channel]; + // Fill with flags + reduce.val = val; + reduce.StartReduce(op); +} + +// MPI reduction checks +template +T Reductions::Check(MeshData *md, int channel) +{ + // Get the relevant reducer and result + const std::string pool_name = GetPoolName(); + auto& pars = md->GetMeshPointer()->packages.Get("Reductions")->AllParams(); + auto *reduce_pool = pars.GetMutable>>(pool_name); + auto& reducer = (*reduce_pool)[channel]; + + while (reducer.CheckReduce() == TaskStatus::incomplete); + return reducer.val; +} +template +T Reductions::CheckOnAll(MeshData *md, int channel) +{ + // Get the relevant reducer and result + const std::string pool_name = GetPoolName(); + auto& pars = md->GetMeshPointer()->packages.Get("Reductions")->AllParams(); + auto *reduce_pool = pars.GetMutable>>(pool_name); + auto& reducer = (*reduce_pool)[channel]; + + while (reducer.CheckReduce() == TaskStatus::incomplete); + return reducer.val; +} + +#define REDUCE_FUNCTION_CALL G, P(b), m_p, U(b), m_u, cmax(b), cmin(b), emhd_params, gam, k, j, i + +template +T Reductions::EHReduction(MeshData *md, UserHistoryOperation op, int zone) +{ + Flag("EHReduction"); + auto pmesh = md->GetMeshPointer(); + + const auto& pars = pmesh->packages.Get("GRMHD")->AllParams(); + const Real gam = pars.Get("gamma"); + const auto& emhd_params = EMHD::GetEMHDParameters(pmesh->packages); + + PackIndexMap prims_map, cons_map; + const auto& P = md->PackVariables(std::vector{Metadata::GetUserFlag("Primitive")}, prims_map); + const auto& U = md->PackVariablesAndFluxes(std::vector{Metadata::Conserved}, cons_map); + const VarMap m_u(cons_map, true), m_p(prims_map, false); + const auto& cmax = md->PackVariables(std::vector{"Flux.cmax"}); + const auto& cmin = md->PackVariables(std::vector{"Flux.cmin"}); + + auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); + IndexRange ib = pmb0->cellbounds.GetBoundsI(IndexDomain::interior); + IndexRange jb = pmb0->cellbounds.GetBoundsJ(IndexDomain::interior); + IndexRange kb = pmb0->cellbounds.GetBoundsK(IndexDomain::interior); + IndexRange block = IndexRange{0, U.GetDim(5) - 1}; + + T result(0); + int nb = pmesh->GetNumMeshBlocksThisRank(); + for (int iblock=0; iblock < nb; iblock++) { + const auto &pmb = pmesh->block_list[iblock]; + // Inner-edge blocks only for speed + if (pmb->boundary_flag[parthenon::BoundaryFace::inner_x1] == BoundaryFlag::user) { + const auto& G = pmb->coords; + T block_result; + switch(op) { + case UserHistoryOperation::sum: { + Kokkos::Sum sum_reducer(block_result); + pmb->par_reduce("accretion_sum", iblock, iblock, kb.s, kb.e, jb.s, jb.e, ib.s+zone, ib.s+zone, + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, T &local_result) { + local_result += reduction_var(REDUCE_FUNCTION_CALL) * G.Dxc<3>(k) * G.Dxc<2>(j); + } + , sum_reducer); + result += block_result; + break; + } + case UserHistoryOperation::max: { + Kokkos::Max max_reducer(block_result); + pmb->par_reduce("accretion_sum", iblock, iblock, kb.s, kb.e, jb.s, jb.e, ib.s+zone, ib.s+zone, + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, T &local_result) { + const T val = reduction_var(REDUCE_FUNCTION_CALL) * G.Dxc<3>(k) * G.Dxc<2>(j); + if (val > local_result) local_result = val; + } + , max_reducer); + if (block_result > result) result = block_result; + break; + } + case UserHistoryOperation::min: { + Kokkos::Min min_reducer(block_result); + pmb->par_reduce("accretion_sum", iblock, iblock, kb.s, kb.e, jb.s, jb.e, ib.s+zone, ib.s+zone, + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, T &local_result) { + const T val = reduction_var(REDUCE_FUNCTION_CALL) * G.Dxc<3>(k) * G.Dxc<2>(j); + if (val < local_result) local_result = val; + } + , min_reducer); + if (block_result < result) result = block_result; + break; + } + } + } + } + + EndFlag(); + return result; +} + +#define INSIDE (x[1] > startx[0] && x[2] > startx[1] && x[3] > startx[2]) && \ + (trivial[0] ? x[1] < startx[0] + G.Dxc<1>(i) : x[1] < stopx[0]) && \ + (trivial[1] ? x[2] < startx[1] + G.Dxc<2>(j) : x[2] < stopx[1]) && \ + (trivial[2] ? x[3] < startx[2] + G.Dxc<3>(k) : x[3] < stopx[2]) + +// TODO additionally template on return type to avoid counting flags with Reals +template +T Reductions::DomainReduction(MeshData *md, UserHistoryOperation op, const GReal startx[3], const GReal stopx[3], int channel) +{ + Flag("DomainReduction"); + auto pmesh = md->GetMeshPointer(); + + const auto& pars = pmesh->packages.Get("GRMHD")->AllParams(); + const Real gam = pars.Get("gamma"); + const auto& emhd_params = EMHD::GetEMHDParameters(pmesh->packages); + + // Just pass in everything we might want. Probably slow? + PackIndexMap prims_map, cons_map; + const auto& P = md->PackVariables(std::vector{Metadata::GetUserFlag("Primitive")}, prims_map); + const auto& U = md->PackVariablesAndFluxes(std::vector{Metadata::Conserved}, cons_map); + const VarMap m_u(cons_map, true), m_p(prims_map, false); + const auto& cmax = md->PackVariables(std::vector{"Flux.cmax"}); + const auto& cmin = md->PackVariables(std::vector{"Flux.cmin"}); + + auto pmb0 = md->GetBlockData(0)->GetBlockPointer(); + IndexRange ib = pmb0->cellbounds.GetBoundsI(IndexDomain::interior); + IndexRange jb = pmb0->cellbounds.GetBoundsJ(IndexDomain::interior); + IndexRange kb = pmb0->cellbounds.GetBoundsK(IndexDomain::interior); + IndexRange block = IndexRange{0, U.GetDim(5) - 1}; + + bool trivial_tmp[3] = {false, false, false}; + VLOOP if(startx[v] == stopx[v]) { + trivial_tmp[v] = true; + } + const bool trivial[3] = {trivial_tmp[0], trivial_tmp[1], trivial_tmp[2]}; + + T result = 0.; + MPI_Op mop; + switch(op) { + case UserHistoryOperation::sum: { + Kokkos::Sum sum_reducer(result); + pmb0->par_reduce("domain_sum", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, T &local_result) { + const auto& G = U.GetCoords(b); + GReal x[4]; + G.coord_embed(k, j, i, Loci::center, x); + if(INSIDE) { + local_result += reduction_var(REDUCE_FUNCTION_CALL) * + (!trivial[2]) * G.Dxc<3>(k) * (!trivial[1]) * G.Dxc<2>(j) * (!trivial[0]) * G.Dxc<1>(i); + } + } + , sum_reducer); + mop = MPI_SUM; + break; + } + case UserHistoryOperation::max: { + Kokkos::Max max_reducer(result); + pmb0->par_reduce("domain_max", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, T &local_result) { + const auto& G = U.GetCoords(b); + GReal x[4]; + G.coord_embed(k, j, i, Loci::center, x); + if(INSIDE) { + const Real val = reduction_var(REDUCE_FUNCTION_CALL) * + (!trivial[2]) * G.Dxc<3>(k) * (!trivial[1]) * G.Dxc<2>(j) * (!trivial[0]) * G.Dxc<1>(i); + if (val > local_result) local_result = val; + } + } + , max_reducer); + mop = MPI_MAX; + break; + } + case UserHistoryOperation::min: { + Kokkos::Min min_reducer(result); + pmb0->par_reduce("domain_min", block.s, block.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA (const int &b, const int &k, const int &j, const int &i, T &local_result) { + const auto& G = U.GetCoords(b); + GReal x[4]; + G.coord_embed(k, j, i, Loci::center, x); + if(INSIDE) { + const Real val = reduction_var(REDUCE_FUNCTION_CALL) * + (!trivial[2]) * G.Dxc<3>(k) * (!trivial[1]) * G.Dxc<2>(j) * (!trivial[0]) * G.Dxc<1>(i); + if (val < local_result) local_result = val; + } + } + , min_reducer); + mop = MPI_MIN; + break; + } + } + + // Optionally start an MPI reducer w/given index, so the mesh-wide result is ready when we want it + if (channel >= 0) { + Start(md, channel, result, mop); + } + + EndFlag(); + return result; +} + +#undef INSIDE +#undef REDUCE_FUNCTION_CALL diff --git a/kharma/reductions/reductions_types.hpp b/kharma/reductions/reductions_types.hpp new file mode 100644 index 00000000..7ebca674 --- /dev/null +++ b/kharma/reductions/reductions_types.hpp @@ -0,0 +1,121 @@ +/* + * File: reductions_variables.hpp + * + * BSD 3-Clause License + * + * Copyright (c) 2020, AFD Group at UIUC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#pragma once + +// This file is included with types.hpp, +// so that all files have access to the extra Kokkos reduction machinery + +#include "decs.hpp" + +// Reduction types: teach Kokkos to keep a 3-int index, make it usable +// See grmhd.cpp timestep calc for example +namespace Kokkos { +template <> +struct reduction_identity> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static std::tuple min() { + int max = std::numeric_limits::max(); + return std::tuple{max, max, max}; + } +}; +} +namespace Reductions { +// Types for 3-index reduction +typedef Kokkos::MinMaxLoc> Reduce3; +typedef Reduce3::value_type Reduce3v; + +// Array type for reducing arbitrary numbers of reals +template +struct array_type { + ScalarType my_array[N]; + + KOKKOS_INLINE_FUNCTION + array_type() { init(); } + + KOKKOS_INLINE_FUNCTION + array_type(const array_type& rhs) { + for (int i = 0; i < N; i++) { + my_array[i] = rhs.my_array[i]; + } + } + + KOKKOS_INLINE_FUNCTION void init() { + for (int i = 0; i < N; i++) { + my_array[i] = 0; + } + } + + KOKKOS_INLINE_FUNCTION array_type& + operator+=(const array_type& src) { + for (int i = 0; i < N; i++) { + my_array[i] += src.my_array[i]; + } + return *this; + } +}; + +template +struct ArraySum { + public: + // Required + typedef ArraySum reducer; + typedef array_type value_type; + typedef Kokkos::View + result_view_type; + + private: + value_type& value; + + public: + KOKKOS_INLINE_FUNCTION + ArraySum(value_type& value_) : value(value_) {} + + // Required + KOKKOS_INLINE_FUNCTION + void join(value_type& dest, const value_type& src) const { + dest += src; + } + + KOKKOS_INLINE_FUNCTION + void init(value_type& val) const { val.init(); } + + KOKKOS_INLINE_FUNCTION + value_type& reference() const { return value; } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { return result_view_type(&value, 1); } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { return true; } +}; +} \ No newline at end of file diff --git a/kharma/reductions/reductions_variables.hpp b/kharma/reductions/reductions_variables.hpp new file mode 100644 index 00000000..118ae5a9 --- /dev/null +++ b/kharma/reductions/reductions_variables.hpp @@ -0,0 +1,249 @@ +/* + * File: reductions_variables.hpp + * + * BSD 3-Clause License + * + * Copyright (c) 2020, AFD Group at UIUC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#pragma once + +#include "decs.hpp" +#include "types.hpp" + +#include "emhd.hpp" +#include "flux_functions.hpp" + +using namespace parthenon; + +#define REDUCE_FUNCTION_ARGS const GRCoordinates& G, const VariablePack& P, const VarMap& m_p, \ + const VariableFluxPack& U, const VarMap& m_u, \ + const VariablePack& cmax, const VariablePack& cmin,\ + const EMHD::EMHD_parameters& emhd_params, const Real& gam, const int& k, const int& j, const int& i + +namespace Reductions { + +// Add any new reduction variables to this list, then implementations below +// Not elegant, but fast & portable. +// HIPCC doesn't like passing function pointers as we used to do, +// and it doesn't vectorize anyway. Look forward to more of this pattern in the code +enum class Var{phi, bsq, gas_pressure, mag_pressure, beta, + mdot, edot, ldot, mdot_flux, edot_flux, ldot_flux, eht_lum, jet_lum, + nan_ctop, zero_ctop, neg_rho, neg_u, neg_rhout}; + +// Function template for all reductions. +template +Real reduction_var(REDUCE_FUNCTION_ARGS); + +// Can also sum the hemispheres independently to be fancy (TODO?) +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + // \Phi == \int |*F^1^0| * gdet * dx2 * dx3 == \int |B1| * gdet * dx2 * dx3 + return 0.5 * m::abs(U(m_u.B1, k, j, i)); // factor of gdet already in cons.B +} + +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + FourVectors Dtmp; + GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); + return dot(Dtmp.bcon, Dtmp.bcov); +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + return (gam - 1) * P(m_p.UU, k, j, i); +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + FourVectors Dtmp; + GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); + return ((gam - 1) * P(m_p.UU, k, j, i))/(0.5*(dot(Dtmp.bcon, Dtmp.bcov) + SMALL)); +} + +// Accretion rates: return a zone's contribution to the surface integral +// forming each rate measurement. +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + Real ucon[GR_DIM]; + GRMHD::calc_ucon(G, P, m_p, k, j, i, Loci::center, ucon); + // \dot{M} == \int rho * u^1 * gdet * dx2 * dx3 + return -P(m_p.RHO, k, j, i) * ucon[X1DIR] * G.gdet(Loci::center, j, i); +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + FourVectors Dtmp; + Real T1[GR_DIM]; + GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); + Flux::calc_tensor(P, m_p, Dtmp, emhd_params, gam, k, j, i, X1DIR, T1); + // \dot{E} == \int - T^1_0 * gdet * dx2 * dx3 + return -T1[X0DIR] * G.gdet(Loci::center, j, i); +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + FourVectors Dtmp; + Real T1[GR_DIM]; + GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); + Flux::calc_tensor(P, m_p, Dtmp, emhd_params, gam, k, j, i, X1DIR, T1); + // \dot{L} == \int T^1_3 * gdet * dx2 * dx3 + return T1[X3DIR] * G.gdet(Loci::center, j, i); +} + +// Then we can define the same with fluxes. +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + return -U.flux(X1DIR, m_u.RHO, k, j, i); +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + return (U.flux(X1DIR, m_u.UU, k, j, i) - U.flux(X1DIR, m_u.RHO, k, j, i)); +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + return U.flux(X1DIR, m_u.U3, k, j, i); +} + +// Luminosity proxy from (for example) Porth et al 2019. +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + FourVectors Dtmp; + GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); + Real rho = P(m_p.RHO, k, j, i); + Real Pg = (gam - 1.) * P(m_p.UU, k, j, i); + Real Bmag = m::sqrt(dot(Dtmp.bcon, Dtmp.bcov)); + Real j_eht = rho*rho*rho/Pg/Pg * m::exp(-0.2 * m::cbrt(rho * rho / (Bmag * Pg * Pg))); + return j_eht; +} + +// Example of checking extra conditions before adding local results: +// sums total jet power only at exactly r=radius, for areas with sig > 1 +// TODO version w/E&M power only. Needs "calc_tensor_EM" +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + FourVectors Dtmp; + Real T1[GR_DIM]; + GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp); + Flux::calc_tensor(P, m_p, Dtmp, emhd_params, gam, k, j, i, X1DIR, T1); + // If sigma > 1... + if ((dot(Dtmp.bcon, Dtmp.bcov) / P(m_p.RHO, k, j, i)) > 1.) { + // Energy flux, like at EH + return -T1[X0DIR]; + } else { + return 0.; + } +} + +// Diagnostics. Still have to return Real so we get creative. +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + Real is_zero = 0; + VLOOP { + if(m::max(cmax(v, k, j, i), cmin(v, k, j, i)) <= 0.) { + is_zero = 1.; // once per zone +#if DEBUG +#ifndef KOKKOS_ENABLE_SYCL + printf("ctop zero at %d %d %d along dir %d\n", i, j, k, v+1); +#endif +#endif + } + } + + return is_zero; +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + Real is_nan = 0.; + VLOOP { + if(m::isnan(m::max(cmax(v, k, j, i), cmin(v, k, j, i)))) { + is_nan = 1.; +#if DEBUG +#ifndef KOKKOS_ENABLE_SYCL + printf("ctop NaN at %d %d %d along dir %d\n", i, j, k, v+1); +#endif +#endif + } + } + + return is_nan; +} + +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + Real is_neg = 0.; + if (U(m_u.RHO, k, j, i) < 0.) { + is_neg = 1.; +#if DEBUG +#ifndef KOKKOS_ENABLE_SYCL + printf("Negative rho*u^0 (cons.rho) at %d %d %d\n", i, j, k); +#endif +#endif + } +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + Real is_neg = 0.; + if (P(m_p.UU, k, j, i) < 0.) { + is_neg = 1.; +#if DEBUG +#ifndef KOKKOS_ENABLE_SYCL + printf("Negative internal energy (prims.u) at %d %d %d\n", i, j, k); +#endif +#endif + } +} +template <> +KOKKOS_INLINE_FUNCTION Real reduction_var(REDUCE_FUNCTION_ARGS) +{ + Real is_neg = 0.; + if (P(m_p.RHO, k, j, i) < 0.) { + is_neg = 1.; +#if DEBUG +#ifndef KOKKOS_ENABLE_SYCL + printf("Negative density (prims.rho) at %d %d %d\n", i, j, k); +#endif +#endif + } +} + +} + +#undef REDUCE_FUNCTION_ARGS \ No newline at end of file diff --git a/kharma/types.hpp b/kharma/types.hpp index adeee780..c8eafeab 100644 --- a/kharma/types.hpp +++ b/kharma/types.hpp @@ -37,6 +37,7 @@ #include "boundary_types.hpp" #include "kharma_package.hpp" +#include "reductions_types.hpp" #include diff --git a/machines/bp.sh b/machines/bp.sh index 9eed4109..f7997917 100644 --- a/machines/bp.sh +++ b/machines/bp.sh @@ -27,6 +27,14 @@ if [[ $METAL_HOSTNAME == "fermium" ]]; then DEVICE_ARCH="TURING75" # Nvidia MPI hangs unless I do this MPI_EXE=mpirun + + if [[ "$ARGS" == *"cuda"* ]]; then + echo "Nothing special for cuda" + else + # AMD for CPUs + CXX_NATIVE=clang++ + C_NATIVE=clang + fi fi if [[ $METAL_HOSTNAME == "ferrum" ]]; then diff --git a/machines/illinois.sh b/machines/illinois.sh index f0355603..66506a85 100644 --- a/machines/illinois.sh +++ b/machines/illinois.sh @@ -13,9 +13,11 @@ elif [[ $HOST == *".astro.illinois.edu" ]]; then HOST_ARCH="ZEN2" # BH29 benefits from using just 1 thread/core export OMP_NUM_THREADS=64 + NPROC=64 else # Other machines are Skylake HOST_ARCH="SKX" + NPROC=36 fi # Compile our own HDF5 by default diff --git a/make.sh b/make.sh index a891a6bc..35e4df3d 100755 --- a/make.sh +++ b/make.sh @@ -110,7 +110,7 @@ if [[ -z "$CXX_NATIVE" ]]; then elif which icpx >/dev/null 2>&1; then CXX_NATIVE=icpx C_NATIVE=icx - OMP_FLAG="-fopenmp" + OMP_FLAG="-fiopenmp" elif which icpc >/dev/null 2>&1; then CXX_NATIVE=icpc C_NATIVE=icc @@ -201,9 +201,10 @@ fi # Allow for a custom linker program, but use CXX by # default as system linker may be older/incompatible if [[ -v LINKER ]]; then - LINKER="$LINKER" -else - LINKER="$CXX" + EXTRA_FLAGS="-DCMAKE_LINKER=$LINKER" +fi +if [[ "$ARGS" == *"special_link_line"* ]]; then + EXTRA_FLAGS="-DCMAKE_CXX_LINK_EXECUTABLE=' -o '" fi # Avoid warning on nvcc pragmas Intel doesn't like @@ -255,6 +256,7 @@ if [[ "$ARGS" == *"hdf5"* && "$ARGS" == *"clean"* ]]; then echo Configuring HDF5... + export CFLAGS="-fPIC $CFLAGS" CC=$HDF_CC sh configure -C $HDF_EXTRA --prefix=$SOURCE_DIR/external/hdf5 --enable-build-mode=production \ --disable-dependency-tracking --disable-hl --disable-tests --disable-tools --disable-shared --disable-deprecated-symbols > build-hdf5.log sleep 1 @@ -304,8 +306,6 @@ if [[ "$ARGS" == *"clean"* ]]; then cmake ..\ -DCMAKE_C_COMPILER="$CC" \ -DCMAKE_CXX_COMPILER="$CXX" \ - -DCMAKE_LINKER="$LINKER" \ - -DCMAKE_CXX_LINK_EXECUTABLE=' -o ' \ -DCMAKE_PREFIX_PATH="$PREFIX_PATH;$CMAKE_PREFIX_PATH" \ -DCMAKE_BUILD_TYPE=$TYPE \ -DPAR_LOOP_LAYOUT=$OUTER_LAYOUT \