Skip to content

Commit

Permalink
Fix restaring from files not containing the derefinement counter (from
Browse files Browse the repository at this point in the history
…#1073) (#1089)

* Fix deref couter reading from previous restarts

* Fix logic.

* make a separate dataset for derefinement count

* don't set derefinement count to large number in restart test

* Add warning

* ignore derefinement_count in phdf_diff

* only warn on rank 0

* remove hdf debug output

---------

Co-authored-by: Luke Roberts <[email protected]>
  • Loading branch information
pgrete and lroberts36 authored May 29, 2024
1 parent fb1663f commit bd615b7
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- [[PR 1004]](https://github.com/parthenon-hpc-lab/parthenon/pull/1004) Allow parameter modification from an input file for restarts

### Fixed (not changing behavior/API/variables/...)
- [[PR 1089]](https://github.com/parthenon-hpc-lab/parthenon/pull/1089) Fix loading restart files without derefinement counter
- [[PR 1079]](https://github.com/parthenon-hpc-lab/parthenon/pull/1079) Address XDMF/Visit Issues
- [[PR 1088]](https://github.com/parthenon-hpc-lab/parthenon/pull/1088) Correctly fill fluxes for non-cell variables in SparsePacks
- [[PR 1083]](https://github.com/parthenon-hpc-lab/parthenon/pull/1083) Correctly fill VariableFluxPack for edge fluxes in 2D
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ def compare_metadata(f0, f1, quiet=False, one=False, check_input=False, tol=1.0e
for key in f0.fid[var].keys():
if var == "Blocks" and key == "loc.level-gid-lid-cnghost-gflag":
continue # depends on number of MPI ranks and distribution of blocks among ranks

if var == "Blocks" and key == "derefinement_count":
continue # Gold files have not been updated to include derefinement_count
# Compare raw data of these variables
val0 = f0.fid[var][key]
val1 = f1.fid[var][key]
Expand Down
3 changes: 1 addition & 2 deletions src/mesh/mesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,8 +693,7 @@ Mesh::Mesh(ParameterInput *pin, ApplicationInput *app_in, RestartReader &rr,
MeshBlock::Make(i, i - nbs, loclist[i], block_size, block_bcs, this, pin, app_in,
packages, resolved_packages, gflag, costlist[i]);
if (block_list[i - nbs]->pmr)
block_list[i - nbs]->pmr->DerefinementCount() =
locLevelGidLidCnghostGflag[NumIDsAndFlags * i + 5];
block_list[i - nbs]->pmr->DerefinementCount() = mesh_info.derefinement_count[i];
}
BuildGMGBlockLists(pin, app_in);
SetMeshBlockNeighbors(GridIdentifier::leaf(), block_list, ranklist);
Expand Down
10 changes: 8 additions & 2 deletions src/outputs/output_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,17 +232,23 @@ std::vector<int64_t> ComputeLocs(Mesh *pm) {

std::vector<int> ComputeIDsAndFlags(Mesh *pm) {
return FlattenBlockInfo<int>(
pm, 6, [=](MeshBlock *pmb, std::vector<int> &data, int &i) {
pm, 5, [=](MeshBlock *pmb, std::vector<int> &data, int &i) {
auto loc = pmb->pmy_mesh->Forest().GetLegacyTreeLocation(pmb->loc);
data[i++] = loc.level();
data[i++] = pmb->gid;
data[i++] = pmb->lid;
data[i++] = pmb->cnghost;
data[i++] = pmb->gflag;
data[i++] = pmb->pmr ? pmb->pmr->DerefinementCount() : 0;
});
}

std::vector<int> ComputeDerefinementCount(Mesh *pm) {
return FlattenBlockInfo<int>(pm, 1,
[=](MeshBlock *pmb, std::vector<int> &data, int &i) {
data[i++] = pmb->pmr ? pmb->pmr->DerefinementCount() : 0;
});
}

// TODO(JMM): I could make this use the other loop
// functionality/high-order functions. but it was more code than this
// for, I think, little benefit.
Expand Down
1 change: 1 addition & 0 deletions src/outputs/output_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ void ComputeCoords(Mesh *pm, bool face, const IndexRange &ib, const IndexRange &
std::vector<Real> ComputeXminBlocks(Mesh *pm);
std::vector<int64_t> ComputeLocs(Mesh *pm);
std::vector<int> ComputeIDsAndFlags(Mesh *pm);
std::vector<int> ComputeDerefinementCount(Mesh *pm);

// TODO(JMM): Potentially unsafe if MPI_UNSIGNED_LONG_LONG isn't a size_t
// however I think it's probably safe to assume we'll be on systems
Expand Down
10 changes: 10 additions & 0 deletions src/outputs/parthenon_hdf5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,16 @@ void PHDF5Output::WriteBlocksMetadata_(Mesh *pm, hid_t file, const HDF5::H5P &pl
HDF5Write2D(gBlocks, "loc.level-gid-lid-cnghost-gflag", tmpID.data(), &loc_offset[0],
&loc_cnt[0], &glob_cnt[0], pl);
}

{
// derefinement count
hsize_t loc_cnt[2] = {num_blocks_local, 1};
hsize_t glob_cnt[2] = {max_blocks_global, 1};
std::vector<int> tmpID = OutputUtils::ComputeDerefinementCount(pm);
HDF5Write2D(gBlocks, "derefinement_count", tmpID.data(), &loc_offset[0], &loc_cnt[0],
&glob_cnt[0], pl);
}

Kokkos::Profiling::popRegion(); // write block metadata
}

Expand Down
5 changes: 4 additions & 1 deletion src/outputs/restart.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ namespace parthenon {
class Mesh;
class Param;

constexpr int NumIDsAndFlags{6};
// If this number changes, the logic for reading previously written restart files in
// mesh.cpp needs to be adjusted.
constexpr int NumIDsAndFlags{5};

class RestartReader {
public:
Expand Down Expand Up @@ -89,6 +91,7 @@ class RestartReader {
std::vector<Real> grid_dim;
std::vector<int64_t> lx123;
std::vector<int> level_gid_lid_cnghost_gflag; // what's this?!
std::vector<int> derefinement_count;
};
[[nodiscard]] virtual MeshInfo GetMeshInfo() const = 0;

Expand Down
14 changes: 14 additions & 0 deletions src/outputs/restart_hdf5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,20 @@ RestartReaderHDF5::MeshInfo RestartReaderHDF5::GetMeshInfo() const {
mesh_info.level_gid_lid_cnghost_gflag =
ReadDataset<int>("/Blocks/loc.level-gid-lid-cnghost-gflag");

auto status =
PARTHENON_HDF5_CHECK(H5Lexists(fh_, "Blocks/derefinement_count", H5P_DEFAULT));
if (status > 0) {
mesh_info.derefinement_count = ReadDataset<int>("/Blocks/derefinement_count");
} else {
// File does not contain this dataset, so must be older. Set to default value of zero
if (Globals::my_rank == 0 && (GetAttr<int>("Info", "Multilevel") != 0))
PARTHENON_WARN("Restarting from an HDF5 file that doesn't contain "
"/Blocks/derefinement_count. \n"
" If you are running with AMR, this may cause restarts to not be "
"bitwise exact \n"
" with simulations that are run without restarting.");
mesh_info.derefinement_count = std::vector<int>(mesh_info.nbtotal, 0);
}
return mesh_info;
}

Expand Down
3 changes: 0 additions & 3 deletions tst/regression/test_suites/restart/parthinput.restart
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ x3max = 1
ix3_bc = periodic
ox3_bc = periodic

derefine_count = 999999 # disable this since derefinement counter resets on
# restart and thus leads to different results

<parthenon/meshblock>
nx1 = 16
nx2 = 16
Expand Down

0 comments on commit bd615b7

Please sign in to comment.