From 6bd9592e420594b8c5d6391b893555c285e58d74 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 11:23:24 +0000 Subject: [PATCH 01/38] Read AMR domains lazily --- yt/frontends/ramses/data_structures.py | 129 +++++++++++++++---------- 1 file changed, 79 insertions(+), 50 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index a25a08644c7..a9cab39f119 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -1,6 +1,7 @@ import os import weakref from collections import defaultdict +from functools import cached_property from itertools import product from pathlib import Path from typing import Optional @@ -219,9 +220,6 @@ def __init__(self, ds, domain_id): ph.read_header() # self._add_ptype(ph.ptype) - # Load the AMR structure - self._read_amr() - _hydro_offset = None _level_count = None @@ -241,7 +239,7 @@ def level_count(self): @property def amr_file(self): - if hasattr(self, "_amr_file"): + if hasattr(self, "_amr_file") and not self._amr_file.close: self._amr_file.seek(0) return self._amr_file @@ -252,31 +250,42 @@ def amr_file(self): def _read_amr_header(self): hvals = {} - f = self.amr_file - f.seek(0) - - for header in ramses_header(hvals): - hvals.update(f.read_attrs(header)) - # For speedup, skip reading of 'headl' and 'taill' - f.skip(2) - hvals["numbl"] = f.read_vector("i") + with self.amr_file as f: + f.seek(0) - # That's the header, now we skip a few. - hvals["numbl"] = np.array(hvals["numbl"]).reshape( - (hvals["nlevelmax"], hvals["ncpu"]) - ) - f.skip() - if hvals["nboundary"] > 0: + for header in ramses_header(hvals): + hvals.update(f.read_attrs(header)) + # For speedup, skip reading of 'headl' and 'taill' f.skip(2) - self.ngridbound = f.read_vector("i").astype("int64") - else: - self.ngridbound = np.zeros(hvals["nlevelmax"], dtype="int64") - free_mem = f.read_attrs((("free_mem", 5, "i"),)) # NOQA - ordering = f.read_vector("c") # NOQA - f.skip(4) - # Now we're at the tree itself - # Now we iterate over each level and each CPU. + hvals["numbl"] = f.read_vector("i") + + # That's the header, now we skip a few. + hvals["numbl"] = np.array(hvals["numbl"]).reshape( + (hvals["nlevelmax"], hvals["ncpu"]) + ) + f.skip() + if hvals["nboundary"] > 0: + f.skip(2) + self.ngridbound = f.read_vector("i").astype("int64") + else: + self.ngridbound = np.zeros(hvals["nlevelmax"], dtype="int64") + _free_mem = f.read_attrs((("free_mem", 5, "i"),)) + _ordering = f.read_vector("c") + f.skip(4) + # Now we're at the tree itself + # Now we iterate over each level and each CPU. + position = f.tell() + self.amr_header = hvals + self.amr_offset = position + + # The maximum effective level is the deepest level + # that has a non-zero number of octs + nocts_to_this_level = hvals["numbl"].sum(axis=1).cumsum() + self.max_level = np.argwhere(nocts_to_this_level == nocts_to_this_level[-1])[0][ + 0 + ] + # update levelmax force_max_level, convention = self.ds._force_max_level if convention == "yt": @@ -284,26 +293,27 @@ def _read_amr_header(self): self.amr_header["nlevelmax"] = min( force_max_level, self.amr_header["nlevelmax"] ) - self.amr_offset = f.tell() self.local_oct_count = hvals["numbl"][ self.ds.min_level :, self.domain_id - 1 ].sum() - self.total_oct_count = hvals["numbl"][self.ds.min_level :, :].sum(axis=0) + imin, imax = self.ds.min_level, self.amr_header["nlevelmax"] + self.total_oct_count = hvals["numbl"][imin:imax, :].sum(axis=0) - def _read_amr(self): + @cached_property + def oct_handler(self): """Open the oct file, read in octs level-by-level. For each oct, only the position, index, level and domain are needed - its position in the octree is found automatically. The most important is finding all the information to feed oct_handler.add """ - self.oct_handler = RAMSESOctreeContainer( + oct_handler = RAMSESOctreeContainer( self.ds.domain_dimensions / 2, self.ds.domain_left_edge, self.ds.domain_right_edge, ) root_nodes = self.amr_header["numbl"][self.ds.min_level, :].sum() - self.oct_handler.allocate_domains(self.total_oct_count, root_nodes) + oct_handler.allocate_domains(self.total_oct_count, root_nodes) mylog.debug( "Reading domain AMR % 4i (%0.3e, %0.3e)", self.domain_id, @@ -311,19 +321,24 @@ def _read_amr(self): self.ngridbound.sum(), ) - f = self.amr_file - f.seek(self.amr_offset) + with self.amr_file as f: + f.seek(self.amr_offset) - min_level = self.ds.min_level - max_level = read_amr( - f, self.amr_header, self.ngridbound, min_level, self.oct_handler - ) + min_level = self.ds.min_level + max_level = read_amr( + f, self.amr_header, self.ngridbound, min_level, oct_handler + ) + + oct_handler.finalize() - self.max_level = max_level - self.oct_handler.finalize() + new_max_level = max_level + self.ds.min_level + if new_max_level != self.max_level: + raise RuntimeError( + f"The maximum level detected in the AMR file ({new_max_level}) " + f" does not match the expected number {self.max_level}." + ) - # Close AMR file - f.close() + return oct_handler def included(self, selector): if getattr(selector, "domain_id", None) is not None: @@ -548,7 +563,6 @@ def __init__(self, ds, dataset_type="ramses"): self.dataset = weakref.proxy(ds) self.index_filename = self.dataset.parameter_filename self.directory = os.path.dirname(self.index_filename) - self.max_level = None self.float_type = np.float64 super().__init__(ds, dataset_type) @@ -560,16 +574,19 @@ def _initialize_oct_handler(self): cpu_list = range(self.dataset["ncpu"]) self.domains = [RAMSESDomainFile(self.dataset, i + 1) for i in cpu_list] - total_octs = sum( - dom.local_oct_count for dom in self.domains # + dom.ngridbound.sum() - ) + + @cached_property + def max_level(self): force_max_level, convention = self.ds._force_max_level if convention == "yt": force_max_level += self.ds.min_level + 1 - self.max_level = min( - force_max_level, max(dom.max_level for dom in self.domains) + return min(force_max_level, max(dom.max_level for dom in self.domains)) + + @cached_property + def num_grids(self): + return sum( + dom.local_oct_count for dom in self.domains # + dom.ngridbound.sum() ) - self.num_grids = total_octs def _detect_output_fields(self): dsl = set() @@ -594,11 +611,23 @@ def _detect_output_fields(self): self.field_list = self.particle_field_list + self.fluid_field_list def _identify_base_chunk(self, dobj): + use_fast_hilbert = ( + hasattr(dobj, "get_bbox") + and self.ds.parameters["ordering type"] == "hilbert" + ) if getattr(dobj, "_chunk_info", None) is None: - domains = [dom for dom in self.domains if dom.included(dobj.selector)] + if use_fast_hilbert: + bbox = dobj.get_bbox() + idoms = {idom + 1 for idom in get_cpu_list(self.ds, bbox)} + candidate_domains = [ + dom for dom in self.domains if dom.domain_id in idoms + ] + else: + candidate_domains = self.domains + domains = [dom for dom in candidate_domains if dom.included(dobj.selector)] base_region = getattr(dobj, "base_region", dobj) if len(domains) > 1: - mylog.debug("Identified %s intersecting domains", len(domains)) + mylog.info("Identified %s intersecting domains", len(domains)) subsets = [ RAMSESDomainSubset( base_region, From 1117bb98ec45776f0100154975ef4e97cb92fead Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 11:54:15 +0000 Subject: [PATCH 02/38] Statically compute hydro offsets --- yt/frontends/ramses/particle_handlers.py | 57 +++++++++++++----------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index ea341132072..b5956496adf 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -1,5 +1,7 @@ import abc import os +import struct +from itertools import chain, count from typing import Optional from yt._typing import FieldKey @@ -138,13 +140,15 @@ def read_header(self): self.local_particle_count = 0 return - fd = FortranFile(self.fname) - fd.seek(0, os.SEEK_END) - flen = fd.tell() - fd.seek(0) - hvals = {} - attrs = self.attrs - hvals.update(fd.read_attrs(attrs)) + with FortranFile(self.fname) as fd: + fd.seek(0, os.SEEK_END) + flen = fd.tell() + fd.seek(0) + hvals = {} + attrs = self.attrs + hvals.update(fd.read_attrs(attrs)) + ipos = fd.tell() + self.header = hvals self.local_particle_count = hvals["npart"] extra_particle_fields = self.ds._extra_particle_fields @@ -157,37 +161,36 @@ def read_header(self): if extra_particle_fields is not None: particle_fields += extra_particle_fields - if hvals["nstar_tot"] > 0 and extra_particle_fields is not None: + if ( + hvals["nstar_tot"] > 0 + and extra_particle_fields is not None + and ("particle_birth_time", "d") not in particle_fields + ): particle_fields += [ ("particle_birth_time", "d"), ("particle_metallicity", "d"), ] + particle_fields = chain( + particle_fields, + ((f"particle_extra_field_{i}", "d") for i in count()), + ) + field_offsets = {} _pfields = {} - ptype = self.ptype + blockLen = struct.calcsize("i") * 2 - # Read offsets - for field, vtype in particle_fields: - if fd.tell() >= flen: - break - field_offsets[ptype, field] = fd.tell() - _pfields[ptype, field] = vtype - fd.skip(1) - - iextra = 0 - while fd.tell() < flen: - iextra += 1 - field, vtype = ("particle_extra_field_%i" % iextra, "d") - particle_fields.append((field, vtype)) - - field_offsets[ptype, field] = fd.tell() + while ipos < flen: + field, vtype = next(particle_fields) + field_offsets[ptype, field] = ipos _pfields[ptype, field] = vtype - fd.skip(1) - - fd.close() + ipos += blockLen + struct.calcsize(vtype) * hvals["npart"] + if field.startswith("particle_extra_field_"): + iextra = int(field.split("_")[-1]) + else: + iextra = 0 if iextra > 0 and not self.ds._warned_extra_fields["io"]: mylog.warning( "Detected %s extra particle fields assuming kind " From 8231e1d54cbb50f061169823d56ea115e520996e Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 11:57:41 +0000 Subject: [PATCH 03/38] Cache read_fluid_file_descriptor --- yt/frontends/ramses/io.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt/frontends/ramses/io.py b/yt/frontends/ramses/io.py index a74a24cf1fa..21a4a252072 100644 --- a/yt/frontends/ramses/io.py +++ b/yt/frontends/ramses/io.py @@ -1,5 +1,6 @@ import os from collections import defaultdict +from functools import lru_cache from typing import Union import numpy as np @@ -306,6 +307,7 @@ def _read_particle_subset(self, subset, fields): return tr +@lru_cache(maxsize=32) def _read_part_binary_file_descriptor(fname: Union[str, "os.PathLike[str]"]): """ Read a file descriptor and returns the array of the fields found. @@ -362,6 +364,7 @@ def _read_part_binary_file_descriptor(fname: Union[str, "os.PathLike[str]"]): return fields +@lru_cache(maxsize=32) def _read_part_csv_file_descriptor(fname: Union[str, "os.PathLike[str]"]): """ Read the file from the csv sink particles output. @@ -411,6 +414,7 @@ def _read_part_csv_file_descriptor(fname: Union[str, "os.PathLike[str]"]): return fields, local_particle_count +@lru_cache(maxsize=32) def _read_fluid_file_descriptor(fname: Union[str, "os.PathLike[str]"], *, prefix: str): """ Read a file descriptor and returns the array of the fields found. From 7fc18eeb8c704252dab857dbaf7cb421b37cd457 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 12:20:08 +0000 Subject: [PATCH 04/38] Delay as much as possible reading AMR --- yt/frontends/ramses/data_structures.py | 3 ++- yt/frontends/ramses/field_handlers.py | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index a9cab39f119..4f93a42ffeb 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -198,7 +198,7 @@ def __init__(self, ds, domain_id): for t in ["grav", "amr"]: setattr(self, f"{t}_fn", basename % t) self._part_file_descriptor = part_file_descriptor - self._read_amr_header() + self.max_level = self.ds.parameters["levelmax"] # Autodetect field files field_handlers = [FH(self) for FH in get_field_handlers() if FH.any_exist(ds)] @@ -307,6 +307,7 @@ def oct_handler(self): The most important is finding all the information to feed oct_handler.add """ + self._read_amr_header() oct_handler = RAMSESOctreeContainer( self.ds.domain_dimensions / 2, self.ds.domain_left_edge, diff --git a/yt/frontends/ramses/field_handlers.py b/yt/frontends/ramses/field_handlers.py index edd0f612f7b..12b4a439522 100644 --- a/yt/frontends/ramses/field_handlers.py +++ b/yt/frontends/ramses/field_handlers.py @@ -1,6 +1,7 @@ import abc import glob import os +from functools import cached_property from typing import Optional from yt.config import ytcfg @@ -232,7 +233,7 @@ def level_count(self): return self._level_count - @property + @cached_property def offset(self): """ Compute the offsets of the fields. @@ -243,10 +244,6 @@ def offset(self): It should be generic enough for most of the cases, but if the *structure* of your fluid file is non-canonical, change this. """ - - if getattr(self, "_offset", None) is not None: - return self._offset - nvars = len(self.field_list) with FortranFile(self.fname) as fd: # Skip headers @@ -275,7 +272,6 @@ def offset(self): Nskip=nvars * 8, ) - self._offset = offset self._level_count = level_count return self._offset From 1266f13cea76a9e61366bf6e5e82f6dc609f1a61 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 12:20:29 +0000 Subject: [PATCH 05/38] Use os.path.getsize ot measure file len --- yt/frontends/ramses/particle_handlers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index b5956496adf..2fd01fcba39 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -140,10 +140,8 @@ def read_header(self): self.local_particle_count = 0 return + flen = os.path.getsize(self.fname) with FortranFile(self.fname) as fd: - fd.seek(0, os.SEEK_END) - flen = fd.tell() - fd.seek(0) hvals = {} attrs = self.attrs hvals.update(fd.read_attrs(attrs)) @@ -246,9 +244,7 @@ def read_header(self): self.local_particle_count = 0 return fd = FortranFile(self.fname) - fd.seek(0, os.SEEK_END) - flen = fd.tell() - fd.seek(0) + flen = os.path.getsize(self.fname) hvals = {} # Read the header of the file attrs = self.attrs From 31c3eb5a739ef5c7e7624e449c9453e257644c80 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 12:24:25 +0000 Subject: [PATCH 06/38] Fix typo --- yt/frontends/ramses/field_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/frontends/ramses/field_handlers.py b/yt/frontends/ramses/field_handlers.py index 12b4a439522..65661c3119b 100644 --- a/yt/frontends/ramses/field_handlers.py +++ b/yt/frontends/ramses/field_handlers.py @@ -273,7 +273,7 @@ def offset(self): ) self._level_count = level_count - return self._offset + return offset @classmethod def load_fields_from_yt_config(cls) -> list[str]: From 0b4b4a89c72bd799f406e1447ced33158047f1ae Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 14:29:48 +0000 Subject: [PATCH 07/38] Trust more conservative Hilbert indexing for fast chunking --- yt/frontends/ramses/data_structures.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 4f93a42ffeb..7136e5c246c 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -620,12 +620,9 @@ def _identify_base_chunk(self, dobj): if use_fast_hilbert: bbox = dobj.get_bbox() idoms = {idom + 1 for idom in get_cpu_list(self.ds, bbox)} - candidate_domains = [ - dom for dom in self.domains if dom.domain_id in idoms - ] + domains = [dom for dom in self.domains if dom.domain_id in idoms] else: - candidate_domains = self.domains - domains = [dom for dom in candidate_domains if dom.included(dobj.selector)] + domains = [dom for dom in self.domains if dom.included(dobj.selector)] base_region = getattr(dobj, "base_region", dobj) if len(domains) > 1: mylog.info("Identified %s intersecting domains", len(domains)) From 26c01c3a69a45c93b3e748ecd55b8e9e9b70619f Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 14:33:32 +0000 Subject: [PATCH 08/38] Do not trigger oct_handler construction --- yt/data_objects/index_subobjects/octree_subset.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt/data_objects/index_subobjects/octree_subset.py b/yt/data_objects/index_subobjects/octree_subset.py index 28861425bbb..a74f5dc24f8 100644 --- a/yt/data_objects/index_subobjects/octree_subset.py +++ b/yt/data_objects/index_subobjects/octree_subset.py @@ -51,12 +51,17 @@ def __init__(self, base_region, domain, ds, num_zones=2, num_ghost_zones=0): self.domain_id = domain.domain_id self.ds = domain.ds self._index = self.ds.index - self.oct_handler = domain.oct_handler self._last_mask = None self._last_selector_id = None self.base_region = base_region self.base_selector = base_region.selector + @property + def oct_handler(self): + # Use an indirection so that oct_handler + # doesn't have to exist when we create the subset + return self.domain.oct_handler + def __getitem__(self, key): tr = super().__getitem__(key) try: From 23f3f286e88251aa2932c56b4a1a18c324f0fa07 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 14:44:01 +0000 Subject: [PATCH 09/38] Handle case where reading is a bit too optimistic --- yt/frontends/ramses/io.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt/frontends/ramses/io.py b/yt/frontends/ramses/io.py index 21a4a252072..6114d2140aa 100644 --- a/yt/frontends/ramses/io.py +++ b/yt/frontends/ramses/io.py @@ -202,6 +202,8 @@ def _read_fluid_selection(self, chunks, selector, fields, size): rv = subset.fill(fd, field_subs, selector, file_handler) for ft, f in field_subs: d = rv.pop(f) + if d.size == 0: + continue mylog.debug( "Filling %s with %s (%0.3e %0.3e) (%s zones)", f, @@ -213,7 +215,8 @@ def _read_fluid_selection(self, chunks, selector, fields, size): tr[(ft, f)].append(d) d = {} for field in fields: - d[field] = np.concatenate(tr.pop(field)) + tmp = tr.pop(field, None) + d[field] = np.concatenate(tmp) if tmp else np.empty(0, dtype="d") return d From ad4648fff6340d5db1e51f731b23d97f98a619a9 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 15:10:12 +0000 Subject: [PATCH 10/38] Use own version of the oct handler for stream dataset --- yt/frontends/ramses/data_structures.py | 10 +++++++--- yt/frontends/stream/data_structures.py | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 7136e5c246c..35785b1b4b7 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -176,6 +176,11 @@ class RAMSESDomainFile: _last_mask = None _last_selector_id = None + _hydro_offset = None + _level_count = None + + oct_handler_initialized = False + def __init__(self, ds, domain_id): self.ds = ds self.domain_id = domain_id @@ -220,9 +225,6 @@ def __init__(self, ds, domain_id): ph.read_header() # self._add_ptype(ph.ptype) - _hydro_offset = None - _level_count = None - def __repr__(self): return "RAMSESDomainFile: %i" % self.domain_id @@ -339,6 +341,8 @@ def oct_handler(self): f" does not match the expected number {self.max_level}." ) + self.oct_handler_initialized = True + return oct_handler def included(self, selector): diff --git a/yt/frontends/stream/data_structures.py b/yt/frontends/stream/data_structures.py index 3757b1c0726..4cc8b58354c 100644 --- a/yt/frontends/stream/data_structures.py +++ b/yt/frontends/stream/data_structures.py @@ -784,6 +784,14 @@ def __init__(self, base_region, ds, oct_handler, num_zones=2, num_ghost_zones=0) base_grid = StreamOctreeSubset(base_region, ds, oct_handler, num_zones) self._base_grid = base_grid + @property + def oct_handler(self): + return self._oct_handler + + @oct_handler.setter + def oct_handler(self, oh): + self._oct_handler = oh + def retrieve_ghost_zones(self, ngz, fields, smoothed=False): try: new_subset = self._subset_with_gz From a02cb1687c7b5395a0bad75bff0409cfb6ecc6b3 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 15:10:32 +0000 Subject: [PATCH 11/38] Use oct_handler to refine selection is already created --- yt/frontends/ramses/data_structures.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 35785b1b4b7..359b0a0c89f 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -624,7 +624,19 @@ def _identify_base_chunk(self, dobj): if use_fast_hilbert: bbox = dobj.get_bbox() idoms = {idom + 1 for idom in get_cpu_list(self.ds, bbox)} - domains = [dom for dom in self.domains if dom.domain_id in idoms] + # If the oct handler has been initialized, use it + domains = [] + for dom in self.domains: + # Hilbert indexing is conservative, so reject all those that + # aren't in the bbox + if dom.domain_id not in idoms: + continue + # If the domain has its oct handler, refine the selection + if dom.oct_handler_initialized and not dom.included(dobj.selector): + continue + mylog.info("Identified domain %s", dom.domain_id) + + domains.append(dom) else: domains = [dom for dom in self.domains if dom.included(dobj.selector)] base_region = getattr(dobj, "base_region", dobj) From d2139ebe39e92d2b687f6ee3e34f65e302529573 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 15:18:23 +0000 Subject: [PATCH 12/38] Reduce verbosity --- yt/frontends/ramses/data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 359b0a0c89f..6190202980b 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -634,7 +634,7 @@ def _identify_base_chunk(self, dobj): # If the domain has its oct handler, refine the selection if dom.oct_handler_initialized and not dom.included(dobj.selector): continue - mylog.info("Identified domain %s", dom.domain_id) + mylog.debug("Identified domain %s", dom.domain_id) domains.append(dom) else: From 4ac917d0705969cd57e82d3e30ab58d1d9ac1913 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 15:41:48 +0000 Subject: [PATCH 13/38] oct_handler should be a read-only property --- yt/data_objects/index_subobjects/octree_subset.py | 9 +++++---- yt/frontends/art/data_structures.py | 6 +++++- yt/frontends/artio/data_structures.py | 6 +++++- yt/frontends/ramses/data_structures.py | 5 ++++- yt/frontends/stream/data_structures.py | 6 +----- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/yt/data_objects/index_subobjects/octree_subset.py b/yt/data_objects/index_subobjects/octree_subset.py index a74f5dc24f8..eb08750b9d7 100644 --- a/yt/data_objects/index_subobjects/octree_subset.py +++ b/yt/data_objects/index_subobjects/octree_subset.py @@ -1,3 +1,4 @@ +import abc from contextlib import contextmanager from functools import cached_property from itertools import product, repeat @@ -33,7 +34,7 @@ def cc_cache_func(self, dobj): return cc_cache_func -class OctreeSubset(YTSelectionContainer): +class OctreeSubset(YTSelectionContainer, abc.ABC): _spatial = True _num_ghost_zones = 0 _type_name = "octree_subset" @@ -57,10 +58,10 @@ def __init__(self, base_region, domain, ds, num_zones=2, num_ghost_zones=0): self.base_selector = base_region.selector @property + @abc.abstractmethod def oct_handler(self): - # Use an indirection so that oct_handler - # doesn't have to exist when we create the subset - return self.domain.oct_handler + # In charge of returning the oct_handler + pass def __getitem__(self, key): tr = super().__getitem__(key) diff --git a/yt/frontends/art/data_structures.py b/yt/frontends/art/data_structures.py index 5c5281cc1b1..a901f780e3b 100644 --- a/yt/frontends/art/data_structures.py +++ b/yt/frontends/art/data_structures.py @@ -798,7 +798,11 @@ def __init__(self, ds, nvar, oct_handler, domain_id): self._level_count = None self._level_oct_offsets = None self._level_child_offsets = None - self.oct_handler = oct_handler + self._oct_handler = oct_handler + + @property + def oct_handler(self): + return self._oct_handler @property def level_count(self): diff --git a/yt/frontends/artio/data_structures.py b/yt/frontends/artio/data_structures.py index 45052f01552..c220c5f82be 100644 --- a/yt/frontends/artio/data_structures.py +++ b/yt/frontends/artio/data_structures.py @@ -34,7 +34,7 @@ def __init__(self, base_region, sfc_start, sfc_end, oct_handler, ds): self.field_parameters = {} self.sfc_start = sfc_start self.sfc_end = sfc_end - self.oct_handler = oct_handler + self._oct_handler = oct_handler self.ds = ds self._last_mask = None self._last_selector_id = None @@ -43,6 +43,10 @@ def __init__(self, base_region, sfc_start, sfc_end, oct_handler, ds): self.base_region = base_region self.base_selector = base_region.selector + @property + def oct_handler(self): + return self._oct_handler + @property def min_ind(self): return self.sfc_start diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 6190202980b..780698b5c1e 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -223,7 +223,6 @@ def __init__(self, ds, domain_id): "Detected particle type %s in domain_id=%s", ph.ptype, domain_id ) ph.read_header() - # self._add_ptype(ph.ptype) def __repr__(self): return "RAMSESDomainFile: %i" % self.domain_id @@ -385,6 +384,10 @@ def __init__( "of ghost zones, was called with num_ghost_zones=%s" % num_ghost_zones ) + @property + def oct_handler(self): + return self.domain.oct_handler + def _fill_no_ghostzones(self, fd, fields, selector, file_handler): ndim = self.ds.dimensionality # Here we get a copy of the file, which we skip through and read the diff --git a/yt/frontends/stream/data_structures.py b/yt/frontends/stream/data_structures.py index 4cc8b58354c..e6302649375 100644 --- a/yt/frontends/stream/data_structures.py +++ b/yt/frontends/stream/data_structures.py @@ -766,7 +766,7 @@ def __init__(self, base_region, ds, oct_handler, num_zones=2, num_ghost_zones=0) self.field_data = YTFieldData() self.field_parameters = {} self.ds = ds - self.oct_handler = oct_handler + self._oct_handler = oct_handler self._last_mask = None self._last_selector_id = None self._current_particle_type = "io" @@ -788,10 +788,6 @@ def __init__(self, base_region, ds, oct_handler, num_zones=2, num_ghost_zones=0) def oct_handler(self): return self._oct_handler - @oct_handler.setter - def oct_handler(self, oh): - self._oct_handler = oh - def retrieve_ghost_zones(self, ngz, fields, smoothed=False): try: new_subset = self._subset_with_gz From 378a1035a8dbc8bc56732f93aafe9663952b6d53 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 16:02:13 +0000 Subject: [PATCH 14/38] Implement oct_handler for ART --- yt/frontends/art/data_structures.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt/frontends/art/data_structures.py b/yt/frontends/art/data_structures.py index a901f780e3b..0bfe58bda9f 100644 --- a/yt/frontends/art/data_structures.py +++ b/yt/frontends/art/data_structures.py @@ -727,6 +727,10 @@ def _is_valid(cls, filename: str, *args, **kwargs) -> bool: class ARTDomainSubset(OctreeSubset): + @property + def oct_handler(self): + return self.domain.oct_handler + def fill(self, content, ftfields, selector): """ This is called from IOHandler. It takes content From aaefe7c795400ef8956e6166f3f44144fa0ac172 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 6 Nov 2023 18:51:06 +0000 Subject: [PATCH 15/38] Fix broken test --- yt/frontends/ramses/particle_handlers.py | 40 +++++++++++++++++++---- yt/frontends/ramses/tests/test_outputs.py | 7 ++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index 2fd01fcba39..d9fe3dc858b 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -1,6 +1,7 @@ import abc import os import struct +import warnings from itertools import chain, count from typing import Optional @@ -145,7 +146,7 @@ def read_header(self): hvals = {} attrs = self.attrs hvals.update(fd.read_attrs(attrs)) - ipos = fd.tell() + particle_field_pos = fd.tell() self.header = hvals self.local_particle_count = hvals["npart"] @@ -169,22 +170,49 @@ def read_header(self): ("particle_metallicity", "d"), ] - particle_fields = chain( - particle_fields, - ((f"particle_extra_field_{i}", "d") for i in count()), - ) + def build_iterator(): + return chain( + particle_fields, + ((f"particle_extra_field_{i}", "d") for i in count(1)), + ) field_offsets = {} _pfields = {} ptype = self.ptype blockLen = struct.calcsize("i") * 2 + particle_fields_iterator = build_iterator() + ipos = particle_field_pos while ipos < flen: - field, vtype = next(particle_fields) + field, vtype = next(particle_fields_iterator) field_offsets[ptype, field] = ipos _pfields[ptype, field] = vtype ipos += blockLen + struct.calcsize(vtype) * hvals["npart"] + if ipos != flen: + particle_fields_iterator = build_iterator() + with FortranFile(self.fname) as fd: + fd.seek(particle_field_pos) + ipos = particle_field_pos + while ipos < flen: + field, vtype = next(particle_fields_iterator) + old_pos = fd.tell() + field_offsets[ptype, field] = old_pos + _pfields[ptype, field] = vtype + fd.skip(1) + ipos = fd.tell() + + record_len = ipos - old_pos - blockLen + exp_len = struct.calcsize(vtype) * hvals["npart"] + if record_len != exp_len: + warnings.warn( + f"Field {(ptype, field)} has a length {record_len}, but " + f"expected a length of {exp_len}. " + "Use yt.load(..., extra_particle_fields=[...]) to specify " + "the right size.", + stacklevel=1, + ) + if field.startswith("particle_extra_field_"): iextra = int(field.split("_")[-1]) else: diff --git a/yt/frontends/ramses/tests/test_outputs.py b/yt/frontends/ramses/tests/test_outputs.py index 4fad40ef0c4..2e75f56a2b3 100644 --- a/yt/frontends/ramses/tests/test_outputs.py +++ b/yt/frontends/ramses/tests/test_outputs.py @@ -152,6 +152,13 @@ def test_extra_fields_2(): extra_fields = [f"particle_extra_field_{i + 1}" for i in range(2)] ds = yt.load(os.path.join(ramsesExtraFieldsSmall, "info_00001.txt")) + # When migrating to pytest, uncomment this + # with pytest.warns( + # UserWarning, + # match=r"Field (.*) has a length \d+, but expected a length of \d+.()", + # ): + # ds.index + # the dataset should contain the fields for field in extra_fields: assert ("io", field) in ds.field_list From 62f923b511246686b2aa961ff5148511257b1f81 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 12:05:31 +0100 Subject: [PATCH 16/38] Make hilbert lookup faster --- yt/frontends/ramses/hilbert.py | 329 ++++++---------------- yt/frontends/ramses/tests/test_hilbert.py | 4 +- 2 files changed, 95 insertions(+), 238 deletions(-) diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index 6e484726434..f86c878770d 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -12,205 +12,41 @@ def hilbert3d(X, bit_length): The bit_length for the indexing. """ X = np.atleast_2d(X) - state_diagram = ( - np.array( + state_diagram = np.array( + [ [ - 1, - 2, - 3, - 2, - 4, - 5, - 3, - 5, - 0, - 1, - 3, - 2, - 7, - 6, - 4, - 5, - 2, - 6, - 0, - 7, - 8, - 8, - 0, - 7, - 0, - 7, - 1, - 6, - 3, - 4, - 2, - 5, - 0, - 9, - 10, - 9, - 1, - 1, - 11, - 11, - 0, - 3, - 7, - 4, - 1, - 2, - 6, - 5, - 6, - 0, - 6, - 11, - 9, - 0, - 9, - 8, - 2, - 3, - 1, - 0, - 5, - 4, - 6, - 7, - 11, - 11, - 0, - 7, - 5, - 9, - 0, - 7, - 4, - 3, - 5, - 2, - 7, - 0, - 6, - 1, - 4, - 4, - 8, - 8, - 0, - 6, - 10, - 6, - 6, - 5, - 1, - 2, - 7, - 4, - 0, - 3, - 5, - 7, - 5, - 3, - 1, - 1, - 11, - 11, - 4, - 7, - 3, - 0, - 5, - 6, - 2, - 1, - 6, - 1, - 6, - 10, - 9, - 4, - 9, - 10, - 6, - 7, - 5, - 4, - 1, - 0, - 2, - 3, - 10, - 3, - 1, - 1, - 10, - 3, - 5, - 9, - 2, - 5, - 3, - 4, - 1, - 6, - 0, - 7, - 4, - 4, - 8, - 8, - 2, - 7, - 2, - 3, - 2, - 1, - 5, - 6, - 3, - 0, - 4, - 7, - 7, - 2, - 11, - 2, - 7, - 5, - 8, - 5, - 4, - 5, - 7, - 6, - 3, - 2, - 0, - 1, - 10, - 3, - 2, - 6, - 10, - 3, - 4, - 4, - 6, - 1, - 7, - 0, - 5, - 2, - 4, - 3, - ] - ) - .reshape(12, 2, 8) - .T + [1, 2, 0, 6, 11, 4, 5, 6, 10, 4, 7, 10], + [0, 0, 0, 2, 4, 6, 4, 6, 2, 2, 4, 6], + ], + [ + [2, 6, 9, 0, 11, 4, 7, 1, 3, 4, 2, 3], + [1, 7, 3, 3, 3, 5, 7, 7, 5, 1, 5, 1], + ], + [ + [3, 0, 10, 6, 0, 8, 5, 6, 1, 8, 11, 2], + [3, 1, 7, 1, 5, 1, 3, 5, 3, 5, 7, 7], + ], + [ + [2, 7, 9, 11, 7, 8, 3, 10, 1, 8, 2, 6], + [2, 6, 4, 0, 2, 2, 0, 4, 4, 6, 6, 0], + ], + [ + [4, 8, 1, 9, 5, 0, 1, 9, 10, 2, 7, 10], + [7, 3, 1, 5, 7, 7, 5, 1, 1, 3, 3, 5], + ], + [ + [5, 8, 1, 0, 9, 6, 1, 4, 3, 7, 5, 3], + [6, 4, 2, 4, 0, 4, 6, 0, 6, 0, 2, 2], + ], + [ + [3, 0, 11, 9, 0, 10, 11, 9, 5, 2, 8, 4], + [4, 2, 6, 6, 6, 0, 2, 2, 0, 4, 0, 4], + ], + [ + [5, 7, 11, 8, 7, 6, 11, 10, 9, 3, 5, 4], + [5, 5, 5, 7, 1, 3, 1, 3, 7, 7, 1, 3], + ], + ] ) x_bit_mask, y_bit_mask, z_bit_mask = ( @@ -259,10 +95,50 @@ def hilbert3d(X, bit_length): return order -def get_cpu_list(ds, X): +def get_cpu_list( + ds, bbox: np.ndarray, LE: np.ndarray = None, dx: float = 1.0, dx_cond: float = None +) -> set[int]: + """ + Find the subset of CPUs that intersect the bbox in a recursive fashion. + """ + if LE is None: + LE = np.array( + [ + 0, + 0, + 0, + ], + dtype="d", + ) + if isinstance(bbox, (list, tuple)): + bbox = ds.arr(bbox, "code_length") + if dx_cond is None: + dx_cond = float((bbox[1] - bbox[0]).min().to("code_length")) + + # If the current dx is smaller than the smallest size of the bbox + if dx < dx_cond: + # Finish recursion + return get_cpu_list_cuboid(ds, np.asarray([LE, LE + dx])) + + dx /= 2 + + ret = set() + # Compute intersection of the eight subcubes with the bbox and recurse. + for i in range(2): + for j in range(2): + for k in range(2): + LE_new = LE + np.array([i, j, k], dtype="d") * dx + # Compute AABB intersection between [LE_new, LE_new + dx] and bbox + if all(bbox[1] >= LE_new) and all(bbox[0] <= LE_new + dx): + # Recurse + ret.update(get_cpu_list(ds, bbox, LE_new, dx, dx_cond)) + return ret + + +def get_cpu_list_cuboid(ds, X: np.ndarray) -> set[int]: """ - Return the list of the CPU intersecting with the positions - given. Note that it will be 0-indexed. + Return the list of the CPU intersecting with the cuboid containing the positions. + Note that it will be 0-indexed. Parameters ---------- @@ -283,12 +159,7 @@ def get_cpu_list(ds, X): xmax, ymax, zmax = X.max(axis=0) dmax = max(xmax - xmin, ymax - ymin, zmax - zmin) - ilevel = 0 - deltax = dmax * 2 - - while deltax >= dmax: - ilevel += 1 - deltax = 0.5**ilevel + ilevel = int(np.ceil(-np.log2(dmax))) lmin = ilevel bit_length = lmin - 1 @@ -308,7 +179,7 @@ def get_cpu_list(ds, X): if bit_length > 0: ndom = 8 - idom, jdom, kdom = (np.zeros(8, dtype="int64") for _ in range(3)) + ijkdom = idom, jdom, kdom = np.empty((3, 8), dtype="int64") idom[0], idom[1] = imin, imax idom[2], idom[3] = imin, imax @@ -326,40 +197,26 @@ def get_cpu_list(ds, X): kdom[6], kdom[7] = kmax, kmax bounding_min, bounding_max = np.zeros(ndom), np.zeros(ndom) + if bit_length > 0: + order_min = hilbert3d(ijkdom.T, bit_length) for i in range(ndom): if bit_length > 0: - order_min = hilbert3d([idom[i], jdom[i], kdom[i]], bit_length) + omin = order_min[i] else: - order_min = 0 - bounding_min[i] = (order_min) * dkey - bounding_max[i] = (order_min + 1) * dkey + omin = 0 + bounding_min[i] = omin * dkey + bounding_max[i] = (omin + 1) * dkey - bound_key = {} + bound_key = np.empty(ncpu + 1, dtype="float64") for icpu in range(1, ncpu + 1): bound_key[icpu - 1], bound_key[icpu] = ds.hilbert_indices[icpu] - cpu_min, cpu_max = (np.zeros(ncpu + 1, dtype="int64") for _ in range(2)) - for icpu in range(1, ncpu + 1): - for i in range(ndom): - if ( - bound_key[icpu - 1] <= bounding_min[i] - and bound_key[icpu] > bounding_min[i] - ): - cpu_min[i] = icpu - 1 - if ( - bound_key[icpu - 1] < bounding_max[i] - and bound_key[icpu] >= bounding_max[i] - ): - cpu_max[i] = icpu - - ncpu_read = 0 - cpu_list = [] - cpu_read = np.zeros(ncpu, dtype="bool") + cpu_min = np.searchsorted(bound_key, bounding_min, side="right") - 1 + cpu_max = np.searchsorted(bound_key, bounding_max, side="right") + + cpu_read = set() + for i in range(ndom): - for j in range(cpu_min[i], cpu_max[i]): - if not cpu_read[j]: - ncpu_read += 1 - cpu_list.append(j) - cpu_read[j] = True + cpu_read.update(range(cpu_min[i], cpu_max[i])) - return sorted(cpu_list) + return cpu_read diff --git a/yt/frontends/ramses/tests/test_hilbert.py b/yt/frontends/ramses/tests/test_hilbert.py index f43876458f1..2ed34e5f3d5 100644 --- a/yt/frontends/ramses/tests/test_hilbert.py +++ b/yt/frontends/ramses/tests/test_hilbert.py @@ -2,7 +2,7 @@ from numpy.testing import assert_equal import yt -from yt.frontends.ramses.hilbert import get_cpu_list, hilbert3d +from yt.frontends.ramses.hilbert import get_cpu_list_cuboid, hilbert3d from yt.testing import requires_file @@ -44,6 +44,6 @@ def test_get_cpu_list(): for i, o in zip(inputs, outputs): bbox = i - ls = get_cpu_list(ds, bbox) + ls = list(get_cpu_list_cuboid(ds, bbox)) assert len(ls) > 0 assert all(np.array(o) == np.array(ls)) From 52162fed7b21bbf49e149aeb9e22c5335572158a Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 14:07:52 +0100 Subject: [PATCH 17/38] Raise an error if we get weird results with max_level --- yt/frontends/ramses/data_structures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 780698b5c1e..b8e9623c14f 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -334,12 +334,12 @@ def oct_handler(self): oct_handler.finalize() new_max_level = max_level + self.ds.min_level - if new_max_level != self.max_level: + if new_max_level > self.max_level: raise RuntimeError( f"The maximum level detected in the AMR file ({new_max_level}) " f" does not match the expected number {self.max_level}." ) - + self.max_level = new_max_level self.oct_handler_initialized = True return oct_handler From 02749b4ff887a9d817da330cdec7c9473f95b53f Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 14:35:12 +0100 Subject: [PATCH 18/38] Fix max level --- yt/frontends/ramses/data_structures.py | 77 ++++++++++++++++++++------ 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index b8e9623c14f..86975d0dd6f 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -180,6 +180,7 @@ class RAMSESDomainFile: _level_count = None oct_handler_initialized = False + amr_header_initialized = False def __init__(self, ds, domain_id): self.ds = ds @@ -203,7 +204,7 @@ def __init__(self, ds, domain_id): for t in ["grav", "amr"]: setattr(self, f"{t}_fn", basename % t) self._part_file_descriptor = part_file_descriptor - self.max_level = self.ds.parameters["levelmax"] + self.max_level = self.ds.parameters["levelmax"] - self.ds.parameters["levelmin"] # Autodetect field files field_handlers = [FH(self) for FH in get_field_handlers() if FH.any_exist(ds)] @@ -250,6 +251,8 @@ def amr_file(self): return f def _read_amr_header(self): + if self.amr_header_initialized: + return hvals = {} with self.amr_file as f: f.seek(0) @@ -267,9 +270,9 @@ def _read_amr_header(self): f.skip() if hvals["nboundary"] > 0: f.skip(2) - self.ngridbound = f.read_vector("i").astype("int64") + self._ngridbound = f.read_vector("i").astype("int64") else: - self.ngridbound = np.zeros(hvals["nlevelmax"], dtype="int64") + self._ngridbound = np.zeros(hvals["nlevelmax"], dtype="int64") _free_mem = f.read_attrs((("free_mem", 5, "i"),)) _ordering = f.read_vector("c") f.skip(4) @@ -277,28 +280,67 @@ def _read_amr_header(self): # Now we iterate over each level and each CPU. position = f.tell() - self.amr_header = hvals - self.amr_offset = position + self._amr_header = hvals + self._amr_offset = position # The maximum effective level is the deepest level # that has a non-zero number of octs nocts_to_this_level = hvals["numbl"].sum(axis=1).cumsum() - self.max_level = np.argwhere(nocts_to_this_level == nocts_to_this_level[-1])[0][ - 0 - ] + self._max_level = ( + np.argwhere(nocts_to_this_level == nocts_to_this_level[-1])[0][0] + - self.ds.parameters["levelmin"] + + 1 + ) + print(self._max_level, nocts_to_this_level) # update levelmax force_max_level, convention = self.ds._force_max_level if convention == "yt": force_max_level += self.ds.min_level + 1 - self.amr_header["nlevelmax"] = min( - force_max_level, self.amr_header["nlevelmax"] + self._amr_header["nlevelmax"] = min( + force_max_level, self._amr_header["nlevelmax"] ) - self.local_oct_count = hvals["numbl"][ + self._local_oct_count = hvals["numbl"][ self.ds.min_level :, self.domain_id - 1 ].sum() - imin, imax = self.ds.min_level, self.amr_header["nlevelmax"] - self.total_oct_count = hvals["numbl"][imin:imax, :].sum(axis=0) + imin, imax = self.ds.min_level, self._amr_header["nlevelmax"] + self._total_oct_count = hvals["numbl"][imin:imax, :].sum(axis=0) + + self.amr_header_initialized = True + + @property + def ngridbound(self): + self._read_amr_header() + return self._ngridbound + + @property + def amr_offset(self): + self._read_amr_header() + return self._amr_offset + + @property + def max_level(self): + self._read_amr_header() + return self._max_level + + @max_level.setter + def max_level(self, value): + self._max_level = value + + @property + def total_oct_count(self): + self._read_amr_header() + return self._total_oct_count + + @property + def local_oct_count(self): + self._read_amr_header() + return self._local_oct_count + + @property + def amr_header(self): + self._read_amr_header() + return self._amr_header @cached_property def oct_handler(self): @@ -333,7 +375,7 @@ def oct_handler(self): oct_handler.finalize() - new_max_level = max_level + self.ds.min_level + new_max_level = max_level if new_max_level > self.max_level: raise RuntimeError( f"The maximum level detected in the AMR file ({new_max_level}) " @@ -686,10 +728,9 @@ def _initialize_level_stats(self): self.level_stats[level + 1]["numcells"] = 2 ** ( level * self.dataset.dimensionality ) - for level in range(self.max_level + 1): - self.level_stats[level + self.dataset.min_level + 1]["numcells"] = levels[ - :, level - ].sum() + for level in range(levels.shape[1]): + ncell = levels[:, level].sum() + self.level_stats[level + self.dataset.min_level + 1]["numcells"] = ncell def _get_particle_type_counts(self): npart = 0 From bc92ac8493f9928fee1978c4b3c89cfe8e3adf8b Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 14:40:28 +0100 Subject: [PATCH 19/38] Remove print statement --- yt/frontends/ramses/data_structures.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 86975d0dd6f..09dea0b43f3 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -291,7 +291,6 @@ def _read_amr_header(self): - self.ds.parameters["levelmin"] + 1 ) - print(self._max_level, nocts_to_this_level) # update levelmax force_max_level, convention = self.ds._force_max_level From a833f06890fdae4cd8e9ccd4a41556a187b2a1cc Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 17:19:55 +0100 Subject: [PATCH 20/38] For convex regions, do an early break when the selector fully contains a cell --- yt/frontends/ramses/data_structures.py | 3 +-- yt/frontends/ramses/hilbert.py | 34 +++++++++++++++----------- yt/geometry/selection_routines.pyx | 26 ++++++++++++++++++++ 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 09dea0b43f3..e7c35957e73 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -666,8 +666,7 @@ def _identify_base_chunk(self, dobj): ) if getattr(dobj, "_chunk_info", None) is None: if use_fast_hilbert: - bbox = dobj.get_bbox() - idoms = {idom + 1 for idom in get_cpu_list(self.ds, bbox)} + idoms = {idom + 1 for idom in get_cpu_list(self.ds, dobj, factor=3)} # If the oct handler has been initialized, use it domains = [] for dom in self.domains: diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index f86c878770d..d043788fe72 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -1,5 +1,10 @@ import numpy as np +from yt.data_objects.selection_objects.region import YTRegion +from yt.geometry.selection_routines import ( + fully_contains, +) + def hilbert3d(X, bit_length): """Compute the order using Hilbert indexing. @@ -96,30 +101,31 @@ def hilbert3d(X, bit_length): def get_cpu_list( - ds, bbox: np.ndarray, LE: np.ndarray = None, dx: float = 1.0, dx_cond: float = None + ds, + region: YTRegion, + LE: np.ndarray = None, + dx: float = 1.0, + dx_cond: float = None, + factor: float = 4.0, ) -> set[int]: """ Find the subset of CPUs that intersect the bbox in a recursive fashion. """ if LE is None: - LE = np.array( - [ - 0, - 0, - 0, - ], - dtype="d", - ) - if isinstance(bbox, (list, tuple)): - bbox = ds.arr(bbox, "code_length") + LE = np.array([0, 0, 0], dtype="d") + bbox = region.get_bbox() if dx_cond is None: dx_cond = float((bbox[1] - bbox[0]).min().to("code_length")) # If the current dx is smaller than the smallest size of the bbox - if dx < dx_cond: + if dx < dx_cond / factor: # Finish recursion return get_cpu_list_cuboid(ds, np.asarray([LE, LE + dx])) + # # If the current cell is fully within the selected region, stop recursion + if fully_contains(region.selector, LE, dx): + return get_cpu_list_cuboid(ds, np.asarray([LE, LE + dx])) + dx /= 2 ret = set() @@ -128,10 +134,10 @@ def get_cpu_list( for j in range(2): for k in range(2): LE_new = LE + np.array([i, j, k], dtype="d") * dx + # Compute AABB intersection between [LE_new, LE_new + dx] and bbox if all(bbox[1] >= LE_new) and all(bbox[0] <= LE_new + dx): - # Recurse - ret.update(get_cpu_list(ds, bbox, LE_new, dx, dx_cond)) + ret.update(get_cpu_list(ds, region, LE_new, dx, dx_cond, factor)) return ret diff --git a/yt/geometry/selection_routines.pyx b/yt/geometry/selection_routines.pyx index 20e68f7fd40..07d068c99c1 100644 --- a/yt/geometry/selection_routines.pyx +++ b/yt/geometry/selection_routines.pyx @@ -148,6 +148,32 @@ def points_in_cells( return mask +def bbox_intersects( + SelectorObject selector, + np.float64_t[::1] left_edges, + np.float64_t dx +): + cdef np.float64_t[3] right_edges + right_edges[0] = left_edges[0] + dx + right_edges[1] = left_edges[1] + dx + right_edges[2] = left_edges[2] + dx + return selector.select_bbox(&left_edges[0], right_edges) == 1 + +def fully_contains( + SelectorObject selector, + np.float64_t[::1] left_edges, + np.float64_t dx, +): + cdef np.float64_t[3] right_edges + + right_edges[0] = left_edges[0] + dx + right_edges[1] = left_edges[1] + dx + right_edges[2] = left_edges[2] + dx + + return selector.select_bbox_edge(&left_edges[0], right_edges) == 1 + + + include "_selection_routines/selector_object.pxi" include "_selection_routines/point_selector.pxi" include "_selection_routines/sphere_selector.pxi" From 1ebbb2c63cc78378c4901dec6629b4c2c6583f34 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 17:48:13 +0100 Subject: [PATCH 21/38] Info specific to the choice of selector --- yt/frontends/ramses/data_structures.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index e7c35957e73..5b8ba64b97b 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -680,11 +680,19 @@ def _identify_base_chunk(self, dobj): mylog.debug("Identified domain %s", dom.domain_id) domains.append(dom) + if len(domains) >= 1: + mylog.info( + "Identified % 5d/% 5d intersecting domains (% 5d through hilbert key indexing)", + len(domains), + len(self.domains), + len(idoms), + ) else: domains = [dom for dom in self.domains if dom.included(dobj.selector)] + if len(domains) >= 1: + mylog.info("Identified %s intersecting domains", len(domains)) base_region = getattr(dobj, "base_region", dobj) - if len(domains) > 1: - mylog.info("Identified %s intersecting domains", len(domains)) + subsets = [ RAMSESDomainSubset( base_region, From 434849228570bd4760813f238edd40ada8aa6724 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 17:48:37 +0100 Subject: [PATCH 22/38] Fix issue when there are no selected regions --- yt/geometry/geometry_handler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt/geometry/geometry_handler.py b/yt/geometry/geometry_handler.py index 6906c171d39..3270849ec90 100644 --- a/yt/geometry/geometry_handler.py +++ b/yt/geometry/geometry_handler.py @@ -310,6 +310,10 @@ def _accumulate_values(self, method): arrs = [arr[0] for arr in arrs] elif method == "tcoords": arrs = [arr[1] for arr in arrs] + if len(arrs) == 0: + self.data_size = 0 + return np.empty((0, 3), dtype="float64") + arrs = uconcatenate(arrs) self.data_size = arrs.shape[0] return arrs From bd1c02e6a45b1c11d27beb43016b02573a6b488e Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 22:02:37 +0100 Subject: [PATCH 23/38] Fix typing --- yt/frontends/ramses/hilbert.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index d043788fe72..65cdd53f2b0 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -1,12 +1,17 @@ +from typing import Any, Optional + import numpy as np from yt.data_objects.selection_objects.region import YTRegion from yt.geometry.selection_routines import ( + bbox_intersects, fully_contains, ) -def hilbert3d(X, bit_length): +def hilbert3d( + X: np.ndarray[Any, np.dtype[np.float64]], bit_length: int +) -> np.ndarray[Any, np.dtype[np.float64]]: """Compute the order using Hilbert indexing. Arguments @@ -103,9 +108,9 @@ def hilbert3d(X, bit_length): def get_cpu_list( ds, region: YTRegion, - LE: np.ndarray = None, + LE: Optional[np.ndarray[Any, np.dtype[np.float64]]] = None, dx: float = 1.0, - dx_cond: float = None, + dx_cond: Optional[float] = None, factor: float = 4.0, ) -> set[int]: """ @@ -113,8 +118,8 @@ def get_cpu_list( """ if LE is None: LE = np.array([0, 0, 0], dtype="d") - bbox = region.get_bbox() if dx_cond is None: + bbox = region.get_bbox() dx_cond = float((bbox[1] - bbox[0]).min().to("code_length")) # If the current dx is smaller than the smallest size of the bbox @@ -135,13 +140,12 @@ def get_cpu_list( for k in range(2): LE_new = LE + np.array([i, j, k], dtype="d") * dx - # Compute AABB intersection between [LE_new, LE_new + dx] and bbox - if all(bbox[1] >= LE_new) and all(bbox[0] <= LE_new + dx): + if bbox_intersects(region.selector, LE_new, dx): ret.update(get_cpu_list(ds, region, LE_new, dx, dx_cond, factor)) return ret -def get_cpu_list_cuboid(ds, X: np.ndarray) -> set[int]: +def get_cpu_list_cuboid(ds, X: np.ndarray[Any, np.dtype[np.float64]]) -> set[int]: """ Return the list of the CPU intersecting with the cuboid containing the positions. Note that it will be 0-indexed. @@ -220,7 +224,7 @@ def get_cpu_list_cuboid(ds, X: np.ndarray) -> set[int]: cpu_min = np.searchsorted(bound_key, bounding_min, side="right") - 1 cpu_max = np.searchsorted(bound_key, bounding_max, side="right") - cpu_read = set() + cpu_read: set[int] = set() for i in range(ndom): cpu_read.update(range(cpu_min[i], cpu_max[i])) From 7a0d486a050b6dac06fa9479b2c94ee94201be68 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 22:36:12 +0100 Subject: [PATCH 24/38] Infer dtype from record length --- yt/frontends/ramses/particle_handlers.py | 30 +++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index d9fe3dc858b..095766253bd 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -1,10 +1,11 @@ import abc import os import struct -import warnings from itertools import chain, count from typing import Optional +import numpy as np + from yt._typing import FieldKey from yt.config import ytcfg from yt.funcs import mylog @@ -104,6 +105,15 @@ def read_header(self): pass +_default_dtypes: dict[int, str] = { + 1: "c", # char + 2: "h", # short, + 4: "f", # float + 8: "d", # double + # 16: np.float128, # double double precision +} + + class DefaultParticleFileHandler(ParticleFileHandler): ptype = "io" fname = "part_{iout:05d}.out{icpu:05d}" @@ -198,21 +208,25 @@ def build_iterator(): field, vtype = next(particle_fields_iterator) old_pos = fd.tell() field_offsets[ptype, field] = old_pos - _pfields[ptype, field] = vtype fd.skip(1) ipos = fd.tell() record_len = ipos - old_pos - blockLen exp_len = struct.calcsize(vtype) * hvals["npart"] + if record_len != exp_len: - warnings.warn( - f"Field {(ptype, field)} has a length {record_len}, but " - f"expected a length of {exp_len}. " - "Use yt.load(..., extra_particle_fields=[...]) to specify " - "the right size.", - stacklevel=1, + # Guess record vtype from length + nbytes = record_len // hvals["npart"] + vtype = _default_dtypes[nbytes] + + mylog.warning( + "Supposed that `%s` has type %s given record size", + field, + np.dtype(vtype), ) + _pfields[ptype, field] = vtype + if field.startswith("particle_extra_field_"): iextra = int(field.split("_")[-1]) else: From b1a013dcae7dcddc5363c7cc1f34c3acd3695ba5 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 22:45:07 +0100 Subject: [PATCH 25/38] Quote np.ndarray typing for numpy<1.22 --- yt/frontends/ramses/hilbert.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index 65cdd53f2b0..4d5bebf8f52 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -10,8 +10,8 @@ def hilbert3d( - X: np.ndarray[Any, np.dtype[np.float64]], bit_length: int -) -> np.ndarray[Any, np.dtype[np.float64]]: + X: "np.ndarray[Any, np.dtype[np.float64]]", bit_length: int +) -> "np.ndarray[Any, np.dtype[np.float64]]": """Compute the order using Hilbert indexing. Arguments @@ -108,7 +108,7 @@ def hilbert3d( def get_cpu_list( ds, region: YTRegion, - LE: Optional[np.ndarray[Any, np.dtype[np.float64]]] = None, + LE: Optional["np.ndarray[Any, np.dtype[np.float64]]"] = None, dx: float = 1.0, dx_cond: Optional[float] = None, factor: float = 4.0, @@ -145,7 +145,7 @@ def get_cpu_list( return ret -def get_cpu_list_cuboid(ds, X: np.ndarray[Any, np.dtype[np.float64]]) -> set[int]: +def get_cpu_list_cuboid(ds, X: "np.ndarray[Any, np.dtype[np.float64]]") -> set[int]: """ Return the list of the CPU intersecting with the cuboid containing the positions. Note that it will be 0-indexed. From 71a074a6780d886a7de63fb07ee6c578daad1b32 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:19:05 +0100 Subject: [PATCH 26/38] Remove double "#" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Robert --- yt/frontends/ramses/hilbert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index 4d5bebf8f52..9e30cb7c6ff 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -127,7 +127,7 @@ def get_cpu_list( # Finish recursion return get_cpu_list_cuboid(ds, np.asarray([LE, LE + dx])) - # # If the current cell is fully within the selected region, stop recursion + # If the current cell is fully within the selected region, stop recursion if fully_contains(region.selector, LE, dx): return get_cpu_list_cuboid(ds, np.asarray([LE, LE + dx])) From 14f3e83709af1c615f68c3639d376058aba5ffa7 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:19:51 +0100 Subject: [PATCH 27/38] Do not create single-use variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Robert --- yt/frontends/ramses/particle_handlers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index 095766253bd..e3957829367 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -153,9 +153,7 @@ def read_header(self): flen = os.path.getsize(self.fname) with FortranFile(self.fname) as fd: - hvals = {} - attrs = self.attrs - hvals.update(fd.read_attrs(attrs)) + hvals = dict(fd.read_attrs(self.attrs)) particle_field_pos = fd.tell() self.header = hvals From 8ddb5d585982d75cfabdfd8337eff2c4d4ab2b13 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:16:37 +0100 Subject: [PATCH 28/38] Move state diagram out of function to prevent costly alloc/dealloc --- yt/frontends/ramses/hilbert.py | 78 +++++++++++++++++----------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index 9e30cb7c6ff..7f65fbd206d 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -8,6 +8,44 @@ fully_contains, ) +# State diagram to compute the hilbert curve +_STATE_DIAGRAM = np.array( + [ + [ + [1, 2, 0, 6, 11, 4, 5, 6, 10, 4, 7, 10], + [0, 0, 0, 2, 4, 6, 4, 6, 2, 2, 4, 6], + ], + [ + [2, 6, 9, 0, 11, 4, 7, 1, 3, 4, 2, 3], + [1, 7, 3, 3, 3, 5, 7, 7, 5, 1, 5, 1], + ], + [ + [3, 0, 10, 6, 0, 8, 5, 6, 1, 8, 11, 2], + [3, 1, 7, 1, 5, 1, 3, 5, 3, 5, 7, 7], + ], + [ + [2, 7, 9, 11, 7, 8, 3, 10, 1, 8, 2, 6], + [2, 6, 4, 0, 2, 2, 0, 4, 4, 6, 6, 0], + ], + [ + [4, 8, 1, 9, 5, 0, 1, 9, 10, 2, 7, 10], + [7, 3, 1, 5, 7, 7, 5, 1, 1, 3, 3, 5], + ], + [ + [5, 8, 1, 0, 9, 6, 1, 4, 3, 7, 5, 3], + [6, 4, 2, 4, 0, 4, 6, 0, 6, 0, 2, 2], + ], + [ + [3, 0, 11, 9, 0, 10, 11, 9, 5, 2, 8, 4], + [4, 2, 6, 6, 6, 0, 2, 2, 0, 4, 0, 4], + ], + [ + [5, 7, 11, 8, 7, 6, 11, 10, 9, 3, 5, 4], + [5, 5, 5, 7, 1, 3, 1, 3, 7, 7, 1, 3], + ], + ] +) + def hilbert3d( X: "np.ndarray[Any, np.dtype[np.float64]]", bit_length: int @@ -22,42 +60,6 @@ def hilbert3d( The bit_length for the indexing. """ X = np.atleast_2d(X) - state_diagram = np.array( - [ - [ - [1, 2, 0, 6, 11, 4, 5, 6, 10, 4, 7, 10], - [0, 0, 0, 2, 4, 6, 4, 6, 2, 2, 4, 6], - ], - [ - [2, 6, 9, 0, 11, 4, 7, 1, 3, 4, 2, 3], - [1, 7, 3, 3, 3, 5, 7, 7, 5, 1, 5, 1], - ], - [ - [3, 0, 10, 6, 0, 8, 5, 6, 1, 8, 11, 2], - [3, 1, 7, 1, 5, 1, 3, 5, 3, 5, 7, 7], - ], - [ - [2, 7, 9, 11, 7, 8, 3, 10, 1, 8, 2, 6], - [2, 6, 4, 0, 2, 2, 0, 4, 4, 6, 6, 0], - ], - [ - [4, 8, 1, 9, 5, 0, 1, 9, 10, 2, 7, 10], - [7, 3, 1, 5, 7, 7, 5, 1, 1, 3, 3, 5], - ], - [ - [5, 8, 1, 0, 9, 6, 1, 4, 3, 7, 5, 3], - [6, 4, 2, 4, 0, 4, 6, 0, 6, 0, 2, 2], - ], - [ - [3, 0, 11, 9, 0, 10, 11, 9, 5, 2, 8, 4], - [4, 2, 6, 6, 6, 0, 2, 2, 0, 4, 0, 4], - ], - [ - [5, 7, 11, 8, 7, 6, 11, 10, 9, 3, 5, 4], - [5, 5, 5, 7, 1, 3, 1, 3, 7, 7, 1, 3], - ], - ] - ) x_bit_mask, y_bit_mask, z_bit_mask = ( np.zeros(bit_length, dtype=bool) for _ in range(3) @@ -89,8 +91,8 @@ def hilbert3d( + 2 * i_bit_mask[3 * i + 1] + 1 * i_bit_mask[3 * i] ) - nstate = state_diagram[sdigit, 0, cstate] - hdigit = state_diagram[sdigit, 1, cstate] + nstate = _STATE_DIAGRAM[sdigit, 0, cstate] + hdigit = _STATE_DIAGRAM[sdigit, 1, cstate] i_bit_mask[3 * i + 2] = hdigit & 0b100 i_bit_mask[3 * i + 1] = hdigit & 0b010 From a7302edf23a61dbfa006632e7a2911f379301451 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:16:50 +0100 Subject: [PATCH 29/38] Remove comment that pertains to broken code --- yt/frontends/ramses/particle_handlers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index e3957829367..de02e96ca04 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -110,7 +110,6 @@ def read_header(self): 2: "h", # short, 4: "f", # float 8: "d", # double - # 16: np.float128, # double double precision } From 19996e3ff25efa3fad9aff5fab946d31f4f64524 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:18:35 +0100 Subject: [PATCH 30/38] Remove arbitrary limitation to LRU cache size --- yt/frontends/ramses/io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt/frontends/ramses/io.py b/yt/frontends/ramses/io.py index 6114d2140aa..9a6aeff5b4d 100644 --- a/yt/frontends/ramses/io.py +++ b/yt/frontends/ramses/io.py @@ -310,7 +310,7 @@ def _read_particle_subset(self, subset, fields): return tr -@lru_cache(maxsize=32) +@lru_cache def _read_part_binary_file_descriptor(fname: Union[str, "os.PathLike[str]"]): """ Read a file descriptor and returns the array of the fields found. @@ -367,7 +367,7 @@ def _read_part_binary_file_descriptor(fname: Union[str, "os.PathLike[str]"]): return fields -@lru_cache(maxsize=32) +@lru_cache def _read_part_csv_file_descriptor(fname: Union[str, "os.PathLike[str]"]): """ Read the file from the csv sink particles output. @@ -417,7 +417,7 @@ def _read_part_csv_file_descriptor(fname: Union[str, "os.PathLike[str]"]): return fields, local_particle_count -@lru_cache(maxsize=32) +@lru_cache def _read_fluid_file_descriptor(fname: Union[str, "os.PathLike[str]"], *, prefix: str): """ Read a file descriptor and returns the array of the fields found. From 1e08c62dff1a9435c8295b437fbcdbe08cbd0b02 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:21:28 +0100 Subject: [PATCH 31/38] Make attributes private --- yt/frontends/ramses/data_structures.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 5b8ba64b97b..e93a3355a58 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -179,8 +179,8 @@ class RAMSESDomainFile: _hydro_offset = None _level_count = None - oct_handler_initialized = False - amr_header_initialized = False + _oct_handler_initialized = False + _amr_header_initialized = False def __init__(self, ds, domain_id): self.ds = ds @@ -251,7 +251,7 @@ def amr_file(self): return f def _read_amr_header(self): - if self.amr_header_initialized: + if self._amr_header_initialized: return hvals = {} with self.amr_file as f: @@ -305,7 +305,7 @@ def _read_amr_header(self): imin, imax = self.ds.min_level, self._amr_header["nlevelmax"] self._total_oct_count = hvals["numbl"][imin:imax, :].sum(axis=0) - self.amr_header_initialized = True + self._amr_header_initialized = True @property def ngridbound(self): @@ -381,7 +381,7 @@ def oct_handler(self): f" does not match the expected number {self.max_level}." ) self.max_level = new_max_level - self.oct_handler_initialized = True + self._oct_handler_initialized = True return oct_handler @@ -675,7 +675,7 @@ def _identify_base_chunk(self, dobj): if dom.domain_id not in idoms: continue # If the domain has its oct handler, refine the selection - if dom.oct_handler_initialized and not dom.included(dobj.selector): + if dom._oct_handler_initialized and not dom.included(dobj.selector): continue mylog.debug("Identified domain %s", dom.domain_id) From c7d67032cc9dec273c899f444f4c69a489c129bd Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 27 Nov 2023 15:22:48 +0100 Subject: [PATCH 32/38] Name function more explicitly --- yt/frontends/ramses/data_structures.py | 8 +++++--- yt/frontends/ramses/hilbert.py | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index e93a3355a58..ee8cdbc45ac 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -32,7 +32,7 @@ ) from .field_handlers import get_field_handlers from .fields import _X, RAMSESFieldInfo -from .hilbert import get_cpu_list +from .hilbert import get_intersecting_cpus from .io_utils import fill_hydro, read_amr from .particle_handlers import get_particle_handlers @@ -618,7 +618,7 @@ def __init__(self, ds, dataset_type="ramses"): def _initialize_oct_handler(self): if self.ds._bbox is not None: - cpu_list = get_cpu_list(self.dataset, self.dataset._bbox) + cpu_list = get_intersecting_cpus(self.dataset, self.dataset._bbox) else: cpu_list = range(self.dataset["ncpu"]) @@ -666,7 +666,9 @@ def _identify_base_chunk(self, dobj): ) if getattr(dobj, "_chunk_info", None) is None: if use_fast_hilbert: - idoms = {idom + 1 for idom in get_cpu_list(self.ds, dobj, factor=3)} + idoms = { + idom + 1 for idom in get_intersecting_cpus(self.ds, dobj, factor=3) + } # If the oct handler has been initialized, use it domains = [] for dom in self.domains: diff --git a/yt/frontends/ramses/hilbert.py b/yt/frontends/ramses/hilbert.py index 7f65fbd206d..93f5ae94d00 100644 --- a/yt/frontends/ramses/hilbert.py +++ b/yt/frontends/ramses/hilbert.py @@ -107,7 +107,7 @@ def hilbert3d( return order -def get_cpu_list( +def get_intersecting_cpus( ds, region: YTRegion, LE: Optional["np.ndarray[Any, np.dtype[np.float64]]"] = None, @@ -143,7 +143,9 @@ def get_cpu_list( LE_new = LE + np.array([i, j, k], dtype="d") * dx if bbox_intersects(region.selector, LE_new, dx): - ret.update(get_cpu_list(ds, region, LE_new, dx, dx_cond, factor)) + ret.update( + get_intersecting_cpus(ds, region, LE_new, dx, dx_cond, factor) + ) return ret From 526b7a5a4f640e6a4f456e32bd1b7f0d2656844d Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Dec 2023 11:28:13 +0100 Subject: [PATCH 33/38] Lazy-read particle files --- yt/frontends/ramses/data_structures.py | 10 +--- yt/frontends/ramses/particle_handlers.py | 72 +++++++++++++++++------- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index ee8cdbc45ac..d037fd772a5 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -219,11 +219,6 @@ def __init__(self, ds, domain_id): PH(self) for PH in get_particle_handlers() if PH.any_exist(ds) ] self.particle_handlers = particle_handlers - for ph in particle_handlers: - mylog.debug( - "Detected particle type %s in domain_id=%s", ph.ptype, domain_id - ) - ph.read_header() def __repr__(self): return "RAMSESDomainFile: %i" % self.domain_id @@ -641,9 +636,8 @@ def _detect_output_fields(self): dsl = set() # Get the detected particle fields - for domain in self.domains: - for ph in domain.particle_handlers: - dsl.update(set(ph.field_offsets.keys())) + for ph in self.domains[0].particle_handlers: + dsl.update(set(ph.field_offsets.keys())) self.particle_field_list = list(dsl) cosmo = self.ds.cosmological_simulation diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index de02e96ca04..06a14519943 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -2,7 +2,7 @@ import os import struct from itertools import chain, count -from typing import Optional +from typing import Any, Optional import numpy as np @@ -120,6 +120,11 @@ class DefaultParticleFileHandler(ParticleFileHandler): config_field = "ramses-particles" reader = _ramses_particle_binary_file_handler + _field_offsets: dict[tuple[str, str], int] + _field_types: dict[tuple[str, str], str] + _local_particle_count: int + _header: dict[str, Any] + attrs = ( ("ncpu", 1, "i"), ("ndim", 1, "i"), @@ -145,9 +150,10 @@ class DefaultParticleFileHandler(ParticleFileHandler): def read_header(self): if not self.exists: - self.field_offsets = {} - self.field_types = {} - self.local_particle_count = 0 + self._field_offsets = {} + self._field_types = {} + self._local_particle_count = 0 + self._header = {} return flen = os.path.getsize(self.fname) @@ -155,8 +161,8 @@ def read_header(self): hvals = dict(fd.read_attrs(self.attrs)) particle_field_pos = fd.tell() - self.header = hvals - self.local_particle_count = hvals["npart"] + self._header = hvals + self._local_particle_count = hvals["npart"] extra_particle_fields = self.ds._extra_particle_fields if self.has_descriptor: @@ -237,8 +243,36 @@ def build_iterator(): ) self.ds._warned_extra_fields["io"] = True - self.field_offsets = field_offsets - self.field_types = _pfields + self._field_offsets = field_offsets + self._field_types = _pfields + + @property + def field_offsets(self) -> dict[tuple[str, str], int]: + if hasattr(self, "_field_offsets"): + return self._field_offsets + self.read_header() + return self._field_offsets + + @property + def field_types(self) -> dict[tuple[str, str], str]: + if hasattr(self, "_field_types"): + return self._field_types + self.read_header() + return self._field_types + + @property + def local_particle_count(self) -> int: + if hasattr(self, "_local_particle_count"): + return self._local_particle_count + self.read_header() + return self._local_particle_count + + @property + def header(self) -> dict[str, Any]: + if hasattr(self, "_header"): + return self._header + self.read_header() + return self._header class SinkParticleFileHandler(ParticleFileHandler): @@ -278,9 +312,9 @@ class SinkParticleFileHandler(ParticleFileHandler): def read_header(self): if not self.exists: - self.field_offsets = {} - self.field_types = {} - self.local_particle_count = 0 + self._field_offsets = {} + self._field_types = {} + self._local_particle_count = 0 return fd = FortranFile(self.fname) flen = os.path.getsize(self.fname) @@ -296,10 +330,10 @@ def read_header(self): # domains. Here, we set the local_particle_count to 0 except # for the first domain to be red. if getattr(self.ds, "_sink_file_flag", False): - self.local_particle_count = 0 + self._local_particle_count = 0 else: self.ds._sink_file_flag = True - self.local_particle_count = hvals["nsink"] + self._local_particle_count = hvals["nsink"] # Read the fields + add the sink properties if self.has_descriptor: @@ -324,8 +358,8 @@ def read_header(self): field_offsets[self.ptype, field] = fd.tell() _pfields[self.ptype, field] = vtype fd.skip(1) - self.field_offsets = field_offsets - self.field_types = _pfields + self._field_offsets = field_offsets + self._field_types = _pfields fd.close() @@ -341,8 +375,8 @@ class SinkParticleFileHandlerCsv(ParticleFileHandler): def read_header(self): if not self.exists: - self.field_offsets = {} - self.field_types = {} + self._field_offsets = {} + self._field_types = {} self.local_particle_count = 0 return field_offsets = {} @@ -354,5 +388,5 @@ def read_header(self): field_offsets[self.ptype, field] = ind _pfields[self.ptype, field] = "d" - self.field_offsets = field_offsets - self.field_types = _pfields + self._field_offsets = field_offsets + self._field_types = _pfields From 685961f40d269d9a427ef3ca6c1b8174db0d174d Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Dec 2023 11:34:44 +0100 Subject: [PATCH 34/38] Improve typing --- yt/frontends/ramses/particle_handlers.py | 56 ++++++++++++++++-------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index 06a14519943..f25b1c092e1 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -2,7 +2,7 @@ import os import struct from itertools import chain, count -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional import numpy as np @@ -19,6 +19,9 @@ _read_part_csv_file_descriptor, ) +if TYPE_CHECKING: + from yt.frontends.ramses.data_structures import RAMSESDomainSubset + PARTICLE_HANDLERS: set[type["ParticleFileHandler"]] = set() @@ -42,23 +45,40 @@ class ParticleFileHandler(abc.ABC, HandlerMixin): _file_type = "particle" - # These properties are static properties - ptype: Optional[str] = None # The name to give to the particle type - fname: Optional[str] = None # The name of the file(s). - file_descriptor: Optional[str] = None # The name of the file descriptor (if any) - - attrs: tuple[tuple[str, int, str], ...] # The attributes of the header - known_fields: Optional[ - list[FieldKey] - ] = None # A list of tuple containing the field name and its type - config_field: Optional[str] = None # Name of the config section (if any) - - # These properties are computed dynamically - field_offsets = None # Mapping from field to offset in file - field_types = ( - None # Mapping from field to the type of the data (float, integer, ...) - ) - local_particle_count = None # The number of particle in the domain + ## These properties are static + # The name to give to the particle type + ptype: str + + # The name of the file(s). + fname: str + + # The name of the file descriptor (if any) + file_descriptor: Optional[str] = None + + # The attributes of the header + attrs: tuple[tuple[str, int, str], ...] + + # A list of tuple containing the field name and its type + known_fields: list[FieldKey] + + # The function to employ to read the file + reader: Callable[ + ["ParticleFileHandler", "RAMSESDomainSubset", list[tuple[str, str]], int], + dict[tuple[str, str], np.ndarray], + ] + + # Name of the config section (if any) + config_field: Optional[str] = None + + ## These properties are computed dynamically + # Mapping from field to offset in file + _field_offsets: dict[tuple[str, str], int] + + # Mapping from field to the type of the data (float, integer, ...) + _field_types: dict[tuple[str, str], str] + + # Number of particle in the domain + _local_particle_count: int def __init_subclass__(cls, *args, **kwargs): """ From a778a94c985582340e841d8527e0b6b0ea31320a Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Dec 2023 11:46:01 +0100 Subject: [PATCH 35/38] Remove duplicated desc --- yt/frontends/ramses/particle_handlers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index f25b1c092e1..8564bef30b7 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -140,11 +140,6 @@ class DefaultParticleFileHandler(ParticleFileHandler): config_field = "ramses-particles" reader = _ramses_particle_binary_file_handler - _field_offsets: dict[tuple[str, str], int] - _field_types: dict[tuple[str, str], str] - _local_particle_count: int - _header: dict[str, Any] - attrs = ( ("ncpu", 1, "i"), ("ndim", 1, "i"), From 502ce2a0dce173a66323df11e356c757015a9980 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Dec 2023 11:49:14 +0100 Subject: [PATCH 36/38] Reuse detection of gravity fields --- yt/frontends/ramses/field_handlers.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt/frontends/ramses/field_handlers.py b/yt/frontends/ramses/field_handlers.py index 65661c3119b..2c4160d27ed 100644 --- a/yt/frontends/ramses/field_handlers.py +++ b/yt/frontends/ramses/field_handlers.py @@ -469,6 +469,11 @@ class GravFieldFileHandler(FieldFileHandler): @classmethod def detect_fields(cls, ds): + # Try to get the detected fields + detected_fields = cls.get_detected_fields(ds) + if detected_fields: + return detected_fields + ndim = ds.dimensionality iout = int(str(ds).split("_")[1]) basedir = os.path.split(ds.parameter_filename)[0] @@ -497,6 +502,8 @@ def detect_fields(cls, ds): cls.field_list = [(cls.ftype, e) for e in fields] + cls.set_detected_fields(ds, fields) + return fields From 51ce380f32e29d338e4cf1c52b18e2d52ec11ead Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Dec 2023 12:38:12 +0100 Subject: [PATCH 37/38] Move properties to ParticleFileHandler --- yt/frontends/ramses/particle_handlers.py | 60 ++++++++++++------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index 8564bef30b7..2f9305d139e 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -115,15 +115,43 @@ def read_header(self): It is in charge of setting `self.field_offsets` and `self.field_types`. - * `field_offsets`: dictionary: tuple -> integer + * `_field_offsets`: dictionary: tuple -> integer A dictionary that maps `(type, field_name)` to their location in the file (integer) - * `field_types`: dictionary: tuple -> character + * `_field_types`: dictionary: tuple -> character A dictionary that maps `(type, field_name)` to their type (character), following Python's struct convention. """ pass + @property + def field_offsets(self) -> dict[tuple[str, str], int]: + if hasattr(self, "_field_offsets"): + return self._field_offsets + self.read_header() + return self._field_offsets + + @property + def field_types(self) -> dict[tuple[str, str], str]: + if hasattr(self, "_field_types"): + return self._field_types + self.read_header() + return self._field_types + + @property + def local_particle_count(self) -> int: + if hasattr(self, "_local_particle_count"): + return self._local_particle_count + self.read_header() + return self._local_particle_count + + @property + def header(self) -> dict[str, Any]: + if hasattr(self, "_header"): + return self._header + self.read_header() + return self._header + _default_dtypes: dict[int, str] = { 1: "c", # char @@ -261,34 +289,6 @@ def build_iterator(): self._field_offsets = field_offsets self._field_types = _pfields - @property - def field_offsets(self) -> dict[tuple[str, str], int]: - if hasattr(self, "_field_offsets"): - return self._field_offsets - self.read_header() - return self._field_offsets - - @property - def field_types(self) -> dict[tuple[str, str], str]: - if hasattr(self, "_field_types"): - return self._field_types - self.read_header() - return self._field_types - - @property - def local_particle_count(self) -> int: - if hasattr(self, "_local_particle_count"): - return self._local_particle_count - self.read_header() - return self._local_particle_count - - @property - def header(self) -> dict[str, Any]: - if hasattr(self, "_header"): - return self._header - self.read_header() - return self._header - class SinkParticleFileHandler(ParticleFileHandler): """Handle sink files""" From bcf344723690cd7c89fbb2a76b301f5ad189adf1 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Dec 2023 12:49:02 +0100 Subject: [PATCH 38/38] Fix typing --- yt/frontends/ramses/particle_handlers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt/frontends/ramses/particle_handlers.py b/yt/frontends/ramses/particle_handlers.py index 2f9305d139e..42d2fb9c113 100644 --- a/yt/frontends/ramses/particle_handlers.py +++ b/yt/frontends/ramses/particle_handlers.py @@ -80,6 +80,9 @@ class ParticleFileHandler(abc.ABC, HandlerMixin): # Number of particle in the domain _local_particle_count: int + # The header of the file + _header: dict[str, Any] + def __init_subclass__(cls, *args, **kwargs): """ Registers subclasses at creation. @@ -330,6 +333,7 @@ def read_header(self): self._field_offsets = {} self._field_types = {} self._local_particle_count = 0 + self._header = {} return fd = FortranFile(self.fname) flen = os.path.getsize(self.fname) @@ -392,12 +396,13 @@ def read_header(self): if not self.exists: self._field_offsets = {} self._field_types = {} - self.local_particle_count = 0 + self._local_particle_count = 0 + self._header = {} return field_offsets = {} _pfields = {} - fields, self.local_particle_count = _read_part_csv_file_descriptor(self.fname) + fields, self._local_particle_count = _read_part_csv_file_descriptor(self.fname) for ind, field in enumerate(fields): field_offsets[self.ptype, field] = ind