From a2354dc518d4a7efe7eb5102809b1d4479f928f2 Mon Sep 17 00:00:00 2001 From: Henry N Jones Date: Fri, 6 Sep 2024 17:00:50 -0700 Subject: [PATCH] OpenPMD frontend update --- pyproject.toml | 2 +- yt/frontends/open_pmd/api.py | 2 +- yt/frontends/open_pmd/data_structures.py | 754 +++++++++++++------- yt/frontends/open_pmd/fields.py | 35 +- yt/frontends/open_pmd/io.py | 261 +++++-- yt/frontends/open_pmd/misc.py | 139 +++- yt/frontends/open_pmd/tests/test_outputs.py | 11 +- yt/utilities/file_handler.py | 18 + yt/utilities/on_demand_imports.py | 11 + 9 files changed, 871 insertions(+), 362 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 86bdc5de23..49b04b9977 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,7 @@ http-stream = ["requests>=2.20.0"] idefix = ["yt_idefix[HDF5]>=2.3.0"] # externally packaged frontend moab = ["yt[HDF5]"] nc4-cm1 = ["yt[netCDF4]"] -open-pmd = ["yt[HDF5]"] +open-pmd = ["yt[openpmd_api]"] owls = ["yt[HDF5]"] owls-subfind = ["yt[HDF5]"] ramses = ["yt[Fortran]"] diff --git a/yt/frontends/open_pmd/api.py b/yt/frontends/open_pmd/api.py index 851f7bb94b..f47da0ea30 100644 --- a/yt/frontends/open_pmd/api.py +++ b/yt/frontends/open_pmd/api.py @@ -1,4 +1,4 @@ from . import tests from .data_structures import OpenPMDDataset, OpenPMDGrid, OpenPMDHierarchy from .fields import OpenPMDFieldInfo -from .io import IOHandlerOpenPMDHDF5 +from .io import IOHandlerOpenPMD diff --git a/yt/frontends/open_pmd/data_structures.py b/yt/frontends/open_pmd/data_structures.py index c83120bf90..72b61029d0 100644 --- a/yt/frontends/open_pmd/data_structures.py +++ b/yt/frontends/open_pmd/data_structures.py @@ -1,5 +1,3 @@ -from functools import reduce -from operator import mul from os import listdir, path from re import match from typing import Optional @@ -11,12 +9,13 @@ from yt.data_objects.static_output import Dataset from yt.data_objects.time_series import DatasetSeries from yt.frontends.open_pmd.fields import OpenPMDFieldInfo -from yt.frontends.open_pmd.misc import get_component, is_const_component +from yt.frontends.open_pmd.misc import is_const_component, pad_to_threed from yt.funcs import setdefaultattr from yt.geometry.grid_geometry_handler import GridIndex -from yt.utilities.file_handler import HDF5FileHandler, valid_hdf5_signature +from yt.utilities.file_handler import OpenPMDFileHandler +from yt.utilities.lib.misc_utilities import get_box_grids_level from yt.utilities.logger import ytLogger as mylog -from yt.utilities.on_demand_imports import _h5py as h5py +from yt.utilities.on_demand_imports import _openpmd_api as openpmd_api ompd_known_versions = [Version(_) for _ in ("1.0.0", "1.0.1", "1.1.0")] opmd_required_attributes = ["openPMD", "basePath"] @@ -26,13 +25,12 @@ class OpenPMDGrid(AMRGridPatch): """Represents chunk of data on-disk. This defines the index and offset for every mesh and particle type. - It also defines parents and children grids. Since openPMD does not have multiple - levels of refinement there are no parents or children for any grid. + It also defines parents and children grids (still some problems here). """ _id_offset = 0 __slots__ = ["_level_id"] - # Every particle species and mesh might have different hdf5-indices and offsets + # Every particle species and mesh might have different indices and offsets ftypes: Optional[list[str]] = [] ptypes: Optional[list[str]] = [] @@ -41,29 +39,137 @@ class OpenPMDGrid(AMRGridPatch): pindex = 0 poffset = 0 - def __init__(self, gid, index, level=-1, fi=0, fo=0, pi=0, po=0, ft=None, pt=None): + def __init__( + self, gid, index, level=-1, fi=0, fo=0, pi=0, po=None, ft=None, pt=None + ): AMRGridPatch.__init__(self, gid, filename=index.index_filename, index=index) if ft is None: ft = [] if pt is None: pt = [] + # these indices and offset are now for on-disk purposes self.findex = fi self.foffset = fo self.pindex = pi self.poffset = po self.ftypes = ft self.ptypes = pt - self.Parent = None - self.Children = [] + self._parent_id = [] + self._children_ids = [] self.Level = level def __str__(self): return "OpenPMDGrid_%04i (%s)" % (self.id, self.ActiveDimensions) + def _read_particles(self): + """ + Return the number of particles for each species in this grid and the array + of indices for particle sorting. This sorting is only used for datasets without particlePatches. + + """ + f = self.index.dataset._handle + axes_labels = self.index.dataset._axes_labels + # gle = self.LeftEdge.d[:len(axes_labels)][::-1] + # gre = self.RightEdge.d[:len(axes_labels)][::-1] + + # quick check + print( + len(self.index.grids), + len( + f.particles[list(f.particles)[0]]["position"][ + axes_labels[0] + ].available_chunks() + ), + ) + + assert len(self.index.grids) == len( + f.particles[list(f.particles)[0]]["position"][ + axes_labels[0] + ].available_chunks() + ) + # for species in f.particles: + # + # part_dict[str(species)] = {'count': num_part, 'mask': np.where(index_array)[0]} + + """ + part_dict = {} + for species in f.particles: + if len(axes_labels) == 3: + index_array = \ + np.where(get_component(f.particles[species]['position'], axes_labels[0]) >= gle[0], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[0]) < gre[0], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[1]) >= gle[1], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[1]) < gre[1], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[2]) >= gle[2], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[2]) < gre[2], True, False) + elif len(axes_labels) == 2: + index_array = \ + np.where(get_component(f.particles[species]['position'], axes_labels[0]) >= gle[0], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[0]) < gre[0], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[1]) >= gle[1], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[1]) < gre[1], True, False) + else: + index_array = \ + np.where(get_component(f.particles[species]['position'], axes_labels[0]) >= gle[0], True, False) *\ + np.where(get_component(f.particles[species]['position'], axes_labels[0]) < gre[0], True, False) + num_part = np.sum(index_array) + part_dict[str(species)] = {'count': num_part, 'mask': np.where(index_array)[0]} + """ + # self._pdata = part_dict + + def _setup_dx(self): + # has already been read in and stored in index + self.dds = self.index.ds.arr(self.index.level_dds[self.Level, :], "code_length") + + def _prepare_grid(self): + super()._prepare_grid() + h = self.index # cache it + my_ind = self.id - self._id_offset + self.ActiveDimensions = h.grid_dimensions[my_ind] + self.LeftEdge = h.grid_left_edge[my_ind] + self.RightEdge = h.grid_right_edge[my_ind] + + @property + def Parent(self): + if len(self._parent_id) == 0: + return None + # self.Parent.dds + return [self.index.grids[pid - self._id_offset] for pid in self._parent_id] + + @property + def Children(self): + return [self.index.grids[cid - self._id_offset] for cid in self._children_ids] + + @property + def NumParticles(self): + print(list(self.index.dataset._handle.particles["electrons"]), "PRINTED") + if ( + "particlesPatches" + in self.index.dataset._handle.particles[ + list(self.index.dataset._handle.particles)[0] + ] + ): + for species in self.index.dataset._handle.particles: + for _patch in list( + self.index.dataset._handle.particles[str(species)][ + "particlePatches" + ] + ): + # TODO Add support for ParticlPatches + mylog.warning( + "ParticlePatches is not yet supported as it has not yet been tested :(" + ) + print("PartPatches") + else: + self._read_particles() + out_dict = {} + for species_key in list(self._pdata.keys()): + out_dict[species_key] = self._pdata[species_key]["count"] + return out_dict + class OpenPMDHierarchy(GridIndex): """Defines which fields and particles are created and read from disk. - Furthermore it defines the characteristics of the grids. """ @@ -74,6 +180,7 @@ def __init__(self, ds, dataset_type="openPMD"): self.dataset = ds self.index_filename = ds.parameter_filename self.directory = path.dirname(self.index_filename) + self.max_level = self.dataset._max_level GridIndex.__init__(self, ds, dataset_type) def _get_particle_type_counts(self): @@ -87,21 +194,19 @@ def _get_particle_type_counts(self): """ result = {} f = self.dataset._handle - bp = self.dataset.base_path - pp = self.dataset.particles_path try: for ptype in self.ds.particle_types_raw: if str(ptype) == "io": - spec = list(f[bp + pp].keys())[0] + spec = list(f.particles)[0] else: spec = ptype - axis = list(f[bp + pp + "/" + spec + "/position"].keys())[0] - pos = f[bp + pp + "/" + spec + "/position/" + axis] + part = f.particles[spec] + pos = part["position"][list(part["position"])[0]] if is_const_component(pos): - result[ptype] = pos.attrs["shape"] + result[ptype] = pos.shape[0] # relic, should there be a difference? else: - result[ptype] = pos.len() + result[ptype] = pos.shape[0] except KeyError: result["io"] = 0 @@ -118,20 +223,15 @@ def _detect_output_fields(self): or (for multiple species of particles) the particle name on-disk. """ f = self.dataset._handle - bp = self.dataset.base_path - mp = self.dataset.meshes_path - pp = self.dataset.particles_path - mesh_fields = [] try: - meshes = f[bp + mp] - for mname in meshes.keys(): + # here we force higher level meshes to not appear on field list + for mname in list(f.meshes)[:: self.max_level + 1]: try: - mesh = meshes[mname] - for axis in mesh.keys(): + for axis in list(f.meshes[mname]): mesh_fields.append(mname.replace("_", "-") + "_" + axis) - except AttributeError: - # This is a h5py.Dataset (i.e. no axes) + except AttributeError: # not sure if this would ever happen + # (i.e. scalar or constant component with no axes) mesh_fields.append(mname.replace("_", "-")) except (KeyError, TypeError, AttributeError): pass @@ -139,11 +239,10 @@ def _detect_output_fields(self): particle_fields = [] try: - particles = f[bp + pp] - for pname in particles.keys(): - species = particles[pname] - for recname in species.keys(): - record = species[recname] + for pname in list(f.particles): + species = list(f.particles[pname]) + for recname in species: + record = f.particles[pname][recname] if is_const_component(record): # Record itself (e.g. particle_mass) is constant particle_fields.append( @@ -153,19 +252,22 @@ def _detect_output_fields(self): try: # Create a field for every axis (x,y,z) of every # property (position) of every species (electrons) - axes = list(record.keys()) + axes = list(record) if str(recname) == "position": recname = "positionCoarse" - for axis in axes: - particle_fields.append( - pname.replace("_", "-") - + "_" - + recname.replace("_", "-") - + "_" - + axis - ) + if len(axes) > 1: + for axis in axes: + particle_fields.append( + pname.replace("_", "-") + + "_" + + recname.replace("_", "-") + + "_" + + axis + ) # so we are doing all this, and then raising error which is why we get the noaxes field + else: + raise AttributeError # in the case that is have no axes except AttributeError: - # Record is a dataset, does not have axes (e.g. weighting) + # Record is a dataset, does not have axes (e.g. weighting) #electrons and ions momentum, positionCoarse, and positionOffset are here particle_fields.append( pname.replace("_", "-") + "_" @@ -174,7 +276,7 @@ def _detect_output_fields(self): pass else: pass - if len(list(particles.keys())) > 1: + if len(list(f.particles)) > 1: # There is more than one particle species, # use the specific names as field types self.field_list.extend( @@ -203,59 +305,69 @@ def _count_grids(self): The number of grids is determined by their respective memory footprint. """ f = self.dataset._handle - bp = self.dataset.base_path - mp = self.dataset.meshes_path - pp = self.dataset.particles_path - self.meshshapes = {} self.numparts = {} - self.num_grids = 0 try: - meshes = f[bp + mp] - for mname in meshes.keys(): - mesh = meshes[mname] - if isinstance(mesh, h5py.Group): - shape = mesh[list(mesh.keys())[0]].shape + # here we assuming symmetry across meshes on a per_level basis + for level, mname in enumerate(list(f.meshes)[: self.max_level + 1]): + mesh = f.meshes[mname] + if isinstance(mesh, openpmd_api.io.openpmd_api_cxx.Mesh): + if len(mesh[list(mesh)[0]].available_chunks()) > 0: + chunk_list = mesh[list(mesh)[0]].available_chunks() + else: + raise AttributeError + shape = tuple( + mesh[list(mesh)[0]].shape + ) # per-level domain shape, not grid shape else: - shape = mesh.shape - spacing = tuple(mesh.attrs["gridSpacing"]) - offset = tuple(mesh.attrs["gridGlobalOffset"]) - unit_si = mesh.attrs["gridUnitSI"] - self.meshshapes[mname] = (shape, spacing, offset, unit_si) - except (KeyError, TypeError, AttributeError): + raise AttributeError + # currently we make one meshshape key/value dict pair per openpmdapi mesh + spacing = tuple(mesh.grid_spacing) + offset = tuple(mesh.grid_global_offset) + unit_si = mesh.grid_unit_SI + self.meshshapes[level] = ( + chunk_list, + shape, + spacing, + offset, + unit_si, + ) + except (TypeError, AttributeError): pass try: - particles = f[bp + pp] - for pname in particles.keys(): - species = particles[pname] - if "particlePatches" in species.keys(): + for pname in list(f.particles): + species = f.particles[pname] + if "particlePatches" in list( + species + ): # haven't been able to test this, likely doesn't work for patch, size in enumerate( - species["/particlePatches/numParticles"] + species.particle_patches["numParticles"] ): self.numparts[f"{pname}#{patch}"] = size else: - axis = list(species["/position"].keys())[0] - if is_const_component(species["/position/" + axis]): - self.numparts[pname] = species["/position/" + axis].attrs[ - "shape" - ] + axis = list(species["position"])[0] + if is_const_component(species["position"][axis]): + self.numparts[pname] = species["position"][axis].shape[0] else: - self.numparts[pname] = species["/position/" + axis].len() + self.numparts[pname] = species["position"][axis].shape[0] + # should these be the same? relic from old frontend except (KeyError, TypeError, AttributeError): pass # Limit values per grid by resulting memory footprint - self.vpg = int(self.dataset.gridsize / 4) # 4Byte per value (f32) - - # Meshes of the same size do not need separate chunks - for shape, *_ in set(self.meshshapes.values()): - self.num_grids += min( - shape[0], int(np.ceil(reduce(mul, shape) * self.vpg**-1)) - ) + # TODO + self.vpg = int( + self.dataset.gridsize + ) # 4Byte per value (f32) #havn't used this yet + for chunk_ls, *_ in self.meshshapes.values(): + # ) #we are just using the amount of grids per level, + # assuming equality between records of same level + self.num_grids += len(chunk_ls) # here we are not limiting by memory # Same goes for particle chunks if they are not inside particlePatches + # what does this do? patches = {} no_patches = {} for k, v in self.numparts.items(): @@ -263,18 +375,20 @@ def _count_grids(self): patches[k] = v else: no_patches[k] = v - for size in set(no_patches.values()): - self.num_grids += int(np.ceil(size * self.vpg**-1)) - for size in patches.values(): - self.num_grids += int(np.ceil(size * self.vpg**-1)) + # Do we want empty grids for particles? I don't think so + + # for size in set(no_patches.values()): + # self.num_grids += int(np.ceil(size * self.vpg**-1)) + # for size in patches.values(): + # self.num_grids += int(np.ceil(size * self.vpg**-1)) def _parse_index(self): """Fills each grid with appropriate properties (extent, dimensions, ...) This calculates the properties of every OpenPMDGrid based on the total number of - grids in the simulation. The domain is divided into ``self.num_grids`` (roughly) - equally sized chunks along the x-axis. ``grid_levels`` is always equal to 0 - since we only have one level of refinement in openPMD. + grids in the simulation. The domain is divided into ``self.num_grids`` based either on + mesh_record_component.available_chunks() (only for ADIOS2 data) or memory contraints + (HDF5 data for now). Notes ----- @@ -283,89 +397,115 @@ def _parse_index(self): only. The others do not have any particles affiliated with them. """ f = self.dataset._handle - bp = self.dataset.base_path - pp = self.dataset.particles_path - - self.grid_levels.flat[:] = 0 + self.grid_levels.flat[:] = np.arange(self.max_level + 1) self.grids = np.empty(self.num_grids, dtype="object") grid_index_total = 0 - + self.level_dds = np.empty( + (self.max_level + 1, 3) + ) # we always pad to three dimensions regardless of coordinate system # Mesh grids - for mesh in set(self.meshshapes.values()): - (shape, spacing, offset, unit_si) = mesh + for level, mesh in self.meshshapes.items(): + (chunk_ls, shape, spacing, offset, unit_si) = mesh shape = np.asarray(shape) spacing = np.asarray(spacing) offset = np.asarray(offset) - # Total dimension of this grid - domain_dimension = np.asarray(shape, dtype=np.int32) - domain_dimension = np.append( - domain_dimension, np.ones(3 - len(domain_dimension)) - ) - # Number of grids of this shape - num_grids = min(shape[0], int(np.ceil(reduce(mul, shape) * self.vpg**-1))) - gle = offset * unit_si # self.dataset.domain_left_edge - gre = ( - domain_dimension[: spacing.size] * unit_si * spacing + gle - ) # self.dataset.domain_right_edge - gle = np.append(gle, np.zeros(3 - len(gle))) - gre = np.append(gre, np.ones(3 - len(gre))) - grid_dim_offset = np.linspace( - 0, domain_dimension[0], num_grids + 1, dtype=np.int32 - ) - grid_edge_offset = ( - grid_dim_offset * float(domain_dimension[0]) ** -1 * (gre[0] - gle[0]) - + gle[0] - ) - mesh_names = [] - for mname, mdata in self.meshshapes.items(): - if mesh == mdata: - mesh_names.append(str(mname)) - prev = 0 - for grid in np.arange(num_grids): - self.grid_dimensions[grid_index_total] = domain_dimension - self.grid_dimensions[grid_index_total][0] = ( - grid_dim_offset[grid + 1] - grid_dim_offset[grid] + print("level", level, type(level)) + # Total dimension of this domain on a per-mesh-level basis! + # domain_dimension = pad_to_threed(np.asarray(shape, dtype=np.int32), 1, self.dataset._geometry, self.dataset._axes_labels, self.dataset._data_order) + + for chunk in chunk_ls: # convert chunks to grids! + # dimension of individual chunks/grids + # [::-1]? + chunk_dim = pad_to_threed( + np.array(chunk.extent, dtype=np.int32), + 1, + self.dataset._geometry, + self.dataset._axes_labels, + self.dataset._data_order, + ) + + gle = ( + np.array(chunk.offset) * unit_si * spacing + offset + ) # to get to physical units, don't we need spacing? + gre = ( + np.array(chunk.offset) + np.array(chunk.extent) + ) * unit_si * spacing + offset + + gle = pad_to_threed( + gle, + 0, + self.dataset._geometry, + self.dataset._axes_labels, + self.dataset._data_order, + ) + gre = pad_to_threed( + gre, + 1, + self.dataset._geometry, + self.dataset._axes_labels, + self.dataset._data_order, + ) + # set things up + + self.grid_dimensions[grid_index_total] = chunk_dim + self.grid_particle_count[grid_index_total] = ( + 0 # does this change for particle patches? ) self.grid_left_edge[grid_index_total] = gle - self.grid_left_edge[grid_index_total][0] = grid_edge_offset[grid] self.grid_right_edge[grid_index_total] = gre - self.grid_right_edge[grid_index_total][0] = grid_edge_offset[grid + 1] - self.grid_particle_count[grid_index_total] = 0 + + # we are hiding higher level access + mesh_names = list(f.meshes)[0 :: self.max_level + 1] + if len(list(f.particles)) > 1: + part_names = list(f.particles) + else: + part_names = ["io"] + + chunk_index = np.array(chunk.offset, dtype=np.int32) + chunk_extent = np.array(chunk.extent, dtype=np.int32) + self.grids[grid_index_total] = self.grid( grid_index_total, self, - 0, - fi=prev, - fo=self.grid_dimensions[grid_index_total][0], + level, + fi=chunk_index, # field index, openpmd-api chunk.offset + fo=chunk_extent, # field offset, openpmd-api chunk.extent ft=mesh_names, + pt=part_names, + # pi = 0, #what should these be, because particles might not be sorted? + # po = None, + # pd = num_parts #particle dictionary, mirroring _pdata dict from amrex frontend ) - prev += self.grid_dimensions[grid_index_total][0] grid_index_total += 1 + print(unit_si) + self.level_dds[level, :] = (gre - gle) / chunk_dim - handled_ptypes = [] - + # handled_ptypes = [] + """ # Particle grids for species, count in self.numparts.items(): + # HAS NOT BEEN TESTED if "#" in species: - # This is a particlePatch + # This is a particlePatch, which should correspond with openPMD record component chunks + spec = species.split("#") - patch = f[bp + pp + "/" + spec[0] + "/particlePatches"] + patch = f.particles[spec[0]]["particlePatches"] # don't know about this domain_dimension = np.ones(3, dtype=np.int32) - for ind, axis in enumerate(list(patch["extent"].keys())): - domain_dimension[ind] = patch["extent/" + axis][()][int(spec[1])] + for ind, axis in enumerate(list(patch["extent"])): + domain_dimension[ind] = patch["extent"][axis][int(spec[1])] #not sure about this num_grids = int(np.ceil(count * self.vpg**-1)) gle = [] - for axis in patch["offset"].keys(): + for axis in patch["offset"]: gle.append( - get_component(patch, "offset/" + axis, int(spec[1]), 1)[0] + get_component(patch["offset"], axis, int(spec[1]), 1)[0] ) gle = np.asarray(gle) gle = np.append(gle, np.zeros(3 - len(gle))) gre = [] - for axis in patch["extent"].keys(): + for axis in patch["extent"]: gre.append( - get_component(patch, "extent/" + axis, int(spec[1]), 1)[0] + get_component(patch["extent"], axis, int(spec[1]), 1)[0] ) gre = np.asarray(gre) gre = np.append(gre, np.ones(3 - len(gre))) @@ -375,26 +515,33 @@ def _parse_index(self): npo, npo + count, num_grids + 1, dtype=np.int32 ) particle_names = [str(spec[0])] - elif str(species) not in handled_ptypes: - domain_dimension = self.dataset.domain_dimensions - num_grids = int(np.ceil(count * self.vpg**-1)) - gle = self.dataset.domain_left_edge - gre = self.dataset.domain_right_edge - particle_count = np.linspace(0, count, num_grids + 1, dtype=np.int32) - particle_names = [] - for pname, size in self.numparts.items(): - if size == count: - # Since this is not part of a particlePatch, - # we can include multiple same-sized ptypes - particle_names.append(str(pname)) - handled_ptypes.append(str(pname)) + + + + elif str(species) not in handled_ptypes: + domain_dimension = self.dataset.domain_dimensions + num_grids = int(np.ceil(count * self.vpg**-1)) + gle = self.dataset.domain_left_edge + gre = self.dataset.domain_right_edge + particle_count = np.linspace(0, count, num_grids + 1, dtype=np.int32) + particle_names = [] + for pname, size in self.numparts.items(): + if size == count: + # Since this is not part of a particlePatch, + # we can include multiple same-sized ptypes + particle_names.append(str(pname)) + handled_ptypes.append(str(pname)) + else: # A grid with this exact particle count has already been created continue for grid in np.arange(num_grids): - self.grid_dimensions[grid_index_total] = domain_dimension - self.grid_left_edge[grid_index_total] = gle - self.grid_right_edge[grid_index_total] = gre + + DONT MAKE NEW PARTICLE GRIDS, just add to existing field ones + # self.grid_dimensions[grid_index_total] = domain_dimension + # self.grid_left_edge[grid_index_total] = gle + # self.grid_right_edge[grid_index_total] = gre + self.grid_particle_count[grid_index_total] = ( particle_count[grid + 1] - particle_count[grid] ) * len(particle_names) @@ -407,17 +554,49 @@ def _parse_index(self): pt=particle_names, ) grid_index_total += 1 + """ + print("ENDINDEX") def _populate_grid_objects(self): """This initializes all grids. - Additionally, it should set up Children and Parent lists on each grid object. - openPMD is not adaptive and thus there are no Children and Parents for any grid. + This almost works but calling 'self._reconstruct_parent_child()' before the loop + (with initializing the AMRGRidPatch baseclass) causes an attribute error in grid_patch.py:179 + AttributeError: 'OpenPMDGrid' object has no attribute 'LeftEdge'. + Calling 'self._reconstruct_parent_child()' after the loop causes an attribute error in grid_patch.py:68 + AttributeError: 'list' object has no attribute 'dds' because the Parent object is a list of grids and + not a single grid as we allow overlapping edges for grids of different levels. + """ + self.grids = np.array(self.grids, dtype="object") + # self._reconstruct_parent_child() for i in np.arange(self.num_grids): self.grids[i]._prepare_grid() self.grids[i]._setup_dx() - self.max_level = 0 + # self._reconstruct_parent_child() + + def _reconstruct_parent_child(self): + if self.max_level == 0: + return + mask = np.empty(len(self.grids), dtype="int32") + mylog.debug("First pass; identifying child grids") + for i, grid in enumerate(self.grids): + print(i, "index") + get_box_grids_level( + self.grid_left_edge[i, :], + self.grid_right_edge[i, :], + self.grid_levels[i].item() + 1, + self.grid_left_edge, + self.grid_right_edge, + self.grid_levels, + mask, + ) + ids = np.where(mask.astype("bool")) # where is a tuple + grid._children_ids = ids[0] + grid._id_offset + mylog.debug("Second pass; identifying parents") + for i, grid in enumerate(self.grids): # Second pass + for child in grid.Children: + child._parent_id.append(i + grid._id_offset) class OpenPMDDataset(Dataset): @@ -431,7 +610,7 @@ class OpenPMDDataset(Dataset): - particle and mesh positions are *absolute* with respect to the simulation origin. """ - _load_requirements = ["h5py"] + _load_requirements = ["openpmd_api"] _index_class = OpenPMDHierarchy _field_info_class = OpenPMDFieldInfo @@ -444,11 +623,17 @@ def __init__( unit_system="mks", **kwargs, ): - self._handle = HDF5FileHandler(filename) + try: + self._series_handle = OpenPMDFileHandler(filename) + self._handle = self._series_handle.handle.iterations[ + list(self._series_handle.handle.iterations)[0] + ] + except TypeError: + pass self.gridsize = kwargs.pop("open_pmd_virtual_gridsize", 10**9) - self.standard_version = Version(self._handle.attrs["openPMD"].decode()) + self.standard_version = Version(self._series_handle.handle.openPMD) self.iteration = kwargs.pop("iteration", None) - self._set_paths(self._handle, path.dirname(filename), self.iteration) + self._set_paths(self._series_handle, path.dirname(filename), self.iteration) Dataset.__init__( self, filename, @@ -459,10 +644,7 @@ def __init__( self.storage_filename = storage_filename self.fluid_types += ("openPMD",) try: - particles = tuple( - str(c) - for c in self._handle[self.base_path + self.particles_path].keys() - ) + particles = tuple(str(c) for c in self._handle.particles) if len(particles) > 1: # Only use on-disk particle names if there is more than one species self.particle_types = particles @@ -473,59 +655,69 @@ def __init__( pass def _set_paths(self, handle, path, iteration): - """Parses relevant hdf5-paths out of ``handle``. + """Parses relevant backend-paths out of ``handle``. + Now this should be agnostic to backend format, just handling + openpmd-api handle paths Parameters ---------- - handle : h5py.File + handle : openpmd_api.openpmd_api_cxx.Series path : str - (absolute) filepath for current hdf5 container + (absolute) filepath for current hdf5, adios2, or (in the future) json container """ iterations = [] if iteration is None: - iteration = list(handle["/data"].keys())[0] - encoding = handle.attrs["iterationEncoding"].decode() - if "groupBased" in encoding: - iterations = list(handle["/data"].keys()) - mylog.info("Found %s iterations in file", len(iterations)) - elif "fileBased" in encoding: - itformat = handle.attrs["iterationFormat"].decode().split("/")[-1] - regex = "^" + itformat.replace("%T", "[0-9]+") + "$" - if path == "": + iteration = list(handle.handle.iterations)[0] + # moved this in + encoding = str(handle.handle.iteration_encoding) + if "Iteration_Encoding.group_based" in encoding: + iterations = list(handle.handle.iterations) + mylog.info( + "Single iteration found: %s", iteration + ) # because of the groupbased dataset is_valid + elif "Iteration_Encoding.file_based" in encoding: + itformat = handle.handle.iteration_format + regex = ( + "^" + itformat.replace("%06T", "[0-9]+") + "$" + ) # issues converting between yt and openpmd_api file patterns + if path == "": + mylog.warning( + "For file based iterations, please use absolute file paths!" + ) + pass + for filename in listdir(path): + if match(regex, filename): + iterations.append(filename) + mylog.info("Found %s iterations in directory", len(iterations)) + + if len(iterations) == 0: + mylog.warning("No iterations found!") + if "Iteration_Encoding.group_based" in encoding and len(iterations) > 1: + # this can happen only when a user doesn't use yt.load mylog.warning( - "For file based iterations, please use absolute file paths!" + "More than one iteration found. Use OpenPMDDatasetSeries to view " + "other iterations. Loaded first iteration (%s)", + iteration, ) - pass - for filename in listdir(path): - if match(regex, filename): - iterations.append(filename) - mylog.info("Found %s iterations in directory", len(iterations)) - - if len(iterations) == 0: - mylog.warning("No iterations found!") - if "groupBased" in encoding and len(iterations) > 1: - mylog.warning("Only chose to load one iteration (%s)", iteration) - + # moved all this in self.base_path = f"/data/{iteration}/" - try: - self.meshes_path = self._handle["/"].attrs["meshesPath"].decode() - handle[self.base_path + self.meshes_path] - except KeyError: + try: # we could find other ways to do this, but works to convert to iteration + self.meshes_path = self._series_handle.handle.meshes_path + except openpmd_api.io.ErrorNoSuchAttribute: if self.standard_version <= Version("1.1.0"): mylog.info( - "meshesPath not present in file. " + "meshes_path not present in file. " "Assuming file contains no meshes and has a domain extent of 1m^3!" ) self.meshes_path = None else: raise try: - self.particles_path = self._handle["/"].attrs["particlesPath"].decode() - handle[self.base_path + self.particles_path] - except KeyError: + self.particles_path = self._series_handle.handle.particles_path + except openpmd_api.io.ErrorNoSuchAttribute: if self.standard_version <= Version("1.1.0"): mylog.info( - "particlesPath not present in file." + "particles_path not present in file." " Assuming file contains no particles!" ) self.particles_path = None @@ -547,84 +739,121 @@ def _set_code_unit_attributes(self): def _parse_parameter_file(self): """Read in metadata describing the overall data on-disk.""" - f = self._handle - bp = self.base_path - mp = self.meshes_path - + f = self._handle # this is an iteration self.parameters = 0 self._periodicity = np.zeros(3, dtype="bool") self.refine_by = 1 self.cosmological_simulation = 0 + max_lev = 0 + first_mesh = f.meshes[list(f.meshes)[0]] + self._axes_labels = first_mesh.axis_labels + self._data_order = first_mesh.data_order + self._geometry = str(first_mesh.geometry) + del first_mesh try: shapes = {} left_edges = {} right_edges = {} - meshes = f[bp + mp] - for mname in meshes.keys(): - mesh = meshes[mname] - if isinstance(mesh, h5py.Group): - shape = np.asarray(mesh[list(mesh.keys())[0]].shape) + for mname in f.meshes: + mesh = f.meshes[mname] + # only access these three times, but they should all be the same? + if isinstance(mesh, openpmd_api.io.openpmd_api_cxx.Mesh): + # all mesh axes should have same shape + shape = np.asarray(mesh[list(mesh)[0]].shape) else: - shape = np.asarray(mesh.shape) - spacing = np.asarray(mesh.attrs["gridSpacing"]) - offset = np.asarray(mesh.attrs["gridGlobalOffset"]) - unit_si = np.asarray(mesh.attrs["gridUnitSI"]) + shape = np.asarray( + mesh.shape + ) # if these aren't meshes, what are they? + spacing = np.asarray(mesh.grid_spacing) + offset = np.asarray(mesh.grid_global_offset) + unit_si = np.asarray(mesh.grid_unit_SI) le = offset * unit_si + if "lvl" in mname: + max_lev = max(max_lev, int(mname.split("lvl")[-1])) re = le + shape * unit_si * spacing - shapes[mname] = shape - left_edges[mname] = le - right_edges[mname] = re + shapes[mname] = ( + shape # component_ordering(shape, self._geometry, self._data_order, self._axes_labels) + ) + left_edges[mname] = ( + le # component_ordering(le, self._geometry, self._data_order, self._axes_labels) + ) + right_edges[mname] = ( + re # component_ordering(re, self._geometry, self._data_order, self._axes_labels) + ) + # hidden as this is traditionally an index variable + self._max_level = max_lev lowest_dim = np.min([len(i) for i in shapes.values()]) + # print([len(i) for i in shapes.values()]) shapes = np.asarray([i[:lowest_dim] for i in shapes.values()]) left_edges = np.asarray([i[:lowest_dim] for i in left_edges.values()]) right_edges = np.asarray([i[:lowest_dim] for i in right_edges.values()]) fs = [] dle = [] dre = [] + # Make these Row-ordered and then pad to three dimensions for i in np.arange(lowest_dim): - fs.append(np.max(shapes.transpose()[i])) + fs.append(np.max(shapes.transpose()[i]).astype(int)) dle.append(np.min(left_edges.transpose()[i])) dre.append(np.min(right_edges.transpose()[i])) self.dimensionality = len(fs) - self.domain_dimensions = np.append(fs, np.ones(3 - self.dimensionality)) - self.domain_left_edge = np.append(dle, np.zeros(3 - len(dle))) - self.domain_right_edge = np.append(dre, np.ones(3 - len(dre))) + self.domain_dimensions = pad_to_threed( + np.array(fs, dtype=np.int32), + 1, + self._geometry, + self._axes_labels, + self._data_order, + ) + self.domain_left_edge = pad_to_threed( + np.array(dle, dtype=np.float64), + 0, + self._geometry, + self._axes_labels, + self._data_order, + ) + self.domain_right_edge = pad_to_threed( + np.array(dre, dtype=np.float64), + 1, + self._geometry, + self._axes_labels, + self._data_order, + ) except (KeyError, TypeError, AttributeError): + # maybe we should throw an error for older versions of the standard. if self.standard_version <= Version("1.1.0"): self.dimensionality = 3 - self.domain_dimensions = np.ones(3, dtype=np.float64) + self.domain_dimensions = np.ones(3, dtype=np.int32) self.domain_left_edge = np.zeros(3, dtype=np.float64) self.domain_right_edge = np.ones(3, dtype=np.float64) else: raise - self.current_time = f[bp].attrs["time"] * f[bp].attrs["timeUnitSI"] + self.current_time = f.time * f.time_unit_SI @classmethod def _is_valid(cls, filename: str, *args, **kwargs) -> bool: """Checks whether the supplied file can be read by this frontend.""" - if not valid_hdf5_signature(filename): - return False - if cls._missing_load_requirements(): return False - try: - with h5py.File(filename, mode="r") as f: - attrs = list(f["/"].attrs.keys()) - for i in opmd_required_attributes: - if i not in attrs: - return False - - if Version(f.attrs["openPMD"].decode()) not in ompd_known_versions: + handle = openpmd_api.io.Series( + filename, openpmd_api.io.Access_Type.read_only + ) + for i in opmd_required_attributes: + if i not in handle.attributes: + handle.close() return False - - if f.attrs["iterationEncoding"].decode() == "fileBased": - return True - + if Version(handle.openPMD) not in ompd_known_versions: + handle.close() return False - except OSError: + if "Iteration_Encoding.group_based" in str(handle.iteration_encoding): + iteration = kwargs.pop("iteration", None) + if len(list(handle.iterations)) > 1 and iteration is None: + handle.close() + return False + handle.close() + return True + except (OSError, RuntimeError): return False @@ -637,10 +866,15 @@ class OpenPMDDatasetSeries(DatasetSeries): def __init__(self, filename): super().__init__([]) - self.handle = h5py.File(filename, mode="r") + # lets keep the yt patterns, then convert to %T and %06T for openpmd_api handling + self.handle = openpmd_api.io.Series( + filename, openpmd_api.io.Access_Type.read_only + ) + if len(self.handle.iterations) < 2: + mylog.info("Single iteration found, use OpenPMDDataset") self.filename = filename self._pre_outputs = sorted( - np.asarray(list(self.handle["/data"].keys()), dtype="int64") + np.asarray(list(self.handle.iterations), dtype="int64") ) def __iter__(self): @@ -662,36 +896,44 @@ def _load(self, it, **kwargs): class OpenPMDGroupBasedDataset(Dataset): - _load_requirements = ["h5py"] + _load_requirements = ["openpmd_api"] _index_class = OpenPMDHierarchy _field_info_class = OpenPMDFieldInfo def __new__(cls, filename, *args, **kwargs): ret = object.__new__(OpenPMDDatasetSeries) ret.__init__(filename) + mylog.info( + "Group Based Dataset was cast to OpenPMDDatasetSeries.\n" + "Inspect single interations with dataset_series[iteration]\n" + "where iteration is in dataset_series._pre_outputs" + ) return ret @classmethod def _is_valid(cls, filename: str, *args, **kwargs) -> bool: - if not valid_hdf5_signature(filename): - return False - if cls._missing_load_requirements(): return False - try: - with h5py.File(filename, mode="r") as f: - attrs = list(f["/"].attrs.keys()) - for i in opmd_required_attributes: - if i not in attrs: - return False - - if Version(f.attrs["openPMD"].decode()) not in ompd_known_versions: + handle = openpmd_api.io.Series( + filename, openpmd_api.io.Access_Type.read_only + ) + for i in opmd_required_attributes: + if i not in handle.attributes: + handle.close() return False - - if f.attrs["iterationEncoding"].decode() == "groupBased": - return True - + if Version(handle.openPMD) not in ompd_known_versions: + handle.close() return False - except OSError: + encoding = str(handle.iteration_encoding) + iterations = list(handle.iterations) + if "Iteration_Encoding.group_based" in encoding: + iteration = kwargs.pop("iteration", None) + # if iteration is given, not a dataset series + if len(iterations) > 1 and iteration is None: + handle.close() + return True + handle.close() + return False + except (OSError, RuntimeError): return False diff --git a/yt/frontends/open_pmd/fields.py b/yt/frontends/open_pmd/fields.py index a22cb21742..be69c8de5b 100644 --- a/yt/frontends/open_pmd/fields.py +++ b/yt/frontends/open_pmd/fields.py @@ -5,7 +5,6 @@ from yt.frontends.open_pmd.misc import is_const_component, parse_unit_dimension from yt.units.yt_array import YTQuantity from yt.utilities.logger import ytLogger as mylog -from yt.utilities.on_demand_imports import _h5py as h5py from yt.utilities.physical_constants import mu_0, speed_of_light @@ -141,20 +140,19 @@ class OpenPMDFieldInfo(FieldInfoContainer): def __init__(self, ds, field_list): f = ds._handle - bp = ds.base_path - mp = ds.meshes_path - pp = ds.particles_path try: - fields = f[bp + mp] - for fname in fields.keys(): + fields = f.meshes + for fname in list(fields): field = fields[fname] - if isinstance(field, h5py.Dataset) or is_const_component(field): + if len(list(field)) == 1 or is_const_component(field): # Don't consider axes. # This appears to be a vector field of single dimensionality - ytname = str("_".join([fname.replace("_", "-")])) + ytname = str( + "_".join([fname.replace("_", "-")]) + ) # doesn't do anything for us parsed = parse_unit_dimension( - np.asarray(field.attrs["unitDimension"], dtype="int64") + np.asarray(field.unit_dimension, dtype="int64") ) unit = str(YTQuantity(1, parsed).units) aliases = [] @@ -164,10 +162,10 @@ def __init__(self, ds, field_list): self._mag_fields.append(ytname) self.known_other_fields += ((ytname, (unit, aliases, None)),) else: - for axis in field.keys(): + for axis in list(field): ytname = str("_".join([fname.replace("_", "-"), axis])) parsed = parse_unit_dimension( - np.asarray(field.attrs["unitDimension"], dtype="int64") + np.asarray(field.unit_dimension, dtype="int64") ) unit = str(YTQuantity(1, parsed).units) aliases = [] @@ -182,29 +180,28 @@ def __init__(self, ds, field_list): pass try: - particles = f[bp + pp] - for pname in particles.keys(): + particles = f.particles + for pname in list(particles): species = particles[pname] - for recname in species.keys(): + for recname in list(species): try: record = species[recname] - parsed = parse_unit_dimension(record.attrs["unitDimension"]) + parsed = parse_unit_dimension(record.unit_dimension) unit = str(YTQuantity(1, parsed).units) + ytattrib = str(recname).replace("_", "-") if ytattrib == "position": # Symbolically rename position to preserve yt's # interpretation of the pfield particle_position is later # derived in setup_absolute_positions in the way yt expects ytattrib = "positionCoarse" - if isinstance(record, h5py.Dataset) or is_const_component( - record - ): + if len(list(record)) == 1 or is_const_component(record): name = ["particle", ytattrib] self.known_particle_fields += ( (str("_".join(name)), (unit, [], None)), ) else: - for axis in record.keys(): + for axis in list(record): aliases = [] name = ["particle", ytattrib, axis] ytname = str("_".join(name)) diff --git a/yt/frontends/open_pmd/io.py b/yt/frontends/open_pmd/io.py index d2fb5ce9dc..e3c8504258 100644 --- a/yt/frontends/open_pmd/io.py +++ b/yt/frontends/open_pmd/io.py @@ -2,12 +2,15 @@ import numpy as np -from yt.frontends.open_pmd.misc import get_component, is_const_component -from yt.geometry.selection_routines import GridSelector +from yt.frontends.open_pmd.misc import ( + coordinate_mapping, + get_component, + is_const_component, +) from yt.utilities.io_handler import BaseIOHandler -class IOHandlerOpenPMDHDF5(BaseIOHandler): +class IOHandlerOpenPMD(BaseIOHandler): _field_dtype = "float32" _dataset_type = "openPMD" @@ -15,8 +18,6 @@ def __init__(self, ds, *args, **kwargs): self.ds = ds self._handle = ds._handle self.base_path = ds.base_path - self.meshes_path = ds.meshes_path - self.particles_path = ds.particles_path self._array_fields = {} self._cached_ptype = "" @@ -32,27 +33,46 @@ def _fill_cache(self, ptype, index=0, offset=None): """ if str((ptype, index, offset)) not in self._cached_ptype: self._cached_ptype = str((ptype, index, offset)) - pds = self._handle[self.base_path + self.particles_path + "/" + ptype] - axes = list(pds["position"].keys()) + pds = self._handle.particles[ptype] + axes = list(pds["position"]) if offset is None: - if is_const_component(pds["position/" + axes[0]]): - offset = pds["position/" + axes[0]].attrs["shape"] - else: - offset = pds["position/" + axes[0]].len() + offset = pds["position"][axes[0]].shape[0] self.cache = np.empty((3, offset), dtype=np.float64) - for i in np.arange(3): - ax = "xyz"[i] - if ax in axes: - np.add( - get_component(pds, "position/" + ax, index, offset), - get_component(pds, "positionOffset/" + ax, index, offset), - self.cache[i], - ) - else: - # Pad accordingly with zeros to make 1D/2D datasets compatible + for i in np.arange(len(self.ds._axes_labels)): # only in dims of simulation + # ax = "xyz"[i] # cartesian only + # this is done to match the transpose and padding done for meshes + ax = sorted(self.ds._axes_labels)[i] + # problem, this is where we should correct particle axes order + # if ax in axes: + """ + if is_const_component(pds["position"][ax]): + # Pad accordingly with 0.5 to make 1D/2D datasets compatible + # as missing dimensions are given empty bounds [0,1] # These have to be the same shape as the existing axes since that # equals the number of particles - self.cache[i] = np.zeros(offset) + self.cache[i] = np.full(pds["position"][ax].shape[0], 0.5, dtype=np.float64) + """ + self.cache[i] = np.add( + get_component(pds["position"], ax, index, offset), + get_component(pds["positionOffset"], ax, index, offset), + ) + # now the case for dims not in simulation + for _ax in {"x", "y", "z"} - set(self.ds._axes_labels): + # Pad accordingly with 0.5 to make 1D/2D datasets compatible + # as missing dimensions are given empty bounds [0,1] + # These have to be the same shape as the existing axes since that + # equals the number of particles. Only work for Cartesian coordinates + i += 1 + self.cache[i] = np.full(offset, 0.5, dtype=np.float64) + + # TODO should we have these functions as suggested by the creating a new code frontend page? + # def _read_particle_coords(self, chunks, ptf): + # yield from ( + # (ptype, xyz, 0.0) + # for ptype, xyz in self._read_particle_fields(chunks, ptf, None) + # ) + + # def _read_particle_fields(self, chunks, ptf, selector): def _read_particle_selection(self, chunks, selector, fields): """Read particle fields for particle species masked by a selection. @@ -75,9 +95,7 @@ def _read_particle_selection(self, chunks, selector, fields): values are (N,) ndarrays with data from that field """ f = self._handle - bp = self.base_path - pp = self.particles_path - ds = f[bp + pp] + ds = f.particles unions = self.ds.particle_unions chunks = list(chunks) # chunks is a generator @@ -101,37 +119,114 @@ def _read_particle_selection(self, chunks, selector, fields): particle_count[pfield] = self.ds.particle_type_counts[ptype] ptf[ptype].append(pname) rfm[pfield].append(pfield) + # bit of a hack, but for overlapping rv[pfield] = np.empty((particle_count[pfield],), dtype=np.float64) ind[pfield] = 0 + print( + ptype, pname, ptf[ptype], rfm[pfield], "HERENOW", particle_count[pfield] + ) for ptype in ptf: for chunk in chunks: + print("len(chunk.objs)", len(chunk.objs)) for grid in chunk.objs: + print(len(chunk.objs), "new_lengths") if str(ptype) == "io": - species = list(ds.keys())[0] + species = list(ds)[0] else: species = ptype if species not in grid.ptypes: continue # read particle coords into cache - self._fill_cache(species, grid.pindex, grid.poffset) + # print('pindex,poffset', grid.pindex, grid.poffset, grid.LeftEdge, grid.RightEdge) + # grid._read_particles() #fairly inefficient, would be sweet to do better + # print('pindex,poffset', grid.pindex, grid.poffset, grid.LeftEdge, grid.RightEdge, grid.RightEdge - grid.LeftEdge) + # correctly set our particles indices and offsets according to species. Choice of position is arbitrary here + print( + "lengths", + grid.id, + len( + ds[species]["position"][ + list(ds[species]["position"])[0] + ].available_chunks() + ), + ) + grid.pindex = ( + ds[species]["position"][list(ds[species]["position"])[0]] + .available_chunks()[grid.id] + .offset[0] + ) + grid.poffset = ( + ds[species]["position"][list(ds[species]["position"])[0]] + .available_chunks()[grid.id] + .extent[0] + ) + self._fill_cache(species, grid.pindex, grid.poffset) # old + + # self._fill_cache(species, grid.pindex, grid.poffset) #old + print( + "selector.count", + selector.count_points( + self.cache[0], self.cache[1], self.cache[2], 0.0 + ), + ) + # grid._read_particles() + # problem here because don't know what is happening mask = selector.select_points( - self.cache[0], self.cache[1], self.cache[2], 0.0 + self.cache[0], # x + self.cache[1], # y + self.cache[2], # z + 0.0, # radius ) if mask is None: continue + grid._read_particles() pds = ds[species] for field in ptf[ptype]: - component = "/".join(field.split("_")[1:]) - component = component.replace("positionCoarse", "position") - component = component.replace("-", "_") - data = get_component(pds, component, grid.pindex, grid.poffset)[ - mask - ] + print(field, ptype, "fieldptype") + # just chop off 'particle', + record = field.split("_")[1:] + component = record[-1] + if len(record) > 1: # specifying axes here + record = record[-2].replace("positionCoarse", "position") + # component = component.replace("-", "_") + component = coordinate_mapping(component) + # make sure padded dims are in the cell-centered selection region + if ( + record == "position" + and is_const_component(pds[record][component]) + and component not in self.ds._axes_labels + ): + # viewing particles along padded dimensions is fairly useless + print("HERE") + # data = np.full(pds[record][component].shape[0], 0.5, dtype = np.float64)[grid._pdata[str(species)]['mask']] + # else: + # print('first5,last5', grid._pdata[str(species)]['mask'][:5], grid._pdata[str(species)]['mask'][-5:]) + print(pds[record][component].shape[0], "shape") + data = get_component( + pds[record], component, grid.pindex, grid.poffset + )[mask] # [grid._pdata[str(species)]['mask']] + # print('HERE', np.max(data), np.min(data), component) + else: # just particle_mass, charge, weighting,and id + print(type(grid.pindex), "TYPE") + data = get_component( + pds[component], + list(pds[component])[ + 0 + ], # takes care of the SCALAR key for openpmd_api + grid.pindex, + grid.poffset, + )[mask] # [grid._pdata[str(species)]['mask']] + # print('mask shapes', np.shape(mask), np.sum(np.diff(grid._pdata[str(species)]['mask']))) for request_field in rfm[ptype, field]: + print( + "shapes", ind[request_field], data.shape, np.shape(data) + ) + print(np.shape(rv[request_field]), request_field) rv[request_field][ ind[request_field] : ind[request_field] + data.shape[0] ] = data + print(np.shape(rv[request_field])) ind[request_field] += data.shape[0] for field in fields: @@ -161,47 +256,97 @@ def _read_fluid_selection(self, chunks, selector, fields, size): keys are tuples (ftype, fname) representing a field values are flat (``size``,) ndarrays with data from that field """ + f = self._handle - bp = self.base_path - mp = self.meshes_path - ds = f[bp + mp] + ds = f.meshes chunks = list(chunks) - - rv = {} - ind = {} - - if isinstance(selector, GridSelector): - if not (len(chunks) == len(chunks[0].objs) == 1): - raise RuntimeError - + rv = {} # flat fluid array + ind = {} # flat indices? + # this makes me think there will be problems + # if isinstance(selector, GridSelector): + # print() + # if not (len(chunks) == len(chunks[1].objs) == 1): + # raise RuntimeError + # print(size, 'presize') if size is None: size = sum(g.count(selector) for chunk in chunks for g in chunk.objs) + # print(size) for field in fields: rv[field] = np.empty(size, dtype=np.float64) ind[field] = 0 - for ftype, fname in fields: field = (ftype, fname) - for chunk in chunks: - for grid in chunk.objs: + for chunk in chunks: # one chunk per level? + for grid in chunk.objs: # grids per level mask = grid._get_selector_mask(selector) if mask is None: continue - component = fname.replace("_", "/").replace("-", "_") - if component.split("/")[0] not in grid.ftypes: + if grid.Level > 0: # we hide grid levels from user + component = ( + fname.split("_")[0] + + f"_lvl{str(grid.Level)}_" + + fname.split("_")[-1] + ) + else: + component = fname + component = component.replace("-", "_") + if "_".join(fname.split("_")[:-1]) not in grid.ftypes: + # we get here due to our last chunk holding just particles data = np.full(grid.ActiveDimensions, 0, dtype=np.float64) else: - data = get_component(ds, component, grid.findex, grid.foffset) + component_field = "_".join(component.split("_")[:-1]) + component_axes = component.split("_")[-1] + data = get_component( + ds[component_field], + component_axes, + grid.findex.copy(), + grid.foffset.copy(), + ) # The following is a modified AMRGridPatch.select(...) - data.shape = ( - mask.shape - ) # Workaround - casts a 2D (x,y) array to 3D (x,y,1) + # print(mask.shape, 'mask shape', data.shape, fname, grid.Level, grid.ftypes, grid.ptypes) + """ + THIS WORKS""" + # if data.shape[0] != mask.shape[0]: + if ds[component_field].data_order == "C": + # this only works for rectangular, non particle grids + # print('before swap', np.shape(data)) + if len(data.shape) == 3: + data = np.transpose(data, (2, 1, 0)) + elif len(data.shape) == 2: + data = np.transpose( + data.reshape(data.shape[0], data.shape[1], 1), (1, 0, 2) + ) + elif len(data.shape) == 1: + data = data.reshape(data.shape[0], 1, 1) + # data = np.swapaxes(data, 1,0) + # data.shape + # data = np.transpose(data, _3d) + # data = np.swapaxes(data,1,0) #this works when we pad last axes as 1 in 3c + # print('datashape,maskshape', data.shape, mask.shape) + data.shape = mask.shape + # print(data.shape, 'postahape', np.shape(data)) + # OLD VERSION + # data.shape = ( + # make.shape + # ) + # Workaround - casts a 2D (x,y) array to 3D (x,y,1) + # data.reshape(mask.shape) + # Now we want (z,x) C orderded daga to be cast to (z,1,x) + """ + Doesn't work""" + # data = np.reshape(data, mask.shape) count = grid.count(selector) - rv[field][ind[field] : ind[field] + count] = data[mask] - ind[field] += count - + # print(count,'count') + rv[field][ind[field] : ind[field] + count] = data[ + mask + ] # multiply by unit SI here? + ind[field] += count # flattened index + # it would be sweet to flush here + # how could we do this without looping? for field in fields: rv[field] = rv[field][: ind[field]] rv[field].flatten() - return rv + + +# type: ignore diff --git a/yt/frontends/open_pmd/misc.py b/yt/frontends/open_pmd/misc.py index 0bb6607be3..402736f9b2 100644 --- a/yt/frontends/open_pmd/misc.py +++ b/yt/frontends/open_pmd/misc.py @@ -52,11 +52,11 @@ def parse_unit_dimension(unit_dimension): def is_const_component(record_component): - """Determines whether a group or dataset in the HDF5 file is constant. + """Determines whether an iteration record is constant Parameters ---------- - record_component : h5py.Group or h5py.Dataset + record_component : openpmd_api.openpmd_api_cxx.Record_component Returns ------- @@ -68,23 +68,104 @@ def is_const_component(record_component): .. https://github.com/openPMD/openPMD-standard/blob/latest/STANDARD.md, section 'Constant Record Components' """ - return "value" in record_component.attrs.keys() + return "value" in record_component.attributes -def get_component(group, component_name, index=0, offset=None): - """Grabs a dataset component from a group as a whole or sliced. +""" +def component_ordering(record_component = np.array, geometry = str, data_order = str, axes_labels = list): + This function converts arrays of the on-disk record component shape to a readable, column-major style + Example + ------- + np.shape(record_component_array) == (256, 512), C-order (row-major, ie axes_labels = ['z', 'x'] + + component_ordering(record_component_array) == np.array([512,256]) to represent [nX, nZ] where nX and nZ are + the number of cells in the x and z dimensions + + Parameters + ---------- + mesh: openpmd_api.openpmd_api_cxx.Mesh + record_axis: a string which specifies which desired axis + + Returns + ------- + int + specifying index of axis + if "cartesian" in geometry: + if data_order == "C": #row major + assert(axes_labels == sorted(axes_labels)[::-1]) #is this true generally? + #[::-1] + return record_component + elif data_order == "F": #column major + assert(axes_labels == sorted(axes_labels)) + mylog.warning("Fortran dataOrder is not yet supported :( ") + raise NotImplementedError + else: + mylog.warning("'{}' geometry is not yet supported :( )".format(geometry)) + raise NotImplementedError +""" + + +def pad_to_threed( + record_component=np.array, + fill_value=int, + geometry=str, + axes_labels=list, + data_order=str, +): + """This function converts grid and domain dimension/edge arrays to column-major, 3D arrays with + padded dimensions filled with fill_value. + """ + if "cartesian" in geometry: + if data_order == "C": + assert sorted(axes_labels) == axes_labels[::-1] + record_component = record_component[::-1] + elif data_order == "F": + assert sorted(axes_labels) == axes_labels + record_component = np.append( + record_component, np.full(3 - record_component.shape[0], fill_value) + ) + return record_component + else: + mylog.warning(f"'{geometry}' geometry is not yet supported :( )") + raise NotImplementedError + +def coordinate_mapping(component=str): + """Conversion between yt axes and openpmd_api axes. Parameters ---------- - group : h5py.Group - component_name : str - relative path of the component in the group + component : string + component is a constant record component refering to the axis omitted in the simulation + + Example + -------- + Dataset has MeshRecord.axes_labels == ['z', 'x'] which is transposed for yt to + result in a OpenPMDDataset with domain_dimensions = [nX, nZ, 1] + where nX and nZ represent the number of cells along the now X and Y axes, and the z axis has been padded. + + When we annotate particles, yt will call on x and y particle postitions, but y particle positions are + actually z particle positions in the openpmd_api. + + """ + coord_dict = {"x": "x", "y": "z", "z": "y"} + return coord_dict[component] + + +def get_component(record, record_axis, index=0, extent=None): + """Grabs a Record Component from a Record as a whole or sliced. + + Parameters + ---------- + record : openpmd_api_cxx.Record + record_axis : str + the openpmd_api_cxx.Record_Component string key, not necessarily a physical axis index : int, optional first entry along the first axis to read - offset : int, optional + extent : int, optional number of entries to read - if not supplied, every entry after index is returned - + note that the previous frontend named this variable offset, + which we thinks adds some confusion. + If not supplied, every entry after index is returned. Notes ----- This scales every entry of the component with the respective "unitSI". @@ -95,18 +176,32 @@ def get_component(group, component_name, index=0, offset=None): (N,) 1D in case of particle data (O,P,Q) 1D/2D/3D in case of mesh data """ - record_component = group[component_name] - unit_si = record_component.attrs["unitSI"] + record_component = record[record_axis] + unit_si = record_component.get_attribute("unitSI") if is_const_component(record_component): - shape = np.asarray(record_component.attrs["shape"]) - if offset is None: - shape[0] -= index + shape = np.asarray(record_component.get_attribute("shape")) + if extent is None: + shape -= index else: - shape[0] = offset + shape = extent # component is constant, craft an array by hand - return np.full(shape, record_component.attrs["value"] * unit_si) + registered = record_component.get_attribute("value") + return np.full(shape, registered * unit_si) else: - if offset is not None: - offset += index - # component is a dataset, return it (possibly masked) - return np.multiply(record_component[index:offset], unit_si) + if extent is not None: + extent += index + if len(record_component.shape) == 3: + registered = record_component[ + index[0] : extent[0], index[1] : extent[1], index[2] : extent[2] + ] + elif len(record_component.shape) == 2: + registered = record_component[ + index[0] : extent[0], index[1] : extent[1] + ] + elif len(record_component.shape) == 1: + registered = record_component[index:extent] + else: + # when we don't slice we have to `load_chunk()` + registered = record_component.load_chunk() + record_component.series_flush() + return np.multiply(registered, unit_si) diff --git a/yt/frontends/open_pmd/tests/test_outputs.py b/yt/frontends/open_pmd/tests/test_outputs.py index 61c55b4dfb..db1ecca3c7 100644 --- a/yt/frontends/open_pmd/tests/test_outputs.py +++ b/yt/frontends/open_pmd/tests/test_outputs.py @@ -30,7 +30,7 @@ ] -@requires_module("h5py") +@requires_module("openpmd_api") @requires_file(threeD) def test_3d_out(): ds = data_dir_load(threeD) @@ -53,7 +53,7 @@ def test_3d_out(): assert_almost_equal(ds.domain_right_edge - ds.domain_left_edge, domain_width) -@requires_module("h5py") +@requires_module("openpmd_api") @requires_file(twoD) def test_2d_out(): ds = data_dir_load(twoD) @@ -71,6 +71,7 @@ def test_2d_out(): ("openPMD", "J_z"), ("openPMD", "rho"), ] + domain_dimensions = [51, 201, 1] * np.ones_like(ds.domain_dimensions) domain_width = [3.06e-05, 2.01e-05, 1e0] * np.ones_like(ds.domain_left_edge) @@ -87,7 +88,7 @@ def test_2d_out(): assert_almost_equal(ds.domain_right_edge - ds.domain_left_edge, domain_width) -@requires_module("h5py") +@requires_module("openpmd_api") @requires_file(noFields) def test_no_fields_out(): ds = data_dir_load(noFields) @@ -110,7 +111,7 @@ def test_no_fields_out(): assert_almost_equal(ds.domain_right_edge - ds.domain_left_edge, domain_width) -@requires_module("h5py") +@requires_module("openpmd_api") @requires_file(noParticles) def test_no_particles_out(): ds = data_dir_load(noParticles) @@ -136,7 +137,7 @@ def test_no_particles_out(): assert_almost_equal(ds.domain_right_edge - ds.domain_left_edge, domain_width) -@requires_module("h5py") +@requires_module("openpmd_api") @requires_file(groupBased) def test_groupBased_out(): dss = load(groupBased) diff --git a/yt/utilities/file_handler.py b/yt/utilities/file_handler.py index 2b4640b731..4e7c1036b4 100644 --- a/yt/utilities/file_handler.py +++ b/yt/utilities/file_handler.py @@ -92,6 +92,24 @@ def valid_netcdf_signature(fn: str, /) -> bool: ) +class OpenPMDFileHandler: + handle = None + + def __init__(self, filename): + from yt.utilities.on_demand_imports import _openpmd_api + + io = _openpmd_api.io + self.handle = io.Series(filename, io.Access_Type.read_only) + + @property + def attrs(self): + return self.handle.attributes + + def close(self): + if self.handle is not None: + self.handle.close() + + def valid_netcdf_classic_signature(filename): issue_deprecation_warning( "valid_netcdf_classic_signature is not used within yt any more and is deprecated.", diff --git a/yt/utilities/on_demand_imports.py b/yt/utilities/on_demand_imports.py index 839617f4a5..ccb6b40a5a 100644 --- a/yt/utilities/on_demand_imports.py +++ b/yt/utilities/on_demand_imports.py @@ -353,6 +353,17 @@ def h5s(self): _h5py = h5py_imports() +class openpmd_api_imports(OnDemand): + @safe_import + def io(self): + import openpmd_api as io + + return io + + +_openpmd_api = openpmd_api_imports() + + class nose_imports(OnDemand): @safe_import def run(self):