From 641e590abc89bd8e8be3a16145d228cfb077a174 Mon Sep 17 00:00:00 2001
From: chavlin <chris.havlin@gmail.com>
Date: Thu, 19 Sep 2024 12:31:29 -0500
Subject: [PATCH 1/3] avoid particle_index type cast

---
 .../data_selection_objects.py                 |  3 +-
 .../stream/tests/test_stream_particles.py     | 99 +++++++++++++++++++
 yt/utilities/io_handler.py                    | 21 +++-
 3 files changed, 120 insertions(+), 3 deletions(-)

diff --git a/yt/data_objects/selection_objects/data_selection_objects.py b/yt/data_objects/selection_objects/data_selection_objects.py
index 58a0b39d97..661da9d118 100644
--- a/yt/data_objects/selection_objects/data_selection_objects.py
+++ b/yt/data_objects/selection_objects/data_selection_objects.py
@@ -214,7 +214,8 @@ def get_data(self, fields=None):
 
         for f, v in read_particles.items():
             self.field_data[f] = self.ds.arr(v, units=finfos[f].units)
-            self.field_data[f].convert_to_units(finfos[f].output_units)
+            if finfos[f].units != finfos[f].output_units:
+                self.field_data[f].convert_to_units(finfos[f].output_units)
 
         fields_to_generate += gen_fluids + gen_particles
         self._generate_fields(fields_to_generate)
diff --git a/yt/frontends/stream/tests/test_stream_particles.py b/yt/frontends/stream/tests/test_stream_particles.py
index 28e832a4bc..3f617224da 100644
--- a/yt/frontends/stream/tests/test_stream_particles.py
+++ b/yt/frontends/stream/tests/test_stream_particles.py
@@ -2,6 +2,7 @@
 import pytest
 from numpy.testing import assert_equal
 
+import yt
 import yt.utilities.initial_conditions as ic
 from yt.loaders import load_amr_grids, load_particles, load_uniform_grid
 from yt.testing import fake_particle_ds, fake_sph_orientation_ds
@@ -404,3 +405,101 @@ def test_stream_non_cartesian_particles_amr():
     assert_equal(dd["all", "particle_position_r"].v, particle_position_r)
     assert_equal(dd["all", "particle_position_phi"].v, particle_position_phi)
     assert_equal(dd["all", "particle_position_theta"].v, particle_position_theta)
+
+
+@pytest.fixture
+def sph_dataset_with_integer_index():
+    num_particles = 100
+
+    data = {
+        ("gas", "particle_position_x"): np.linspace(0.0, 1.0, num_particles),
+        ("gas", "particle_position_y"): np.linspace(0.0, 1.0, num_particles),
+        ("gas", "particle_position_z"): np.linspace(0.0, 1.0, num_particles),
+        ("gas", "particle_mass"): np.ones(num_particles),
+        ("gas", "density"): np.ones(num_particles),
+        ("gas", "smoothing_length"): np.ones(num_particles) * 0.1,
+        ("gas", "particle_index"): np.arange(0, num_particles),
+    }
+
+    ds = load_particles(data)
+    return ds
+
+
+def test_particle_dtypes_selection(sph_dataset_with_integer_index):
+    # these operations will preserve data type
+    ds = sph_dataset_with_integer_index
+    ad = ds.all_data()
+    assert ad["gas", "particle_index"].dtype == np.int64
+
+    min_max = ad.quantities.extrema(("gas", "particle_index"))
+    assert min_max.dtype == np.int64
+
+    # check that subselections preserve type
+    le = ds.domain_center - ds.domain_width / 10.0
+    re = ds.domain_center + ds.domain_width / 10.0
+    reg = ds.region(ds.domain_center, le, re)
+    assert reg["gas", "particle_index"].dtype == np.int64
+
+    vals = ds.slice(0, ds.domain_center[0])["gas", "particle_index"]
+    assert vals.max() > 0
+    assert vals.dtype == np.int64
+
+
+def test_particle_dtypes_operations(sph_dataset_with_integer_index):
+    # these operations will not preserve dtype (will be cast to float64).
+    # note that the numerical outputs of these operations are not
+    # physical (projecting the particle index does not make any physical
+    # sense), but they do make sure the methods run in case any frontends
+    # start setting physical fields with different data types.
+
+    ds = sph_dataset_with_integer_index
+
+    field = ("gas", "particle_index")
+    frb = ds.proj(field, 2).to_frb(ds.domain_width[0], (64, 64))
+    image = frb["gas", "particle_index"]
+    assert image.max() > 0
+
+    off_axis_prj = yt.off_axis_projection(
+        ds,
+        ds.domain_center,
+        [0.5, 0.5, 0.5],
+        ds.domain_width,
+        (64, 64),
+        ("gas", "particle_index"),
+        weight=None,
+    )
+    assert off_axis_prj.max() > 0
+
+    source = ds.all_data()
+    custom_bins = np.linspace(-0.5, 99.5, 101)
+    profile = source.profile(
+        [("gas", "particle_index")],
+        [
+            ("gas", "mass"),
+            ("gas", "particle_index"),
+        ],
+        weight_field=None,
+        override_bins={("gas", "particle_index"): custom_bins},
+        logs={("gas", "particle_index"): False},
+    )
+    assert profile.x.min() == 0.0
+    assert profile.x.max() == 99.0
+    assert np.all(profile["gas", "mass"] == 1.0)
+    assert np.all(profile["gas", "particle_index"] == profile.x)
+
+    # check a particle filter on the index
+    p_index_min = 50
+
+    def index_filter(pfilter, data):
+        filter = data[(pfilter.filtered_type, "particle_index")] >= p_index_min
+        return filter
+
+    yt.add_particle_filter(
+        "index_filter",
+        function=index_filter,
+        filtered_type="all",
+        requires=["particle_index"],
+    )
+
+    ds.add_particle_filter("index_filter")
+    assert ds.all_data()["index_filter", "particle_index"].min() == p_index_min
diff --git a/yt/utilities/io_handler.py b/yt/utilities/io_handler.py
index e58dbc9345..a2aa126dfd 100644
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -60,6 +60,22 @@ def push(self, grid, field, data):
             raise ValueError
         self.queue[grid][field] = data
 
+    @property
+    def _particle_dtypes(self) -> defaultdict[FieldKey, type]:
+        # returns a defaultdict of data types for particle fields.
+        # defaults for all fields are float64, except for the particle_index
+        # field (or its alias if it exists), which is set as int64. Important
+        # to note that the data type will only be preserved for direct reads
+        # and any operations will either implicity (via unyt) or explicitly
+        # convert to float64.
+        dtypes: defaultdict[FieldKey, type] = defaultdict(lambda: np.float64)
+        for ptype in self.ds.particle_types:
+            p_index = (ptype, "particle_index")
+            if p_index in self.ds.field_info.field_aliases:
+                p_index = self.ds.field_info.field_aliases[p_index]
+            dtypes[p_index] = np.int64
+        return dtypes
+
     def _field_in_backup(self, grid, backup_file, field_name):
         if os.path.exists(backup_file):
             fhandle = h5py.File(backup_file, mode="r")
@@ -173,6 +189,7 @@ def _read_particle_selection(
         # field_maps stores fields, accounting for field unions
         ptf: defaultdict[str, list[str]] = defaultdict(list)
         field_maps: defaultdict[FieldKey, list[FieldKey]] = defaultdict(list)
+        p_dtypes = self._particle_dtypes
 
         # We first need a set of masks for each particle type
         chunks = list(chunks)
@@ -206,14 +223,14 @@ def _read_particle_selection(
                 vals = data.pop(field_f)
                 # note: numpy.concatenate has a dtype argument that would avoid
                 # a copy using .astype(...), available in numpy>=1.20
-                rv[field_f] = np.concatenate(vals, axis=0).astype("float64")
+                rv[field_f] = np.concatenate(vals, axis=0).astype(p_dtypes[field_f])
             else:
                 shape = [0]
                 if field_f[1] in self._vector_fields:
                     shape.append(self._vector_fields[field_f[1]])
                 elif field_f[1] in self._array_fields:
                     shape.append(self._array_fields[field_f[1]])
-                rv[field_f] = np.empty(shape, dtype="float64")
+                rv[field_f] = np.empty(shape, dtype=p_dtypes[field_f])
         return rv
 
     def _read_particle_fields(self, chunks, ptf, selector):

From e6315d7a928c1669311e1da1e039b10955f9133f Mon Sep 17 00:00:00 2001
From: Chris Havlin <chris.havlin@gmail.com>
Date: Tue, 24 Sep 2024 14:31:14 -0500
Subject: [PATCH 2/3] add comment of 32-bit int, float in _particle_dtypes

---
 yt/utilities/io_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt/utilities/io_handler.py b/yt/utilities/io_handler.py
index a2aa126dfd..cd156271e5 100644
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -67,7 +67,7 @@ def _particle_dtypes(self) -> defaultdict[FieldKey, type]:
         # field (or its alias if it exists), which is set as int64. Important
         # to note that the data type will only be preserved for direct reads
         # and any operations will either implicity (via unyt) or explicitly
-        # convert to float64.
+        # convert to float64. Furthermore, float32 and int32 are not supported. 
         dtypes: defaultdict[FieldKey, type] = defaultdict(lambda: np.float64)
         for ptype in self.ds.particle_types:
             p_index = (ptype, "particle_index")

From 26abd0d1834d85d68efeffa87f2e1b8eb282d173 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 25 Sep 2024 14:43:35 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 yt/utilities/io_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt/utilities/io_handler.py b/yt/utilities/io_handler.py
index cd156271e5..4338b13f77 100644
--- a/yt/utilities/io_handler.py
+++ b/yt/utilities/io_handler.py
@@ -67,7 +67,7 @@ def _particle_dtypes(self) -> defaultdict[FieldKey, type]:
         # field (or its alias if it exists), which is set as int64. Important
         # to note that the data type will only be preserved for direct reads
         # and any operations will either implicity (via unyt) or explicitly
-        # convert to float64. Furthermore, float32 and int32 are not supported. 
+        # convert to float64. Furthermore, float32 and int32 are not supported.
         dtypes: defaultdict[FieldKey, type] = defaultdict(lambda: np.float64)
         for ptype in self.ds.particle_types:
             p_index = (ptype, "particle_index")