diff --git a/.gitignore b/.gitignore index a09fb54d5..199ab1057 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,5 @@ fixture/ .DS_Store tests/.hypothesis .hypothesis/ + +zarr/version.py diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7d1aa3308..bc95252ee 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -252,12 +252,6 @@ async def _create_v3( shape = parse_shapelike(shape) codecs = list(codecs) if codecs is not None else [BytesCodec()] - if fill_value is None: - if dtype == np.dtype("bool"): - fill_value = False - else: - fill_value = 0 - if chunk_key_encoding is None: chunk_key_encoding = ("default", "/") assert chunk_key_encoding is not None @@ -281,7 +275,6 @@ async def _create_v3( ) array = cls(metadata=metadata, store_path=store_path) - await array._save_metadata(metadata) return array diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 95c4e00e9..0a56db5e7 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -464,9 +464,14 @@ def __repr__(self) -> str: def all_equal(self, other: Any, equal_nan: bool = True) -> bool: """Compare to `other` using np.array_equal.""" + if other is None: + # Handle None fill_value for Zarr V2 + return False # use array_equal to obtain equal_nan=True functionality data, other = np.broadcast_arrays(self._data, other) - result = np.array_equal(self._data, other, equal_nan=equal_nan) + result = np.array_equal( + self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in "US" else False + ) return result def fill(self, value: Any) -> None: diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 345655cc0..f1cb07fd4 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -360,7 +360,7 @@ def parse_fill_value( if fill_value is None: return dtype.type(0) if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): - if dtype in (np.complex64, np.complex128): + if dtype.type in (np.complex64, np.complex128): dtype = cast(COMPLEX_DTYPE, dtype) if len(fill_value) == 2: # complex datatypes serialize to JSON arrays with two elements @@ -391,7 +391,7 @@ def parse_fill_value( pass elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): pass - elif dtype.kind == "f": + elif dtype.kind in "cf": # float comparison is not exact, especially when dtype st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + # | npst.byte_string_dtypes(endianness="=") + # | npst.unicode_string_dtypes() + # | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + +def v2_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + | npst.byte_string_dtypes(endianness="=") + | npst.unicode_string_dtypes(endianness="=") + | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + ) + + # From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names # 1. must not be the empty string ("") # 2. must not include the character "/" @@ -33,21 +61,29 @@ array_names = node_names attrs = st.none() | st.dictionaries(_attr_keys, _attr_values) paths = st.lists(node_names, min_size=1).map(lambda x: "/".join(x)) | st.just("/") -np_arrays = npst.arrays( - # TODO: re-enable timedeltas once they are supported - dtype=npst.scalar_dtypes().filter( - lambda x: (x.kind not in ["m", "M"]) and (x.byteorder not in [">"]) - ), - shape=npst.array_shapes(max_dims=4), -) stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) -format = st.sampled_from([2, 3]) +zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) +array_shapes = npst.array_shapes(max_dims=4) + + +@st.composite # type: ignore[misc] +def numpy_arrays( + draw: st.DrawFn, + *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, + zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats, +) -> Any: + """ + Generate numpy arrays that can be saved in the provided Zarr format. + """ + zarr_format = draw(zarr_formats) + return draw(npst.arrays(dtype=v3_dtypes() if zarr_format == 3 else v2_dtypes(), shape=shapes)) @st.composite # type: ignore[misc] def np_array_and_chunks( - draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = np_arrays + draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays ) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg] """A hypothesis strategy to generate small sized random arrays. @@ -66,73 +102,49 @@ def np_array_and_chunks( def arrays( draw: st.DrawFn, *, + shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, compressors: st.SearchStrategy = compressors, stores: st.SearchStrategy[StoreLike] = stores, - arrays: st.SearchStrategy[np.ndarray] = np_arrays, paths: st.SearchStrategy[None | str] = paths, array_names: st.SearchStrategy = array_names, + arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, - format: st.SearchStrategy = format, + zarr_formats: st.SearchStrategy = zarr_formats, ) -> Array: store = draw(stores) - nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) path = draw(paths) name = draw(array_names) attributes = draw(attrs) - zarr_format = draw(format) + zarr_format = draw(zarr_formats) + if arrays is None: + arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format)) + nparray, chunks = draw(np_array_and_chunks(arrays=arrays)) + # test that None works too. + fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) # compressor = draw(compressors) - # TODO: clean this up - # if path is None and name is None: - # array_path = None - # array_name = None - # elif path is None and name is not None: - # array_path = f"{name}" - # array_name = f"/{name}" - # elif path is not None and name is None: - # array_path = path - # array_name = None - # elif path == "/": - # assert name is not None - # array_path = name - # array_name = "/" + name - # else: - # assert name is not None - # array_path = f"{path}/{name}" - # array_name = "/" + array_path - expected_attrs = {} if attributes is None else attributes array_path = path + ("/" if not path.endswith("/") else "") + name root = Group.from_store(store, zarr_format=zarr_format) - fill_value_args: tuple[Any, ...] = tuple() - if nparray.dtype.kind == "M": - m = re.search(r"\[(.+)\]", nparray.dtype.str) - if not m: - raise ValueError(f"Couldn't find precision for dtype '{nparray.dtype}.") - - fill_value_args = ( - # e.g. ns, D - m.groups()[0], - ) a = root.create_array( array_path, shape=nparray.shape, chunks=chunks, - dtype=nparray.dtype.str, + dtype=nparray.dtype, attributes=attributes, - # compressor=compressor, # TODO: FIXME - fill_value=nparray.dtype.type(0, *fill_value_args), + # compressor=compressor, # FIXME + fill_value=fill_value, ) assert isinstance(a, Array) + assert a.fill_value is not None + assert isinstance(root[array_path], Array) assert nparray.shape == a.shape assert chunks == a.chunks assert array_path == a.path, (path, name, array_path, a.name, a.path) - # assert array_path == a.name, (path, name, array_path, a.name, a.path) - # assert a.basename is None # TODO - # assert a.store == normalize_store_arg(store) + assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs a[:] = nparray diff --git a/tests/v3/test_properties.py b/tests/v3/test_properties.py index 250ede67b..380a4d851 100644 --- a/tests/v3/test_properties.py +++ b/tests/v3/test_properties.py @@ -8,13 +8,13 @@ import hypothesis.strategies as st # noqa: E402 from hypothesis import given # noqa: E402 -from zarr.testing.strategies import arrays, basic_indices, np_arrays # noqa: E402 +from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats # noqa: E402 -@given(st.data()) -def test_roundtrip(data: st.DataObject) -> None: - nparray = data.draw(np_arrays) - zarray = data.draw(arrays(arrays=st.just(nparray))) +@given(data=st.data(), zarr_format=zarr_formats) +def test_roundtrip(data: st.DataObject, zarr_format: int) -> None: + nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) + zarray = data.draw(arrays(arrays=st.just(nparray), zarr_formats=st.just(zarr_format))) assert_array_equal(nparray, zarray[:])