diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml
index 15adb0d4a8..776f859d6e 100644
--- a/.github/workflows/hypothesis.yaml
+++ b/.github/workflows/hypothesis.yaml
@@ -25,12 +25,19 @@ jobs:
     strategy:
       matrix:
-        python-version: ['3.11']
+        python-version: ['3.12']
         numpy-version: ['2.2']
         dependency-set: ["optional"]
 
     steps:
       - uses: actions/checkout@v4
+      - name: Set HYPOTHESIS_PROFILE based on trigger
+        run: |
+          if [[ "${{ github.event_name }}" == "schedule" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
+            echo "HYPOTHESIS_PROFILE=nightly" >> $GITHUB_ENV
+          else
+            echo "HYPOTHESIS_PROFILE=ci" >> $GITHUB_ENV
+          fi
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
@@ -58,6 +65,7 @@ jobs:
         if: success()
         id: status
         run: |
+          echo "Using Hypothesis profile: $HYPOTHESIS_PROFILE"
           hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis
 
       # explicitly save the cache so it gets updated, also do this even if it fails.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ee1adb6b0f..ac36562a2a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -61,6 +61,8 @@ jobs:
           hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }}
           hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env
       - name: Run Tests
+        env:
+          HYPOTHESIS_PROFILE: ci
        run: |
           hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage
       - name: Upload coverage
diff --git a/changes/3130.feature.rst b/changes/3130.feature.rst
new file mode 100644
index 0000000000..7a64582f06
--- /dev/null
+++ b/changes/3130.feature.rst
@@ -0,0 +1 @@
+Port more stateful testing actions from `Icechunk <https://github.com/earth-mover/icechunk>`_.
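
Note: the `HYPOTHESIS_PROFILE` variable exported by the workflow step above is consumed on the Python side in `tests/conftest.py` (last hunk of this diff). A minimal sketch of that mechanism, assuming only the profile names used in this PR:

```python
# Sketch of how the exported HYPOTHESIS_PROFILE is consumed; this mirrors the
# conftest.py hunk at the end of this diff rather than adding anything new.
import os

from hypothesis import settings

settings.register_profile("ci", derandomize=True, deadline=None)
settings.register_profile("nightly", parent=settings.get_profile("ci"), derandomize=False)

# Scheduled and manually dispatched runs export HYPOTHESIS_PROFILE=nightly;
# pull requests get "ci"; anything unset falls back to Hypothesis's default.
settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))
```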
diff --git a/pyproject.toml b/pyproject.toml
index d25af7c5fc..d9264fcb6b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,7 @@ test = [
     "rich",
     "mypy",
     "hypothesis",
+    "pytest-xdist",
 ]
 remote_tests = [
     'zarr[remote]',
@@ -165,7 +166,7 @@ run = "run-coverage --no-cov"
 run-pytest = "run"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
+run-hypothesis = "run-coverage -nauto --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
 list-env = "pip list"
 
 [tool.hatch.envs.doctest]
@@ -194,7 +195,7 @@ run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
+run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*"
 list-env = "pip list"
 
 [tool.hatch.envs.docs]
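
`pytest-xdist` joins the test dependencies because the default `run-hypothesis` script now passes `-nauto` (i.e. `pytest -n auto`) instead of selecting a Hypothesis profile on the command line. A hypothetical sketch of why that parallelizes cleanly (the `Counter` machine below is illustrative, not from this PR): each Hypothesis state machine is collected by pytest as a single test item, so xdist schedules whole machine runs across workers.

```python
# Hypothetical example: a Hypothesis state machine surfaces to pytest as one
# test item via its TestCase attribute, so `pytest -n auto` distributes whole
# state-machine runs across xdist workers rather than splitting one run.
from hypothesis.stateful import RuleBasedStateMachine, rule


class Counter(RuleBasedStateMachine):
    def __init__(self) -> None:
        super().__init__()
        self.n = 0

    @rule()
    def increment(self) -> None:
        self.n += 1
        assert self.n > 0


# pytest collects this as a single test that any xdist worker can run.
TestCounter = Counter.TestCase
```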
diff --git a/src/zarr/testing/stateful.py b/src/zarr/testing/stateful.py
index f4f7b33318..f83d942549 100644
--- a/src/zarr/testing/stateful.py
+++ b/src/zarr/testing/stateful.py
@@ -21,7 +21,14 @@
 from zarr.core.buffer import Buffer, BufferPrototype, cpu, default_buffer_prototype
 from zarr.core.sync import SyncMixin
 from zarr.storage import LocalStore, MemoryStore
-from zarr.testing.strategies import key_ranges, node_names, np_array_and_chunks, numpy_arrays
+from zarr.testing.strategies import (
+    basic_indices,
+    chunk_paths,
+    key_ranges,
+    node_names,
+    np_array_and_chunks,
+    numpy_arrays,
+)
 from zarr.testing.strategies import keys as zarr_keys
 
 MAX_BINARY_SIZE = 100
@@ -120,6 +127,120 @@ def add_array(
         )
         self.all_arrays.add(path)
 
+    @rule()
+    def clear(self) -> None:
+        note("clearing")
+        import zarr
+
+        self._sync(self.store.clear())
+        self._sync(self.model.clear())
+
+        assert self._sync(self.store.is_empty("/"))
+        assert self._sync(self.model.is_empty("/"))
+
+        self.all_groups.clear()
+        self.all_arrays.clear()
+
+        zarr.group(store=self.store)
+        zarr.group(store=self.model)
+
+        # TODO: MemoryStore is broken?
+        # assert not self._sync(self.store.is_empty("/"))
+        # assert not self._sync(self.model.is_empty("/"))
+
+    def draw_directory(self, data: DataObject) -> str:
+        group_st = st.sampled_from(sorted(self.all_groups)) if self.all_groups else st.nothing()
+        array_st = st.sampled_from(sorted(self.all_arrays)) if self.all_arrays else st.nothing()
+        array_or_group = data.draw(st.one_of(group_st, array_st))
+        if data.draw(st.booleans()) and array_or_group in self.all_arrays:
+            arr = zarr.open_array(path=array_or_group, store=self.model)
+            path = data.draw(
+                st.one_of(
+                    st.sampled_from([array_or_group]),
+                    chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape).map(
+                        lambda x: f"{array_or_group}/c/"
+                    ),
+                )
+            )
+        else:
+            path = array_or_group
+        return path
+
+    @precondition(lambda self: bool(self.all_groups))
+    @rule(data=st.data())
+    def check_list_dir(self, data: DataObject) -> None:
+        path = self.draw_directory(data)
+        note(f"list_dir for {path=!r}")
+        # Consider .list_dir("path/to/array") for an array with a single chunk.
+        # The MemoryStore model will return `"c", "zarr.json"` only if the chunk exists.
+        # If that chunk was deleted, then `"c"` is not returned.
+        # LocalStore will not have this behaviour :/
+        # There are similar consistency issues with delete_dir("/path/to/array/c/0/0")
+        assume(not isinstance(self.store, LocalStore))
+        model_ls = sorted(self._sync_iter(self.model.list_dir(path)))
+        store_ls = sorted(self._sync_iter(self.store.list_dir(path)))
+        assert model_ls == store_ls, (model_ls, store_ls)
+
+    @precondition(lambda self: bool(self.all_arrays))
+    @rule(data=st.data())
+    def delete_chunk(self, data: DataObject) -> None:
+        array = data.draw(st.sampled_from(sorted(self.all_arrays)))
+        arr = zarr.open_array(path=array, store=self.model)
+        chunk_path = data.draw(chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape, subset=False))
+        path = f"{array}/c/{chunk_path}"
+        note(f"deleting chunk {path=!r}")
+        self._sync(self.model.delete(path))
+        self._sync(self.store.delete(path))
+
+    @precondition(lambda self: bool(self.all_arrays))
+    @rule(data=st.data())
+    def overwrite_array_basic_indexing(self, data: DataObject) -> None:
+        array = data.draw(st.sampled_from(sorted(self.all_arrays)))
+        model_array = zarr.open_array(path=array, store=self.model)
+        store_array = zarr.open_array(path=array, store=self.store)
+        slicer = data.draw(basic_indices(shape=model_array.shape))
+        note(f"overwriting array with basic indexer: {slicer=}")
+        new_data = data.draw(
+            npst.arrays(shape=np.shape(model_array[slicer]), dtype=model_array.dtype)
+        )
+        model_array[slicer] = new_data
+        store_array[slicer] = new_data
+
+    @precondition(lambda self: bool(self.all_arrays))
+    @rule(data=st.data())
+    def resize_array(self, data: DataObject) -> None:
+        array = data.draw(st.sampled_from(sorted(self.all_arrays)))
+        model_array = zarr.open_array(path=array, store=self.model)
+        store_array = zarr.open_array(path=array, store=self.store)
+        ndim = model_array.ndim
+        new_shape = tuple(
+            0 if oldsize == 0 else newsize
+            for newsize, oldsize in zip(
+                data.draw(npst.array_shapes(max_dims=ndim, min_dims=ndim, min_side=0)),
+                model_array.shape,
+                strict=True,
+            )
+        )
+
+        note(f"resizing array from {model_array.shape} to {new_shape}")
+        model_array.resize(new_shape)
+        store_array.resize(new_shape)
+
+    @precondition(lambda self: bool(self.all_arrays) or bool(self.all_groups))
+    @rule(data=st.data())
+    def delete_dir(self, data: DataObject) -> None:
+        path = self.draw_directory(data)
+        note(f"delete_dir with {path=!r}")
+        self._sync(self.model.delete_dir(path))
+        self._sync(self.store.delete_dir(path))
+
+        matches = set()
+        for node in self.all_groups | self.all_arrays:
+            if node.startswith(path):
+                matches.add(node)
+        self.all_groups = self.all_groups - matches
+        self.all_arrays = self.all_arrays - matches
+
     # @precondition(lambda self: bool(self.all_groups))
     # @precondition(lambda self: bool(self.all_arrays))
     # @rule(data=st.data())
@@ -230,13 +351,19 @@ def delete_group_using_del(self, data: DataObject) -> None:
     #         self.check_group_arrays(group)
     #         t1 = time.time()
     #         note(f"Checks took {t1 - t0} sec.")
-
     @invariant()
     def check_list_prefix_from_root(self) -> None:
         model_list = self._sync_iter(self.model.list_prefix(""))
         store_list = self._sync_iter(self.store.list_prefix(""))
-        note(f"Checking {len(model_list)} keys")
-        assert sorted(model_list) == sorted(store_list)
+        note(f"Checking {len(model_list)} expected keys vs {len(store_list)} actual keys")
+        assert sorted(model_list) == sorted(store_list), (
+            sorted(model_list),
+            sorted(store_list),
+        )
+
+        # check that our internal state matches that of the store and model
+        assert all(f"{path}/zarr.json" in model_list for path in self.all_groups | self.all_arrays)
+        assert all(f"{path}/zarr.json" in store_list for path in self.all_groups | self.all_arrays)
 
 
 class SyncStoreWrapper(zarr.core.sync.SyncMixin):
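
The rules above extend the model-versus-store state machine: every action is applied both to the store under test and to a `MemoryStore` model, and the invariant compares the two listings. A hedged usage sketch; the class name `ZarrHierarchyStateMachine` and its constructor signature are assumptions based on this module, and the real wiring lives in `tests/test_store/test_stateful*`:

```python
# Hedged usage sketch: run the state machine whose rules are defined above.
# ZarrHierarchyStateMachine and its store argument are assumed names; see
# tests/test_store/test_stateful* for the actual test wiring.
from hypothesis.stateful import run_state_machine_as_test

from zarr.storage import MemoryStore
from zarr.testing.stateful import ZarrHierarchyStateMachine


def test_zarr_hierarchy() -> None:
    run_state_machine_as_test(lambda: ZarrHierarchyStateMachine(MemoryStore()))
```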
diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
index 3b10592ec0..0cb992a4f2 100644
--- a/src/zarr/testing/strategies.py
+++ b/src/zarr/testing/strategies.py
@@ -77,7 +77,7 @@ def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
 
     return st.text(
         alphabet=st.characters(
-            blacklist_categories=["Cs"],  # Avoid *technically allowed* surrogates
+            exclude_categories=["Cs"],  # Avoid *technically allowed* surrogates
             min_codepoint=32,
         ),
         min_size=1,
@@ -324,7 +324,7 @@ def is_negative_slice(idx: Any) -> bool:
 
 
 @st.composite
-def end_slices(draw: st.DrawFn, *, shape: tuple[int]) -> Any:
+def end_slices(draw: st.DrawFn, *, shape: tuple[int, ...]) -> Any:
     """
     A strategy that slices ranges that include the last chunk.
     This is intended to stress-test handling of a possibly smaller last chunk.
@@ -342,7 +342,7 @@ def end_slices(draw: st.DrawFn, *, shape: tuple[int]) -> Any:
 def basic_indices(
     draw: st.DrawFn,
     *,
-    shape: tuple[int],
+    shape: tuple[int, ...],
     min_dims: int = 0,
     max_dims: int | None = None,
     allow_newaxis: bool = False,
@@ -370,7 +370,7 @@ def basic_indices(
 
 @st.composite
 def orthogonal_indices(
-    draw: st.DrawFn, *, shape: tuple[int]
+    draw: st.DrawFn, *, shape: tuple[int, ...]
 ) -> tuple[tuple[np.ndarray[Any, Any], ...], tuple[np.ndarray[Any, Any], ...]]:
     """
     Strategy that returns
@@ -426,3 +426,12 @@ def make_request(start: int, length: int) -> RangeByteRequest:
     )
     key_tuple = st.tuples(keys, byte_ranges)
     return st.lists(key_tuple, min_size=1, max_size=10)
+
+
+@st.composite
+def chunk_paths(draw: st.DrawFn, ndim: int, numblocks: tuple[int, ...], subset: bool = True) -> str:
+    blockidx = draw(
+        st.tuples(*tuple(st.integers(min_value=0, max_value=max(0, b - 1)) for b in numblocks))
+    )
+    subset_slicer = slice(draw(st.integers(min_value=0, max_value=ndim))) if subset else slice(None)
+    return "/".join(map(str, blockidx[subset_slicer]))
diff --git a/tests/conftest.py b/tests/conftest.py
index 948d3cd055..30d7eec4d4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import os
 import pathlib
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
@@ -188,17 +189,31 @@ def pytest_collection_modifyitems(config: Any, items: Any) -> None:
 
 
 settings.register_profile(
-    "ci",
-    max_examples=1000,
-    deadline=None,
+    "default",
+    parent=settings.get_profile("default"),
+    max_examples=300,
     suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
+    deadline=None,
+    verbosity=Verbosity.verbose,
 )
 settings.register_profile(
-    "local",
+    "ci",
+    parent=settings.get_profile("ci"),
     max_examples=300,
+    derandomize=True,  # more like regression testing
+    deadline=None,
     suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
-    verbosity=Verbosity.verbose,
 )
+settings.register_profile(
+    "nightly",
+    max_examples=500,
+    parent=settings.get_profile("ci"),
+    derandomize=False,
+    stateful_step_count=100,
+)
+
+settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))
+
 
 # TODO: uncomment these overrides when we can get mypy to accept them
 """
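
For reference, here is what the new `chunk_paths` strategy (added to `src/zarr/testing/strategies.py` above) generates; a small interactive sketch, with outputs shown as examples rather than guarantees:

```python
# chunk_paths draws a block index within the chunk grid, then joins either a
# drawn prefix of it (subset=True) or the full index (subset=False) with "/".
from zarr.testing.strategies import chunk_paths

# For a 2x3 chunk grid: subset=True can yield "", "1", or "1/2";
# subset=False always yields a full path such as "1/2".
# (.example() is for interactive exploration only, not for use inside tests.)
print(chunk_paths(ndim=2, numblocks=(2, 3)).example())
print(chunk_paths(ndim=2, numblocks=(2, 3), subset=False).example())
```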