From 520e8d6d77214a3a1d6239643eccf358d52dc0b2 Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Fri, 17 May 2024 11:27:47 +0200 Subject: [PATCH 1/4] config change --- pyproject.toml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3014f98031..1aafb64a2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,12 +160,17 @@ extend-exclude = [ "build", "dist", "venv", - "docs" + "docs", + "src/zarr/v2/", + "tests/v2/", ] [tool.ruff.lint] extend-select = [ - "RUF" + "B", # flake8-bugbear + "I", # isort + "UP", # pyupgrade + "RUF", ] ignore = [ "RUF003", From afa2ebdd877595036a996ffa718e5a773dbd8ac3 Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Fri, 17 May 2024 11:28:06 +0200 Subject: [PATCH 2/4] auto changes --- bench/compress_normal.py | 2 +- src/zarr/__init__.py | 2 +- src/zarr/abc/codec.py | 5 +- src/zarr/abc/metadata.py | 9 +-- src/zarr/abc/store.py | 24 +++---- src/zarr/array.py | 20 +++--- src/zarr/attributes.py | 6 +- src/zarr/buffer.py | 12 ++-- src/zarr/chunk_grids.py | 14 +++-- src/zarr/chunk_key_encodings.py | 9 +-- src/zarr/codecs/__init__.py | 4 +- src/zarr/codecs/_v2.py | 6 +- src/zarr/codecs/blosc.py | 20 +++--- src/zarr/codecs/bytes.py | 21 ++++--- src/zarr/codecs/crc32c_.py | 13 ++-- src/zarr/codecs/gzip.py | 13 ++-- src/zarr/codecs/mixins.py | 7 +-- src/zarr/codecs/pipeline.py | 66 +++++++++++++------- src/zarr/codecs/registry.py | 15 ++--- src/zarr/codecs/sharding.py | 52 ++++++++------- src/zarr/codecs/transpose.py | 20 +++--- src/zarr/codecs/zstd.py | 14 ++--- src/zarr/common.py | 42 ++++++------- src/zarr/group.py | 21 ++++--- src/zarr/indexing.py | 17 +++-- src/zarr/metadata.py | 12 ++-- src/zarr/store/core.py | 8 +-- src/zarr/store/memory.py | 21 +++---- src/zarr/store/remote.py | 19 +++--- src/zarr/sync.py | 6 +- tests/v3/conftest.py | 5 +- tests/v3/package_with_entrypoint/__init__.py | 1 + tests/v3/test_buffer.py | 5 +- tests/v3/test_codec_entrypoints.py | 1 - tests/v3/test_codecs.py | 26 ++++---- tests/v3/test_common.py | 20 +++--- tests/v3/test_group.py | 9 +-- tests/v3/test_metadata.py | 7 ++- tests/v3/test_store.py | 6 +- tests/v3/test_sync.py | 8 +-- tests/v3/test_v2.py | 5 +- 41 files changed, 315 insertions(+), 278 deletions(-) diff --git a/bench/compress_normal.py b/bench/compress_normal.py index 803d54b76b..608cfe8dce 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -1,9 +1,9 @@ import sys import timeit +import line_profiler import numpy as np -import line_profiler import zarr from zarr import blosc diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index 00c01560f4..fdab564c64 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations import zarr.codecs # noqa: F401 +from zarr._version import version as __version__ from zarr.array import Array, AsyncArray from zarr.config import config # noqa: F401 from zarr.group import AsyncGroup, Group @@ -9,7 +10,6 @@ make_store_path, ) from zarr.sync import sync as _sync -from zarr._version import version as __version__ # in case setuptools scm screw up and find version to be 0.0.0 assert not __version__.startswith("0.0.0") diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index 1c665590bf..d8d7edf547 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -1,15 +1,16 @@ from __future__ import annotations from abc import abstractmethod -from typing import TYPE_CHECKING, Generic, Iterable, TypeVar +from collections.abc import Iterable +from typing 
import TYPE_CHECKING, Generic, TypeVar from zarr.abc.metadata import Metadata from zarr.abc.store import ByteGetter, ByteSetter from zarr.buffer import Buffer, NDBuffer - if TYPE_CHECKING: from typing_extensions import Self + from zarr.common import ArraySpec, SliceSelection from zarr.metadata import ArrayMetadata diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py index f27b37cba4..36edf69534 100644 --- a/src/zarr/abc/metadata.py +++ b/src/zarr/abc/metadata.py @@ -1,11 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Sequence + +from collections.abc import Sequence +from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Dict from typing_extensions import Self -from dataclasses import fields, dataclass +from dataclasses import dataclass, fields from zarr.common import JSON @@ -36,7 +37,7 @@ def to_dict(self) -> JSON: return out_dict @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: """ Create an instance of the model from a dictionary """ diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index a3a112e58e..fee5422e9e 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -1,16 +1,16 @@ -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod from collections.abc import AsyncGenerator -from typing import List, Protocol, Tuple, Optional, runtime_checkable +from typing import Protocol, runtime_checkable -from zarr.common import BytesLike from zarr.buffer import Buffer +from zarr.common import BytesLike class Store(ABC): @abstractmethod async def get( - self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[Buffer]: + self, key: str, byte_range: tuple[int, int | None] | None = None + ) -> Buffer | None: """Retrieve the value associated with a given key. Parameters @@ -26,8 +26,8 @@ async def get( @abstractmethod async def get_partial_values( - self, key_ranges: List[Tuple[str, Tuple[int, int]]] - ) -> List[Optional[Buffer]]: + self, key_ranges: list[tuple[str, tuple[int, int]]] + ) -> list[Buffer | None]: """Retrieve possibly partial values from given key_ranges. Parameters @@ -150,18 +150,14 @@ def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: @runtime_checkable class ByteGetter(Protocol): - async def get( - self, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[Buffer]: ... + async def get(self, byte_range: tuple[int, int | None] | None = None) -> Buffer | None: ... @runtime_checkable class ByteSetter(Protocol): - async def get( - self, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[Buffer]: ... + async def get(self, byte_range: tuple[int, int | None] | None = None) -> Buffer | None: ... - async def set(self, value: Buffer, byte_range: Optional[Tuple[int, int]] = None) -> None: ... + async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: ... async def delete(self) -> None: ... diff --git a/src/zarr/array.py b/src/zarr/array.py index 61f91ab966..039f39e98e 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1,29 +1,28 @@ from __future__ import annotations +import json + # Notes on what I've changed here: # 1. Split Array into AsyncArray and Array # 3. Added .size and .attrs methods # 4. Temporarily disabled the creation of ArrayV2 # 5. Added from_dict to AsyncArray - # Questions to consider: # 1. Was splitting the array into two classes really necessary? 
- - from asyncio import gather +from collections.abc import Iterable from dataclasses import dataclass, replace - -import json -from typing import Any, Iterable, Literal +from typing import Any, Literal import numpy as np import numpy.typing as npt + from zarr.abc.codec import Codec from zarr.abc.store import set_or_delete - - from zarr.attributes import Attributes from zarr.buffer import Factory, NDArrayLike, NDBuffer +from zarr.chunk_grids import RegularChunkGrid +from zarr.chunk_key_encodings import ChunkKeyEncoding, DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.codecs import BytesCodec from zarr.common import ( JSON, @@ -36,11 +35,8 @@ concurrent_map, ) from zarr.config import config - from zarr.indexing import BasicIndexer -from zarr.chunk_grids import RegularChunkGrid -from zarr.chunk_key_encodings import ChunkKeyEncoding, DefaultChunkKeyEncoding, V2ChunkKeyEncoding -from zarr.metadata import ArrayMetadata, ArrayV3Metadata, ArrayV2Metadata, parse_indexing_order +from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata, parse_indexing_order from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import sync diff --git a/src/zarr/attributes.py b/src/zarr/attributes.py index e6b26309f2..079ae38a33 100644 --- a/src/zarr/attributes.py +++ b/src/zarr/attributes.py @@ -1,13 +1,13 @@ from __future__ import annotations -from collections.abc import MutableMapping -from typing import TYPE_CHECKING, Iterator +from collections.abc import Iterator, MutableMapping +from typing import TYPE_CHECKING from zarr.common import JSON if TYPE_CHECKING: - from zarr.group import Group from zarr.array import Array + from zarr.group import Group class Attributes(MutableMapping[str, JSON]): diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index a633cc09ec..7f74ee080f 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -1,15 +1,12 @@ from __future__ import annotations import sys +from collections.abc import Callable, Iterable from typing import ( TYPE_CHECKING, Any, - Callable, - Iterable, Literal, - Optional, Protocol, - Tuple, TypeAlias, ) @@ -17,6 +14,7 @@ if TYPE_CHECKING: from typing_extensions import Self + from zarr.codecs.bytes import Endian from zarr.common import BytesLike @@ -44,7 +42,7 @@ def __call__( shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"], - fill_value: Optional[Any], + fill_value: Any | None, ) -> NDBuffer: """Factory function to create a new NDBuffer (or subclass) @@ -275,7 +273,7 @@ def create( shape: Iterable[int], dtype: np.DTypeLike, order: Literal["C", "F"] = "C", - fill_value: Optional[Any] = None, + fill_value: Any | None = None, ) -> Self: """Create a new buffer and its underlying ndarray-like object @@ -380,7 +378,7 @@ def dtype(self) -> np.dtype[Any]: return self._data.dtype @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> tuple[int, ...]: return self._data.shape @property diff --git a/src/zarr/chunk_grids.py b/src/zarr/chunk_grids.py index 16c0df9174..45f77cc99c 100644 --- a/src/zarr/chunk_grids.py +++ b/src/zarr/chunk_grids.py @@ -1,9 +1,11 @@ from __future__ import annotations + import itertools -from typing import TYPE_CHECKING, Any, Dict, Iterator +from collections.abc import Iterator from dataclasses import dataclass -from zarr.abc.metadata import Metadata +from typing import TYPE_CHECKING, Any +from zarr.abc.metadata import Metadata from zarr.common import ( JSON, ChunkCoords, @@ -20,7 +22,7 @@ @dataclass(frozen=True) class ChunkGrid(Metadata): @classmethod - def 
from_dict(cls, data: Dict[str, JSON]) -> ChunkGrid: + def from_dict(cls, data: dict[str, JSON]) -> ChunkGrid: if isinstance(data, ChunkGrid): return data @@ -43,15 +45,15 @@ def __init__(self, *, chunk_shape: ChunkCoordsLike) -> None: object.__setattr__(self, "chunk_shape", chunk_shape_parsed) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> Self: + def from_dict(cls, data: dict[str, Any]) -> Self: _, configuration_parsed = parse_named_configuration(data, "regular") return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return {"name": "regular", "configuration": {"chunk_shape": list(self.chunk_shape)}} def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]: return itertools.product( - *(range(0, _ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape)) + *(range(0, _ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape, strict=False)) ) diff --git a/src/zarr/chunk_key_encodings.py b/src/zarr/chunk_key_encodings.py index ebc7654dde..5ecb98ef61 100644 --- a/src/zarr/chunk_key_encodings.py +++ b/src/zarr/chunk_key_encodings.py @@ -1,9 +1,10 @@ from __future__ import annotations + from abc import abstractmethod -from typing import TYPE_CHECKING, Dict, Literal, cast from dataclasses import dataclass -from zarr.abc.metadata import Metadata +from typing import TYPE_CHECKING, Literal, cast +from zarr.abc.metadata import Metadata from zarr.common import ( JSON, ChunkCoords, @@ -33,7 +34,7 @@ def __init__(self, *, separator: SeparatorLiteral) -> None: object.__setattr__(self, "separator", separator_parsed) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> ChunkKeyEncoding: + def from_dict(cls, data: dict[str, JSON]) -> ChunkKeyEncoding: if isinstance(data, ChunkKeyEncoding): return data @@ -44,7 +45,7 @@ def from_dict(cls, data: Dict[str, JSON]) -> ChunkKeyEncoding: return V2ChunkKeyEncoding(**configuration_parsed) # type: ignore[arg-type] raise ValueError(f"Unknown chunk key encoding. 
Got {name_parsed}.") - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"separator": self.separator}} @abstractmethod diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py index 959a85af57..0f0ff55df5 100644 --- a/src/zarr/codecs/__init__.py +++ b/src/zarr/codecs/__init__.py @@ -1,10 +1,10 @@ from __future__ import annotations -from zarr.codecs.blosc import BloscCodec, BloscCname, BloscShuffle # noqa: F401 +from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle # noqa: F401 from zarr.codecs.bytes import BytesCodec, Endian # noqa: F401 from zarr.codecs.crc32c_ import Crc32cCodec # noqa: F401 from zarr.codecs.gzip import GzipCodec # noqa: F401 +from zarr.codecs.pipeline import BatchedCodecPipeline # noqa: F401 from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation # noqa: F401 from zarr.codecs.transpose import TransposeCodec # noqa: F401 from zarr.codecs.zstd import ZstdCodec # noqa: F401 -from zarr.codecs.pipeline import BatchedCodecPipeline # noqa: F401 diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index fb7122600f..06bd866c0f 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -2,13 +2,13 @@ from dataclasses import dataclass +import numcodecs +from numcodecs.compat import ensure_bytes, ensure_ndarray + from zarr.buffer import Buffer, NDBuffer from zarr.codecs.mixins import ArrayArrayCodecBatchMixin, ArrayBytesCodecBatchMixin from zarr.common import JSON, ArraySpec, to_thread -import numcodecs -from numcodecs.compat import ensure_bytes, ensure_ndarray - @dataclass(frozen=True) class V2Compressor(ArrayBytesCodecBatchMixin): diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index ab3ffab479..24fac962db 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -1,21 +1,21 @@ from __future__ import annotations + from dataclasses import dataclass, replace from enum import Enum from functools import cached_property - -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING import numcodecs from numcodecs.blosc import Blosc -from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.buffer import Buffer, as_numpy_array_wrapper +from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.codecs.registry import register_codec from zarr.common import parse_enum, parse_named_configuration, to_thread if TYPE_CHECKING: - from typing import Dict, Optional from typing_extensions import Self + from zarr.common import JSON, ArraySpec @@ -86,10 +86,10 @@ class BloscCodec(BytesBytesCodecBatchMixin): def __init__( self, *, - typesize: Optional[int] = None, - cname: Union[BloscCname, str] = BloscCname.zstd, + typesize: int | None = None, + cname: BloscCname | str = BloscCname.zstd, clevel: int = 5, - shuffle: Union[BloscShuffle, str, None] = None, + shuffle: BloscShuffle | str | None = None, blocksize: int = 0, ) -> None: typesize_parsed = parse_typesize(typesize) if typesize is not None else None @@ -105,11 +105,11 @@ def __init__( object.__setattr__(self, "blocksize", blocksize_parsed) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "blosc") return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: if self.typesize is None: raise ValueError("`typesize` needs to be set for serialization.") 
if self.shuffle is None: @@ -169,7 +169,7 @@ async def encode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, - ) -> Optional[Buffer]: + ) -> Buffer | None: # Since blosc only takes bytes, we convert the input and output of the encoding # between bytes and Buffer return await to_thread( diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 6df78a08b8..a6045852e6 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -1,21 +1,22 @@ from __future__ import annotations + +import sys from dataclasses import dataclass, replace from enum import Enum -import sys - -from typing import TYPE_CHECKING, Dict, Optional, Union +from typing import TYPE_CHECKING import numpy as np -from zarr.codecs.mixins import ArrayBytesCodecBatchMixin from zarr.buffer import Buffer, NDBuffer +from zarr.codecs.mixins import ArrayBytesCodecBatchMixin from zarr.codecs.registry import register_codec from zarr.common import parse_enum, parse_named_configuration if TYPE_CHECKING: - from zarr.common import JSON, ArraySpec from typing_extensions import Self + from zarr.common import JSON, ArraySpec + class Endian(Enum): big = "big" @@ -29,22 +30,22 @@ class Endian(Enum): class BytesCodec(ArrayBytesCodecBatchMixin): is_fixed_size = True - endian: Optional[Endian] + endian: Endian | None - def __init__(self, *, endian: Union[Endian, str, None] = default_system_endian) -> None: + def __init__(self, *, endian: Endian | str | None = default_system_endian) -> None: endian_parsed = None if endian is None else parse_enum(endian, Endian) object.__setattr__(self, "endian", endian_parsed) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( data, "bytes", require_configuration=False ) configuration_parsed = configuration_parsed or {} return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: if self.endian is None: return {"name": "bytes"} else: @@ -87,7 +88,7 @@ async def encode_single( self, chunk_array: NDBuffer, _chunk_spec: ArraySpec, - ) -> Optional[Buffer]: + ) -> Buffer | None: assert isinstance(chunk_array, NDBuffer) if chunk_array.dtype.itemsize > 1: if self.endian is not None and self.endian != chunk_array.byteorder: diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index ab4bad65fe..0b9c8c9a96 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -1,20 +1,19 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass from typing import TYPE_CHECKING import numpy as np - from crc32c import crc32c -from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.buffer import Buffer +from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.codecs.registry import register_codec from zarr.common import parse_named_configuration if TYPE_CHECKING: - from typing import Dict, Optional from typing_extensions import Self + from zarr.common import JSON, ArraySpec @@ -23,11 +22,11 @@ class Crc32cCodec(BytesBytesCodecBatchMixin): is_fixed_size = True @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: parse_named_configuration(data, "crc32c", require_configuration=False) return cls() - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return {"name": "crc32c"} async def decode_single( @@ -52,7 +51,7 @@ async 
def encode_single( self, chunk_bytes: Buffer, _chunk_spec: ArraySpec, - ) -> Optional[Buffer]: + ) -> Buffer | None: data = chunk_bytes.as_numpy_array() # Calculate the checksum and "cast" it to a numpy array checksum = np.array([crc32c(data)], dtype=np.uint32) diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index 6a8e30db13..58c1fc6fec 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -1,17 +1,18 @@ from __future__ import annotations -from dataclasses import dataclass +from dataclasses import dataclass from typing import TYPE_CHECKING from numcodecs.gzip import GZip -from zarr.codecs.mixins import BytesBytesCodecBatchMixin + from zarr.buffer import Buffer, as_numpy_array_wrapper +from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.codecs.registry import register_codec from zarr.common import parse_named_configuration, to_thread if TYPE_CHECKING: - from typing import Optional, Dict from typing_extensions import Self + from zarr.common import JSON, ArraySpec @@ -37,11 +38,11 @@ def __init__(self, *, level: int = 5) -> None: object.__setattr__(self, "level", level_parsed) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "gzip") return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return {"name": "gzip", "configuration": {"level": self.level}} async def decode_single( @@ -55,7 +56,7 @@ async def encode_single( self, chunk_bytes: Buffer, _chunk_spec: ArraySpec, - ) -> Optional[Buffer]: + ) -> Buffer | None: return await to_thread(as_numpy_array_wrapper, GZip(self.level).encode, chunk_bytes) def compute_encoded_size( diff --git a/src/zarr/codecs/mixins.py b/src/zarr/codecs/mixins.py index 8b0a684509..b571fd35ee 100644 --- a/src/zarr/codecs/mixins.py +++ b/src/zarr/codecs/mixins.py @@ -1,8 +1,8 @@ from __future__ import annotations from abc import abstractmethod -from typing import Awaitable, Callable, Generic, Iterable, TypeVar - +from collections.abc import Awaitable, Callable, Iterable +from typing import Generic, TypeVar from zarr.abc.codec import ( ArrayArrayCodec, @@ -10,14 +10,13 @@ ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, ByteGetter, - ByteSetter, BytesBytesCodec, + ByteSetter, ) from zarr.buffer import Buffer, NDBuffer from zarr.common import ArraySpec, SliceSelection, concurrent_map from zarr.config import config - CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer) CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer) diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index 8396a0c2ce..607745bef8 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -1,30 +1,32 @@ from __future__ import annotations +from collections.abc import Iterable, Iterator +from dataclasses import dataclass from itertools import islice -from typing import TYPE_CHECKING, Iterator, TypeVar, Iterable +from typing import TYPE_CHECKING, TypeVar from warnings import warn -from dataclasses import dataclass -from zarr.config import config from zarr.abc.codec import ( - ByteGetter, - ByteSetter, - Codec, - CodecPipeline, ArrayArrayCodec, ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, + ByteGetter, BytesBytesCodec, + ByteSetter, + Codec, + CodecPipeline, ) from zarr.buffer import Buffer, NDBuffer from zarr.codecs.registry import 
get_codec_class from zarr.common import JSON, concurrent_map, parse_named_configuration +from zarr.config import config from zarr.indexing import is_total_slice from zarr.metadata import ArrayMetadata if TYPE_CHECKING: from typing_extensions import Self + from zarr.common import ArraySpec, SliceSelection T = TypeVar("T") @@ -237,13 +239,19 @@ async def decode_batch( ) = self._codecs_with_resolved_metadata_batched(chunk_specs) for bb_codec, chunk_spec_batch in bb_codecs_with_spec[::-1]: - chunk_bytes_batch = await bb_codec.decode(zip(chunk_bytes_batch, chunk_spec_batch)) + chunk_bytes_batch = await bb_codec.decode( + zip(chunk_bytes_batch, chunk_spec_batch, strict=False) + ) ab_codec, chunk_spec_batch = ab_codec_with_spec - chunk_array_batch = await ab_codec.decode(zip(chunk_bytes_batch, chunk_spec_batch)) + chunk_array_batch = await ab_codec.decode( + zip(chunk_bytes_batch, chunk_spec_batch, strict=False) + ) for aa_codec, chunk_spec_batch in aa_codecs_with_spec[::-1]: - chunk_array_batch = await aa_codec.decode(zip(chunk_array_batch, chunk_spec_batch)) + chunk_array_batch = await aa_codec.decode( + zip(chunk_array_batch, chunk_spec_batch, strict=False) + ) return chunk_array_batch @@ -264,14 +272,20 @@ async def encode_batch( chunk_array_batch, chunk_specs = _unzip2(chunk_arrays_and_specs) for aa_codec in self.array_array_codecs: - chunk_array_batch = await aa_codec.encode(zip(chunk_array_batch, chunk_specs)) + chunk_array_batch = await aa_codec.encode( + zip(chunk_array_batch, chunk_specs, strict=False) + ) chunk_specs = resolve_batched(aa_codec, chunk_specs) - chunk_bytes_batch = await self.array_bytes_codec.encode(zip(chunk_array_batch, chunk_specs)) + chunk_bytes_batch = await self.array_bytes_codec.encode( + zip(chunk_array_batch, chunk_specs, strict=False) + ) chunk_specs = resolve_batched(self.array_bytes_codec, chunk_specs) for bb_codec in self.bytes_bytes_codecs: - chunk_bytes_batch = await bb_codec.encode(zip(chunk_bytes_batch, chunk_specs)) + chunk_bytes_batch = await bb_codec.encode( + zip(chunk_bytes_batch, chunk_specs, strict=False) + ) chunk_specs = resolve_batched(bb_codec, chunk_specs) return chunk_bytes_batch @@ -297,7 +311,7 @@ async def read_batch( ] ) for chunk_array, (_, chunk_spec, _, out_selection) in zip( - chunk_array_batch, batch_info + chunk_array_batch, batch_info, strict=False ): if chunk_array is not None: out[out_selection] = chunk_array @@ -312,11 +326,13 @@ async def read_batch( chunk_array_batch = await self.decode_batch( [ (chunk_bytes, chunk_spec) - for chunk_bytes, (_, chunk_spec, _, _) in zip(chunk_bytes_batch, batch_info) + for chunk_bytes, (_, chunk_spec, _, _) in zip( + chunk_bytes_batch, batch_info, strict=False + ) ], ) for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip( - chunk_array_batch, batch_info + chunk_array_batch, batch_info, strict=False ): if chunk_array is not None: tmp = chunk_array[chunk_selection] @@ -356,7 +372,9 @@ async def _read_key(byte_setter: ByteSetter | None) -> Buffer | None: chunk_array_batch = await self.decode_batch( [ (chunk_bytes, chunk_spec) - for chunk_bytes, (_, chunk_spec, _, _) in zip(chunk_bytes_batch, batch_info) + for chunk_bytes, (_, chunk_spec, _, _) in zip( + chunk_bytes_batch, batch_info, strict=False + ) ], ) @@ -383,7 +401,7 @@ def _merge_chunk_array( chunk_array_batch = [ _merge_chunk_array(chunk_array, value[out_selection], chunk_spec, chunk_selection) for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip( - chunk_array_batch, batch_info + chunk_array_batch, 
batch_info, strict=False ) ] @@ -391,13 +409,17 @@ def _merge_chunk_array( None if chunk_array is None or chunk_array.all_equal(chunk_spec.fill_value) else chunk_array - for chunk_array, (_, chunk_spec, _, _) in zip(chunk_array_batch, batch_info) + for chunk_array, (_, chunk_spec, _, _) in zip( + chunk_array_batch, batch_info, strict=False + ) ] chunk_bytes_batch = await self.encode_batch( [ (chunk_array, chunk_spec) - for chunk_array, (_, chunk_spec, _, _) in zip(chunk_array_batch, batch_info) + for chunk_array, (_, chunk_spec, _, _) in zip( + chunk_array_batch, batch_info, strict=False + ) ], ) @@ -410,7 +432,9 @@ async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> Non await concurrent_map( [ (byte_setter, chunk_bytes) - for chunk_bytes, (byte_setter, _, _, _) in zip(chunk_bytes_batch, batch_info) + for chunk_bytes, (byte_setter, _, _, _) in zip( + chunk_bytes_batch, batch_info, strict=False + ) ], _write_key, config.get("async.concurrency"), diff --git a/src/zarr/codecs/registry.py b/src/zarr/codecs/registry.py index b981f1f36c..2f2b09499f 100644 --- a/src/zarr/codecs/registry.py +++ b/src/zarr/codecs/registry.py @@ -1,28 +1,29 @@ from __future__ import annotations + from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Dict, Type from zarr.abc.codec import Codec -from importlib.metadata import EntryPoint, entry_points as get_entry_points +from importlib.metadata import EntryPoint +from importlib.metadata import entry_points as get_entry_points -__codec_registry: Dict[str, Type[Codec]] = {} -__lazy_load_codecs: Dict[str, EntryPoint] = {} +__codec_registry: dict[str, type[Codec]] = {} +__lazy_load_codecs: dict[str, EntryPoint] = {} -def _collect_entrypoints() -> Dict[str, EntryPoint]: +def _collect_entrypoints() -> dict[str, EntryPoint]: entry_points = get_entry_points() for e in entry_points.select(group="zarr.codecs"): __lazy_load_codecs[e.name] = e return __lazy_load_codecs -def register_codec(key: str, codec_cls: Type[Codec]) -> None: +def register_codec(key: str, codec_cls: type[Codec]) -> None: __codec_registry[key] = codec_cls -def get_codec_class(key: str) -> Type[Codec]: +def get_codec_class(key: str) -> type[Codec]: item = __codec_registry.get(key) if item is None: if key in __lazy_load_codecs: diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index dd7cdcd0b4..a6c5bac6a7 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -1,12 +1,16 @@ from __future__ import annotations -from enum import Enum -from typing import TYPE_CHECKING, Iterable, Mapping, MutableMapping, NamedTuple, Tuple, Union + +from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import dataclass, field, replace +from enum import Enum from functools import lru_cache - +from typing import TYPE_CHECKING, NamedTuple import numpy as np + from zarr.abc.codec import ByteGetter, ByteSetter, Codec, CodecPipeline +from zarr.buffer import Buffer, NDBuffer +from zarr.chunk_grids import RegularChunkGrid from zarr.codecs.bytes import BytesCodec from zarr.codecs.crc32c_ import Crc32cCodec from zarr.codecs.mixins import ( @@ -25,18 +29,18 @@ parse_shapelike, product, ) -from zarr.chunk_grids import RegularChunkGrid from zarr.indexing import ( BasicIndexer, c_order_iter, morton_order_iter, ) from zarr.metadata import ArrayMetadata, parse_codecs -from zarr.buffer import Buffer, NDBuffer if TYPE_CHECKING: - from typing import Awaitable, Callable, Dict, Iterator, Optional, Set + from collections.abc import Awaitable, 
Callable, Iterator + from typing_extensions import Self + from zarr.common import JSON, SliceSelection MAX_UINT_64 = 2**64 - 1 @@ -58,7 +62,7 @@ class _ShardingByteGetter(ByteGetter): shard_dict: ShardMapping chunk_coords: ChunkCoords - async def get(self, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> Optional[Buffer]: + async def get(self, byte_range: tuple[int, int | None] | None = None) -> Buffer | None: assert byte_range is None, "byte_range is not supported within shards" return self.shard_dict.get(self.chunk_coords) @@ -67,7 +71,7 @@ async def get(self, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> O class _ShardingByteSetter(_ShardingByteGetter, ByteSetter): shard_dict: ShardMutableMapping - async def set(self, value: Buffer, byte_range: Optional[Tuple[int, int]] = None) -> None: + async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: assert byte_range is None, "byte_range is not supported within shards" self.shard_dict[self.chunk_coords] = value @@ -86,7 +90,7 @@ def chunks_per_shard(self) -> ChunkCoords: def _localize_chunk(self, chunk_coords: ChunkCoords) -> ChunkCoords: return tuple( chunk_i % shard_i - for chunk_i, shard_i in zip(chunk_coords, self.offsets_and_lengths.shape) + for chunk_i, shard_i in zip(chunk_coords, self.offsets_and_lengths.shape, strict=False) ) def is_all_empty(self) -> bool: @@ -95,7 +99,7 @@ def is_all_empty(self) -> bool: def get_full_chunk_map(self) -> np.ndarray: return self.offsets_and_lengths[..., 0] != MAX_UINT_64 - def get_chunk_slice(self, chunk_coords: ChunkCoords) -> Optional[Tuple[int, int]]: + def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None: localized_chunk = self._localize_chunk(chunk_coords) chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk] if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64): @@ -103,7 +107,7 @@ def get_chunk_slice(self, chunk_coords: ChunkCoords) -> Optional[Tuple[int, int] else: return (int(chunk_start), int(chunk_start) + int(chunk_len)) - def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: Optional[slice]) -> None: + def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: slice | None) -> None: localized_chunk = self._localize_chunk(chunk_coords) if chunk_slice is None: self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64) @@ -192,7 +196,7 @@ class _ShardBuilder(_ShardReader, ShardMutableMapping): def merge_with_morton_order( cls, chunks_per_shard: ChunkCoords, - tombstones: Set[ChunkCoords], + tombstones: set[ChunkCoords], *shard_dicts: ShardMapping, ) -> _ShardBuilder: obj = cls.create_empty(chunks_per_shard) @@ -241,7 +245,7 @@ async def finalize( class _MergingShardBuilder(ShardMutableMapping): old_dict: _ShardReader new_dict: _ShardBuilder - tombstones: Set[ChunkCoords] = field(default_factory=set) + tombstones: set[ChunkCoords] = field(default_factory=set) def __getitem__(self, chunk_coords: ChunkCoords) -> Buffer: chunk_bytes_maybe = self.new_dict.get(chunk_coords) @@ -299,9 +303,9 @@ def __init__( self, *, chunk_shape: ChunkCoordsLike, - codecs: Optional[Iterable[Union[Codec, JSON]]] = None, - index_codecs: Optional[Iterable[Union[Codec, JSON]]] = None, - index_location: Optional[ShardingCodecIndexLocation] = ShardingCodecIndexLocation.end, + codecs: Iterable[Codec | JSON] | None = None, + index_codecs: Iterable[Codec | JSON] | None = None, + index_location: ShardingCodecIndexLocation | None = ShardingCodecIndexLocation.end, ) -> None: chunk_shape_parsed = 
parse_shapelike(chunk_shape) codecs_parsed = ( @@ -331,11 +335,11 @@ def __init__( object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "sharding_indexed") return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return { "name": "sharding_indexed", "configuration": { @@ -366,6 +370,7 @@ def validate(self, array_metadata: ArrayMetadata) -> None: for s, c in zip( array_metadata.chunk_grid.chunk_shape, self.chunk_shape, + strict=False, ) ): raise ValueError( @@ -420,7 +425,7 @@ async def decode_partial_single( byte_getter: ByteGetter, selection: SliceSelection, shard_spec: ArraySpec, - ) -> Optional[NDBuffer]: + ) -> NDBuffer | None: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) @@ -480,7 +485,7 @@ async def encode_single( self, shard_array: NDBuffer, shard_spec: ArraySpec, - ) -> Optional[Buffer]: + ) -> Buffer | None: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) @@ -561,7 +566,7 @@ async def encode_partial_single( ) def _is_total_shard( - self, all_chunk_coords: Set[ChunkCoords], chunks_per_shard: ChunkCoords + self, all_chunk_coords: set[ChunkCoords], chunks_per_shard: ChunkCoords ) -> bool: return len(all_chunk_coords) == product(chunks_per_shard) and all( chunk_coords in all_chunk_coords for chunk_coords in c_order_iter(chunks_per_shard) @@ -624,12 +629,13 @@ def _get_chunks_per_shard(self, shard_spec: ArraySpec) -> ChunkCoords: for s, c in zip( shard_spec.shape, self.chunk_shape, + strict=False, ) ) async def _load_shard_index_maybe( self, byte_getter: ByteGetter, chunks_per_shard: ChunkCoords - ) -> Optional[_ShardIndex]: + ) -> _ShardIndex | None: shard_index_size = self._shard_index_size(chunks_per_shard) if self.index_location == ShardingCodecIndexLocation.start: index_bytes = await byte_getter.get((0, shard_index_size)) @@ -648,7 +654,7 @@ async def _load_shard_index( async def _load_full_shard_maybe( self, byte_getter: ByteGetter, chunks_per_shard: ChunkCoords - ) -> Optional[_ShardReader]: + ) -> _ShardReader | None: shard_bytes = await byte_getter.get() return ( diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py index 5d4d2a7b84..774393464c 100644 --- a/src/zarr/codecs/transpose.py +++ b/src/zarr/codecs/transpose.py @@ -1,19 +1,21 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Iterable, Union, cast +from collections.abc import Iterable from dataclasses import dataclass, replace +from typing import TYPE_CHECKING, cast -from zarr.codecs.mixins import ArrayArrayCodecBatchMixin from zarr.buffer import NDBuffer -from zarr.common import JSON, ArraySpec, ChunkCoordsLike, parse_named_configuration +from zarr.codecs.mixins import ArrayArrayCodecBatchMixin from zarr.codecs.registry import register_codec +from zarr.common import JSON, ArraySpec, ChunkCoordsLike, parse_named_configuration if TYPE_CHECKING: - from typing import TYPE_CHECKING, Optional, Tuple + from typing import TYPE_CHECKING + from typing_extensions import Self -def parse_transpose_order(data: Union[JSON, Iterable[int]]) -> Tuple[int, ...]: +def parse_transpose_order(data: JSON | Iterable[int]) -> tuple[int, ...]: if not 
isinstance(data, Iterable): raise TypeError(f"Expected an iterable. Got {data} instead.") if not all(isinstance(a, int) for a in data): @@ -25,7 +27,7 @@ def parse_transpose_order(data: Union[JSON, Iterable[int]]) -> Tuple[int, ...]: class TransposeCodec(ArrayArrayCodecBatchMixin): is_fixed_size = True - order: Tuple[int, ...] + order: tuple[int, ...] def __init__(self, *, order: ChunkCoordsLike) -> None: order_parsed = parse_transpose_order(order) @@ -33,11 +35,11 @@ def __init__(self, *, order: ChunkCoordsLike) -> None: object.__setattr__(self, "order", order_parsed) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "transpose") return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return {"name": "transpose", "configuration": {"order": list(self.order)}} def evolve(self, array_spec: ArraySpec) -> Self: @@ -86,7 +88,7 @@ async def encode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, - ) -> Optional[NDBuffer]: + ) -> NDBuffer | None: chunk_array = chunk_array.transpose(self.order) return chunk_array diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 4422188d25..3c6aac4ce3 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -1,18 +1,18 @@ from __future__ import annotations -from typing import TYPE_CHECKING -from dataclasses import dataclass +from dataclasses import dataclass +from typing import TYPE_CHECKING from zstandard import ZstdCompressor, ZstdDecompressor -from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.buffer import Buffer, as_numpy_array_wrapper +from zarr.codecs.mixins import BytesBytesCodecBatchMixin from zarr.codecs.registry import register_codec from zarr.common import parse_named_configuration, to_thread if TYPE_CHECKING: - from typing import Dict, Optional from typing_extensions import Self + from zarr.common import JSON, ArraySpec @@ -45,11 +45,11 @@ def __init__(self, *, level: int = 0, checksum: bool = False) -> None: object.__setattr__(self, "checksum", checksum_parsed) @classmethod - def from_dict(cls, data: Dict[str, JSON]) -> Self: + def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "zstd") return cls(**configuration_parsed) # type: ignore[arg-type] - def to_dict(self) -> Dict[str, JSON]: + def to_dict(self) -> dict[str, JSON]: return {"name": "zstd", "configuration": {"level": self.level, "checksum": self.checksum}} def _compress(self, data: bytes) -> bytes: @@ -71,7 +71,7 @@ async def encode_single( self, chunk_bytes: Buffer, _chunk_spec: ArraySpec, - ) -> Optional[Buffer]: + ) -> Buffer | None: return await to_thread(as_numpy_array_wrapper, self._compress, chunk_bytes) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: diff --git a/src/zarr/common.py b/src/zarr/common.py index 3ef847a1f3..20b372e74d 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -1,23 +1,23 @@ from __future__ import annotations + +import asyncio +import contextvars +import functools +from collections.abc import Iterable +from dataclasses import dataclass +from enum import Enum from typing import ( TYPE_CHECKING, - ParamSpec, + Any, Literal, - Union, - Tuple, - Iterable, + ParamSpec, TypeVar, + Union, overload, - Any, ) -import asyncio -import contextvars -from dataclasses import dataclass -from enum import Enum 
-import functools if TYPE_CHECKING: - from typing import Awaitable, Callable, Iterator, Optional, Type + from collections.abc import Awaitable, Callable, Iterator import numpy as np @@ -44,7 +44,7 @@ def product(tup: ChunkCoords) -> int: async def concurrent_map( - items: list[T], func: Callable[..., Awaitable[V]], limit: Optional[int] = None + items: list[T], func: Callable[..., Awaitable[V]], limit: int | None = None ) -> list[V]: if limit is None: return await asyncio.gather(*[func(*item) for item in items]) @@ -52,7 +52,7 @@ async def concurrent_map( else: sem = asyncio.Semaphore(limit) - async def run(item: Tuple[Any]) -> V: + async def run(item: tuple[Any]) -> V: async with sem: return await func(*item) @@ -73,12 +73,12 @@ async def to_thread(func: Callable[P, U], /, *args: P.args, **kwargs: P.kwargs) E = TypeVar("E", bound=Enum) -def enum_names(enum: Type[E]) -> Iterator[str]: +def enum_names(enum: type[E]) -> Iterator[str]: for item in enum: yield item.name -def parse_enum(data: JSON, cls: Type[E]) -> E: +def parse_enum(data: JSON, cls: type[E]) -> E: if isinstance(data, cls): return data if not isinstance(data, str): @@ -113,7 +113,7 @@ def ndim(self) -> int: return len(self.shape) -def parse_name(data: JSON, expected: Optional[str] = None) -> str: +def parse_name(data: JSON, expected: str | None = None) -> str: if isinstance(data, str): if expected is None or data == expected: return data @@ -130,19 +130,19 @@ def parse_configuration(data: JSON) -> JSON: @overload def parse_named_configuration( - data: JSON, expected_name: Optional[str] = None + data: JSON, expected_name: str | None = None ) -> tuple[str, dict[str, JSON]]: ... @overload def parse_named_configuration( - data: JSON, expected_name: Optional[str] = None, *, require_configuration: bool = True -) -> tuple[str, Optional[dict[str, JSON]]]: ... + data: JSON, expected_name: str | None = None, *, require_configuration: bool = True +) -> tuple[str, dict[str, JSON] | None]: ... 
def parse_named_configuration( - data: JSON, expected_name: Optional[str] = None, *, require_configuration: bool = True -) -> tuple[str, Optional[JSON]]: + data: JSON, expected_name: str | None = None, *, require_configuration: bool = True +) -> tuple[str, JSON | None]: if not isinstance(data, dict): raise TypeError(f"Expected dict, got {type(data)}") if "name" not in data: diff --git a/src/zarr/group.py b/src/zarr/group.py index 6cd6ab6aad..3371b240a4 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -1,19 +1,20 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Iterator -from dataclasses import asdict, dataclass, field, replace import asyncio import json import logging +from collections.abc import Iterator +from dataclasses import asdict, dataclass, field, replace +from typing import TYPE_CHECKING, overload + import numpy.typing as npt -from zarr.abc.store import set_or_delete from zarr.abc.codec import Codec from zarr.abc.metadata import Metadata - -from zarr.buffer import Buffer -from zarr.array import AsyncArray, Array +from zarr.abc.store import set_or_delete +from zarr.array import Array, AsyncArray from zarr.attributes import Attributes +from zarr.buffer import Buffer from zarr.chunk_key_encodings import ChunkKeyEncoding from zarr.common import ( JSON, @@ -26,10 +27,10 @@ ) from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import SyncMixin, sync -from typing import overload if TYPE_CHECKING: - from typing import Any, AsyncGenerator, Literal, Iterable + from collections.abc import AsyncGenerator, Iterable + from typing import Any, Literal logger = logging.getLogger("zarr.group") @@ -332,7 +333,7 @@ async def create_array( zarr_format=self.metadata.zarr_format, ) - async def update_attributes(self, new_attributes: dict[str, Any]) -> "AsyncGroup": + async def update_attributes(self, new_attributes: dict[str, Any]) -> AsyncGroup: # metadata.attributes is "frozen" so we simply clear and update the dict self.metadata.attributes.clear() self.metadata.attributes.update(new_attributes) @@ -525,7 +526,7 @@ def attrs(self) -> Attributes: def info(self): return self._async_group.info - def update_attributes(self, new_attributes: dict[str, Any]) -> "Group": + def update_attributes(self, new_attributes: dict[str, Any]) -> Group: self._sync(self._async_group.update_attributes(new_attributes)) return self diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 8e7cd95430..b518d75677 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -2,7 +2,8 @@ import itertools import math -from typing import TYPE_CHECKING, Iterator, List, NamedTuple, Optional, Tuple +from collections.abc import Iterator +from typing import TYPE_CHECKING, NamedTuple from zarr.common import ChunkCoords, Selection, SliceSelection, product @@ -17,9 +18,7 @@ def _ensure_tuple(v: Selection) -> SliceSelection: def _err_too_many_indices(selection: SliceSelection, shape: ChunkCoords) -> None: - raise IndexError( - "too many indices for array; expected {}, got {}".format(len(shape), len(selection)) - ) + raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}") def _err_negative_step() -> None: @@ -50,7 +49,7 @@ def _ensure_selection( class _ChunkDimProjection(NamedTuple): dim_chunk_ix: int dim_chunk_sel: slice - dim_out_sel: Optional[slice] + dim_out_sel: slice | None def _ceildiv(a: float, b: float) -> int: @@ -127,13 +126,13 @@ class _ChunkProjection(NamedTuple): class BasicIndexer: - dim_indexers: List[_SliceDimIndexer] + 
dim_indexers: list[_SliceDimIndexer] shape: ChunkCoords def __init__( self, selection: Selection, - shape: Tuple[int, ...], + shape: tuple[int, ...], chunk_grid: ChunkGrid, ): from zarr.chunk_grids import RegularChunkGrid @@ -145,7 +144,7 @@ def __init__( self.dim_indexers = [ _SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) for dim_sel, dim_len, dim_chunk_len in zip( - _ensure_selection(selection, shape), shape, chunk_grid.chunk_shape + _ensure_selection(selection, shape), shape, chunk_grid.chunk_shape, strict=False ) ] self.shape = tuple(s.nitems for s in self.dim_indexers) @@ -206,7 +205,7 @@ def is_total_slice(item: Selection, shape: ChunkCoords) -> bool: or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None])) ) ) - for dim_sel, dim_len in zip(item, shape) + for dim_sel, dim_len in zip(item, shape, strict=False) ) else: raise TypeError("expected slice or tuple of slices, found %r" % item) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 695d83da55..8db8c8033e 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -1,9 +1,12 @@ from __future__ import annotations + +import json from abc import ABC, abstractmethod -from enum import Enum -from typing import TYPE_CHECKING, Any, cast, Iterable +from collections.abc import Iterable from dataclasses import dataclass, field, replace -import json +from enum import Enum +from typing import TYPE_CHECKING, Any, cast + import numpy as np import numpy.typing as npt @@ -14,9 +17,9 @@ from zarr.chunk_key_encodings import ChunkKeyEncoding, parse_separator from zarr.codecs._v2 import V2Compressor, V2Filters - if TYPE_CHECKING: from typing import Literal + from typing_extensions import Self @@ -33,7 +36,6 @@ ) from zarr.config import parse_indexing_order - # For type checking _bool = bool diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py index c6ffbc6c05..fac2d87e41 100644 --- a/src/zarr/store/core.py +++ b/src/zarr/store/core.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Optional, Tuple, Union +from typing import Any, Union from zarr.abc.store import Store from zarr.buffer import Buffer @@ -21,14 +21,14 @@ class StorePath: store: Store path: str - def __init__(self, store: Store, path: Optional[str] = None): + def __init__(self, store: Store, path: str | None = None): self.store = store self.path = path or "" - async def get(self, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> Optional[Buffer]: + async def get(self, byte_range: tuple[int, int | None] | None = None) -> Buffer | None: return await self.store.get(self.path, byte_range) - async def set(self, value: Buffer, byte_range: Optional[Tuple[int, int]] = None) -> None: + async def set(self, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: if byte_range is not None: raise NotImplementedError("Store.set does not have partial writes yet") await self.store.set(self.path, value) diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index c053f941ef..467be69048 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -1,11 +1,10 @@ from __future__ import annotations -from collections.abc import AsyncGenerator -from typing import Optional, MutableMapping, List, Tuple +from collections.abc import AsyncGenerator, MutableMapping -from zarr.common import concurrent_map from zarr.abc.store import Store from zarr.buffer import Buffer +from zarr.common import concurrent_map # TODO: this store could easily be extended to wrap any MutableMapping 
store from v2 @@ -17,7 +16,7 @@ class MemoryStore(Store): _store_dict: MutableMapping[str, Buffer] - def __init__(self, store_dict: Optional[MutableMapping[str, Buffer]] = None): + def __init__(self, store_dict: MutableMapping[str, Buffer] | None = None): self._store_dict = store_dict or {} def __str__(self) -> str: @@ -27,8 +26,8 @@ def __repr__(self) -> str: return f"MemoryStore({str(self)!r})" async def get( - self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[Buffer]: + self, key: str, byte_range: tuple[int, int | None] | None = None + ) -> Buffer | None: assert isinstance(key, str) try: value = self._store_dict[key] @@ -39,17 +38,15 @@ async def get( return None async def get_partial_values( - self, key_ranges: List[Tuple[str, Tuple[int, int]]] - ) -> List[Optional[Buffer]]: + self, key_ranges: list[tuple[str, tuple[int, int]]] + ) -> list[Buffer | None]: vals = await concurrent_map(key_ranges, self.get, limit=None) return vals async def exists(self, key: str) -> bool: return key in self._store_dict - async def set( - self, key: str, value: Buffer, byte_range: Optional[Tuple[int, int]] = None - ) -> None: + async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: assert isinstance(key, str) if isinstance(value, (bytes, bytearray)): # TODO: to support the v2 tests, we convert bytes to Buffer here @@ -70,7 +67,7 @@ async def delete(self, key: str) -> None: except KeyError: pass # Q(JH): why not raise? - async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None: + async def set_partial_values(self, key_start_values: list[tuple[str, int, bytes]]) -> None: raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index 35fd2d60b6..2986133fbd 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -1,15 +1,14 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any from zarr.abc.store import Store from zarr.buffer import Buffer from zarr.store.core import _dereference_path - if TYPE_CHECKING: - from upath import UPath from fsspec.asyn import AsyncFileSystem + from upath import UPath class RemoteStore(Store): @@ -19,9 +18,9 @@ class RemoteStore(Store): root: UPath - def __init__(self, url: Union[UPath, str], **storage_options: Dict[str, Any]): - from upath import UPath + def __init__(self, url: UPath | str, **storage_options: dict[str, Any]): import fsspec + from upath import UPath if isinstance(url, str): self.root = UPath(url, **storage_options) @@ -41,7 +40,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"RemoteStore({str(self)!r})" - def _make_fs(self) -> Tuple[AsyncFileSystem, str]: + def _make_fs(self) -> tuple[AsyncFileSystem, str]: import fsspec storage_options = self.root._kwargs.copy() @@ -51,8 +50,8 @@ def _make_fs(self) -> Tuple[AsyncFileSystem, str]: return fs, root async def get( - self, key: str, byte_range: Optional[Tuple[int, Optional[int]]] = None - ) -> Optional[Buffer]: + self, key: str, byte_range: tuple[int, int | None] | None = None + ) -> Buffer | None: assert isinstance(key, str) fs, root = self._make_fs() path = _dereference_path(root, key) @@ -68,9 +67,7 @@ async def get( return value - async def set( - self, key: str, value: Buffer, byte_range: Optional[Tuple[int, int]] = None - ) -> None: + async def set(self, key: str, value: Buffer, byte_range: 
tuple[int, int] | None = None) -> None: assert isinstance(key, str) fs, root = self._make_fs() path = _dereference_path(root, key) diff --git a/src/zarr/sync.py b/src/zarr/sync.py index ea765077ce..8af14f602e 100644 --- a/src/zarr/sync.py +++ b/src/zarr/sync.py @@ -1,12 +1,14 @@ from __future__ import annotations + from typing import TYPE_CHECKING, TypeVar if TYPE_CHECKING: - from typing import Any, AsyncIterator, Coroutine + from collections.abc import AsyncIterator, Coroutine + from typing import Any import asyncio -from concurrent.futures import wait import threading +from concurrent.futures import wait from typing_extensions import ParamSpec diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 3588048906..80eaa0dbf7 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -1,4 +1,5 @@ from __future__ import annotations + from typing import TYPE_CHECKING from zarr.common import ZarrFormat @@ -6,12 +7,12 @@ if TYPE_CHECKING: from typing import Any, Literal -from dataclasses import dataclass, field import pathlib +from dataclasses import dataclass, field import pytest -from zarr.store import LocalStore, StorePath, MemoryStore +from zarr.store import LocalStore, MemoryStore, StorePath from zarr.store.remote import RemoteStore diff --git a/tests/v3/package_with_entrypoint/__init__.py b/tests/v3/package_with_entrypoint/__init__.py index 8b31733069..b8bf903c01 100644 --- a/tests/v3/package_with_entrypoint/__init__.py +++ b/tests/v3/package_with_entrypoint/__init__.py @@ -1,4 +1,5 @@ from numpy import ndarray + from zarr.abc.codec import ArrayBytesCodec from zarr.common import ArraySpec, BytesLike diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index a56c768782..4ab92768b4 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional +from collections.abc import Iterable +from typing import TYPE_CHECKING, Any, Literal import numpy as np import numpy.typing as npt @@ -31,7 +32,7 @@ def create( shape: Iterable[int], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", - fill_value: Optional[Any] = None, + fill_value: Any | None = None, ) -> Self: """Overwrite `NDBuffer.create` to create an MyNDArrayLike instance""" ret = cls(MyNDArrayLike(shape=shape, dtype=dtype, order=order)) diff --git a/tests/v3/test_codec_entrypoints.py b/tests/v3/test_codec_entrypoints.py index 8fbf76b83d..6b5c221f4d 100644 --- a/tests/v3/test_codec_entrypoints.py +++ b/tests/v3/test_codec_entrypoints.py @@ -5,7 +5,6 @@ import zarr.codecs.registry - here = os.path.abspath(os.path.dirname(__file__)) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 73553b5565..5f94114ede 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -1,29 +1,29 @@ from __future__ import annotations -from dataclasses import dataclass import json -from typing import Iterator, List, Literal, Optional, Tuple - +from collections.abc import Iterator +from dataclasses import dataclass +from typing import Literal import numpy as np import pytest + import zarr.v2 from zarr.abc.codec import Codec +from zarr.abc.store import Store from zarr.array import Array, AsyncArray -from zarr.common import Selection -from zarr.indexing import morton_order_iter from zarr.codecs import ( - ShardingCodec, - ShardingCodecIndexLocation, BloscCodec, BytesCodec, GzipCodec, + ShardingCodec, + ShardingCodecIndexLocation, TransposeCodec, ZstdCodec, ) - -from zarr.abc.store import Store +from 
diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py
index 73553b5565..5f94114ede 100644
--- a/tests/v3/test_codecs.py
+++ b/tests/v3/test_codecs.py
@@ -1,29 +1,29 @@
 from __future__ import annotations
 
-from dataclasses import dataclass
 import json
-from typing import Iterator, List, Literal, Optional, Tuple
-
+from collections.abc import Iterator
+from dataclasses import dataclass
+from typing import Literal
 
 import numpy as np
 import pytest
+
 import zarr.v2
 from zarr.abc.codec import Codec
+from zarr.abc.store import Store
 from zarr.array import Array, AsyncArray
-from zarr.common import Selection
-from zarr.indexing import morton_order_iter
 from zarr.codecs import (
-    ShardingCodec,
-    ShardingCodecIndexLocation,
     BloscCodec,
     BytesCodec,
     GzipCodec,
+    ShardingCodec,
+    ShardingCodecIndexLocation,
     TransposeCodec,
     ZstdCodec,
 )
-
-from zarr.abc.store import Store
+from zarr.common import Selection
 from zarr.config import config
+from zarr.indexing import morton_order_iter
 from zarr.store import MemoryStore, StorePath
@@ -57,7 +57,7 @@ def sample_data() -> np.ndarray:
     return np.arange(0, 128 * 128 * 128, dtype="uint16").reshape((128, 128, 128), order="F")
 
 
-def order_from_dim(order: Literal["F", "C"], ndim: int) -> Tuple[int, ...]:
+def order_from_dim(order: Literal["F", "C"], ndim: int) -> tuple[int, ...]:
     if order == "F":
         return tuple(ndim - x - 1 for x in range(ndim))
     else:
@@ -243,7 +243,7 @@ async def test_order(
 ):
     data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order)
 
-    codecs_: List[Codec] = (
+    codecs_: list[Codec] = (
         [
             ShardingCodec(
                 chunk_shape=(16, 8),
@@ -310,7 +310,7 @@ def test_order_implicit(
 ):
     data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order)
 
-    codecs_: Optional[List[Codec]] = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None
+    codecs_: list[Codec] | None = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None
 
     with config.set({"array.order": runtime_write_order}):
         a = Array.create(
@@ -352,7 +352,7 @@ async def test_transpose(
 ):
     data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order)
 
-    codecs_: List[Codec] = (
+    codecs_: list[Codec] = (
         [
             ShardingCodec(
                 chunk_shape=(1, 16, 8),
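The `Tuple[int, ...]` → `tuple[int, ...]` and `Optional[List[Codec]]` → `list[Codec] | None` rewrites above are pyupgrade's UP006/UP007 fixes. A standalone sketch, borrowing `order_from_dim` from the hunk above but with simplified types; it assumes Python 3.10+, since the union annotation on the module-level variable is evaluated at import time:

from typing import Literal


def order_from_dim(order: Literal["F", "C"], ndim: int) -> tuple[int, ...]:
    # Same logic as the test helper above: "F" reverses the axis order.
    if order == "F":
        return tuple(ndim - x - 1 for x in range(ndim))
    return tuple(range(ndim))


# PEP 604 union on a module-level variable annotation: evaluated at
# import time, hence the 3.10+ requirement for this spelling.
codecs_: list[str] | None = None

print(order_from_dim("F", 3))  # -> (2, 1, 0)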
diff --git a/tests/v3/test_common.py b/tests/v3/test_common.py
index b5690d0d7e..3bdbd2bffe 100644
--- a/tests/v3/test_common.py
+++ b/tests/v3/test_common.py
@@ -1,18 +1,20 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Iterable
+
+from collections.abc import Iterable
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import Literal, Any, Tuple
+    from typing import Any, Literal
 
 import numpy as np
-from zarr.config import parse_indexing_order
-from zarr.common import parse_shapelike
-from zarr.common import parse_name, product
 import pytest
 
+from zarr.common import parse_name, parse_shapelike, product
+from zarr.config import parse_indexing_order
+
 
 @pytest.mark.parametrize("data", [(0, 0, 0, 0), (1, 3, 4, 5, 6), (2, 4)])
-def test_product(data: Tuple[int, ...]):
+def test_product(data: tuple[int, ...]):
     assert product(data) == np.prod(data)
 
 
@@ -33,7 +35,7 @@ def test_parse_enum(): ...
 
 
 @pytest.mark.parametrize("data", [("foo", "bar"), (10, 11)])
-def test_parse_name_invalid(data: Tuple[Any, Any]):
+def test_parse_name_invalid(data: tuple[Any, Any]):
     observed, expected = data
     if isinstance(observed, str):
         with pytest.raises(ValueError, match=f"Expected '{expected}'. Got {observed} instead."):
@@ -46,7 +48,7 @@ def test_parse_name_invalid(data: Tuple[Any, Any]):
 
 
 @pytest.mark.parametrize("data", [("foo", "foo"), ("10", "10")])
-def test_parse_name_valid(data: Tuple[Any, Any]):
+def test_parse_name_valid(data: tuple[Any, Any]):
     observed, expected = data
     assert parse_name(observed, expected) == observed
 
@@ -83,7 +85,7 @@ def test_parse_shapelike_valid(data: Iterable[Any]):
 
 # todo: more dtypes
 @pytest.mark.parametrize("data", [("uint8", np.uint8), ("float64", np.float64)])
-def parse_dtype(data: Tuple[str, np.dtype]):
+def parse_dtype(data: tuple[str, np.dtype]):
     unparsed, parsed = data
     assert parse_dtype(unparsed) == parsed
diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py
index 5a6751c11a..14d7765024 100644
--- a/tests/v3/test_group.py
+++ b/tests/v3/test_group.py
@@ -1,17 +1,18 @@
 from __future__ import annotations
+
 from typing import TYPE_CHECKING, Any
 
-from zarr.buffer import Buffer
-from zarr.sync import sync
 from zarr.array import AsyncArray
+from zarr.buffer import Buffer
 from zarr.store.core import make_store_path
+from zarr.sync import sync
 
 if TYPE_CHECKING:
-    from zarr.store import MemoryStore, LocalStore
     from zarr.common import ZarrFormat
+    from zarr.store import LocalStore, MemoryStore
 
-import pytest
 import numpy as np
+import pytest
 
 from zarr.group import AsyncGroup, Group, GroupMetadata
 from zarr.store import StorePath
diff --git a/tests/v3/test_metadata.py b/tests/v3/test_metadata.py
index 33df4a6438..65297c52d8 100644
--- a/tests/v3/test_metadata.py
+++ b/tests/v3/test_metadata.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
-import pytest
+
 from typing import TYPE_CHECKING
 
+import pytest
+
 if TYPE_CHECKING:
-    from typing import Sequence, Any
+    from collections.abc import Sequence
+    from typing import Any
 
 from zarr.metadata import parse_dimension_names, parse_zarr_format_v2, parse_zarr_format_v3
diff --git a/tests/v3/test_store.py b/tests/v3/test_store.py
index e514d505ce..f7ba46aa33 100644
--- a/tests/v3/test_store.py
+++ b/tests/v3/test_store.py
@@ -1,10 +1,12 @@
 from __future__ import annotations
-from zarr.store.local import LocalStore
+
 from pathlib import Path
+
 import pytest
 
-from zarr.testing.store import StoreTests
+from zarr.store.local import LocalStore
 from zarr.store.memory import MemoryStore
+from zarr.testing.store import StoreTests
 
 
 @pytest.mark.parametrize("auto_mkdir", (True, False))
diff --git a/tests/v3/test_sync.py b/tests/v3/test_sync.py
index ba262f521d..5b953573d8 100644
--- a/tests/v3/test_sync.py
+++ b/tests/v3/test_sync.py
@@ -1,12 +1,12 @@
-from collections.abc import AsyncGenerator
 import asyncio
 import time
-from unittest.mock import patch, AsyncMock
-
-from zarr.sync import sync, _get_loop, _get_lock, SyncError, SyncMixin
+from collections.abc import AsyncGenerator
+from unittest.mock import AsyncMock, patch
 
 import pytest
 
+from zarr.sync import SyncError, SyncMixin, _get_lock, _get_loop, sync
+
 
 @pytest.fixture(params=[True, False])
 def sync_loop(request) -> asyncio.AbstractEventLoop | None:
diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py
index 5b831b1bb0..2a38dc8fdc 100644
--- a/tests/v3/test_v2.py
+++ b/tests/v3/test_v2.py
@@ -1,10 +1,11 @@
-from typing import Iterator
+from collections.abc import Iterator
+
 import numpy as np
 import pytest
 
 from zarr.abc.store import Store
 from zarr.array import Array
-from zarr.store import StorePath, MemoryStore
+from zarr.store import MemoryStore, StorePath
 
 
 @pytest.fixture
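That closes PATCH 2, which is almost entirely Ruff's isort rule ("I") reordering imports. The layout it converges on — alphabetized standard-library, third-party, and first-party blocks, one blank line apart — looks like the following sketch, which reuses names from the patch and assumes numpy, pytest, and zarr are importable:

# Standard library, alphabetized.
import asyncio
from collections.abc import Iterator

# Third-party, after one blank line.
import numpy as np
import pytest

# First-party (zarr itself) last.
from zarr.array import Array
from zarr.store import MemoryStore, StorePath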
From 92c55d94aae3c3a1585498e774d6f9725ea7f2cb Mon Sep 17 00:00:00 2001
From: Norman Rzepka
Date: Fri, 17 May 2024 11:36:30 +0200
Subject: [PATCH 3/4] manual fixes

---
 src/zarr/buffer.py                     |  2 +-
 src/zarr/codecs/pipeline.py            |  8 ++------
 src/zarr/common.py                     | 12 ++----------
 src/zarr/fixture/.zgroup               |  3 +++
 src/zarr/fixture/flat/.zarray          | 23 +++++++++++++++++++++++
 src/zarr/fixture/flat/0.0              | Bin 0 -> 48 bytes
 src/zarr/fixture/flat_legacy/.zarray   | 22 ++++++++++++++++++++++
 src/zarr/fixture/flat_legacy/0.0       | Bin 0 -> 48 bytes
 src/zarr/fixture/meta/.zarray          | 23 +++++++++++++++++++++++
 src/zarr/fixture/meta/0.0              | Bin 0 -> 48 bytes
 src/zarr/fixture/nested/.zarray        | 23 +++++++++++++++++++++++
 src/zarr/fixture/nested/0/0            | Bin 0 -> 48 bytes
 src/zarr/fixture/nested_legacy/.zarray | 23 +++++++++++++++++++++++
 src/zarr/fixture/nested_legacy/0/0     | Bin 0 -> 48 bytes
 src/zarr/group.py                      | 15 +++++++++------
 src/zarr/indexing.py                   |  2 +-
 src/zarr/store/core.py                 |  4 ++--
 src/zarr/store/local.py                |  2 +-
 src/zarr/store/memory.py               |  2 +-
 src/zarr/testing/__init__.py           |  2 +-
 tests/v3/conftest.py                   |  2 +-
 tests/v3/test_group.py                 |  6 +++---
 22 files changed, 141 insertions(+), 33 deletions(-)
 create mode 100644 src/zarr/fixture/.zgroup
 create mode 100644 src/zarr/fixture/flat/.zarray
 create mode 100644 src/zarr/fixture/flat/0.0
 create mode 100644 src/zarr/fixture/flat_legacy/.zarray
 create mode 100644 src/zarr/fixture/flat_legacy/0.0
 create mode 100644 src/zarr/fixture/meta/.zarray
 create mode 100644 src/zarr/fixture/meta/0.0
 create mode 100644 src/zarr/fixture/nested/.zarray
 create mode 100644 src/zarr/fixture/nested/0/0
 create mode 100644 src/zarr/fixture/nested_legacy/.zarray
 create mode 100644 src/zarr/fixture/nested_legacy/0/0

diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py
index 7f74ee080f..84bf6b0bb0 100644
--- a/src/zarr/buffer.py
+++ b/src/zarr/buffer.py
@@ -225,7 +225,7 @@ def __add__(self, other: Buffer) -> Self:
         return self.__class__(np.concatenate((self._data, other_array)))
 
     def __eq__(self, other: Any) -> bool:
-        if isinstance(other, (bytes, bytearray)):
+        if isinstance(other, bytes | bytearray):
             # Many of the tests compares `Buffer` with `bytes` so we
             # convert the bytes to a Buffer and try again
             return self == self.from_bytes(other)
diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py
index 607745bef8..57b4fa4668 100644
--- a/src/zarr/codecs/pipeline.py
+++ b/src/zarr/codecs/pipeline.py
@@ -182,13 +182,9 @@ def supports_partial_encode(self) -> bool:
         )
 
     def __iter__(self) -> Iterator[Codec]:
-        for aa_codec in self.array_array_codecs:
-            yield aa_codec
-
+        yield from self.array_array_codecs
         yield self.array_bytes_codec
-
-        for bb_codec in self.bytes_bytes_codecs:
-            yield bb_codec
+        yield from self.bytes_bytes_codecs
 
     def validate(self, array_metadata: ArrayMetadata) -> None:
         for codec in self:
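The `__iter__` rewrite above replaces explicit for/yield loops with `yield from` delegation. A self-contained sketch of the equivalence, with illustrative names rather than the real codec types:

def chain_explicit(first, middle, last):
    # The pre-patch shape: one loop per sub-sequence.
    for item in first:
        yield item
    yield middle
    for item in last:
        yield item


def chain_delegating(first, middle, last):
    # The post-patch shape: `yield from` delegates to each sub-iterable.
    yield from first
    yield middle
    yield from last


assert list(chain_explicit([1], 2, [3])) == list(chain_delegating([1], 2, [3])) == [1, 2, 3]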
diff --git a/src/zarr/common.py b/src/zarr/common.py
index 20b372e74d..5781cc423b 100644
--- a/src/zarr/common.py
+++ b/src/zarr/common.py
@@ -6,15 +6,7 @@
 from collections.abc import Iterable
 from dataclasses import dataclass
 from enum import Enum
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Literal,
-    ParamSpec,
-    TypeVar,
-    Union,
-    overload,
-)
+from typing import TYPE_CHECKING, Any, Literal, ParamSpec, TypeVar, overload
 
 if TYPE_CHECKING:
     from collections.abc import Awaitable, Callable, Iterator
@@ -32,7 +24,7 @@
 SliceSelection = tuple[slice, ...]
 Selection = slice | SliceSelection
 ZarrFormat = Literal[2, 3]
-JSON = Union[str, None, int, float, Enum, dict[str, "JSON"], list["JSON"], tuple["JSON", ...]]
+JSON = None | str | int | float | Enum | dict[str, "JSON"] | list["JSON"] | tuple["JSON", ...]
 
 
 def product(tup: ChunkCoords) -> int:
diff --git a/src/zarr/fixture/.zgroup b/src/zarr/fixture/.zgroup
new file mode 100644
index 0000000000..3b7daf227c
--- /dev/null
+++ b/src/zarr/fixture/.zgroup
@@ -0,0 +1,3 @@
+{
+    "zarr_format": 2
+}
\ No newline at end of file
diff --git a/src/zarr/fixture/flat/.zarray b/src/zarr/fixture/flat/.zarray
new file mode 100644
index 0000000000..d1acce7665
--- /dev/null
+++ b/src/zarr/fixture/flat/.zarray
@@ -0,0 +1,23 @@
+{
+    "chunks": [
+        2,
+        2
+    ],
+    "compressor": {
+        "blocksize": 0,
+        "clevel": 5,
+        "cname": "lz4",
+        "id": "blosc",
+        "shuffle": 1
+    },
+    "dimension_separator": ".",
+    "dtype": "
diff --git a/src/zarr/group.py b/src/zarr/group.py
@@ ... @@ def _parse_async_node(...) -> Array | Group:
     elif isinstance(node, AsyncGroup):
         return Group(node)
     else:
-        assert False
+        raise TypeError(f"Unknown node type, got {type(node)}")
@@ -115,7 +115,7 @@ async def create(
         cls,
         store: StoreLike,
         *,
-        attributes: dict[str, Any] = {},
+        attributes: dict[str, Any] = {},  # noqa: B006, FIXME
         exists_ok: bool = False,
         zarr_format: ZarrFormat = 3,
     ) -> AsyncGroup:
@@ -279,7 +279,10 @@ def info(self):
         return self.metadata.info
 
     async def create_group(
-        self, path: str, exists_ok: bool = False, attributes: dict[str, Any] = {}
+        self,
+        path: str,
+        exists_ok: bool = False,
+        attributes: dict[str, Any] = {},  # noqa: B006, FIXME
     ) -> AsyncGroup:
         return await type(self).create(
             self.store_path / path,
@@ -402,7 +405,7 @@ async def group_keys(self) -> AsyncGenerator[str, None]:
 
     # todo: decide if this method should be separate from `group_keys`
     async def groups(self) -> AsyncGenerator[AsyncGroup, None]:
-        async for key, value in self.members():
+        async for _, value in self.members():
             if isinstance(value, AsyncGroup):
                 yield value
 
@@ -414,7 +417,7 @@ async def array_keys(self) -> AsyncGenerator[str, None]:
 
     # todo: decide if this method should be separate from `array_keys`
     async def arrays(self) -> AsyncGenerator[AsyncArray, None]:
-        async for key, value in self.members():
+        async for _, value in self.members():
             if isinstance(value, AsyncArray):
                 yield value
 
@@ -458,7 +461,7 @@ def create(
         cls,
         store: StoreLike,
         *,
-        attributes: dict[str, Any] = {},
+        attributes: dict[str, Any] = {},  # noqa: B006, FIXME
         exists_ok: bool = False,
     ) -> Group:
         obj = sync(
diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py
index b518d75677..45413bc5b2 100644
--- a/src/zarr/indexing.py
+++ b/src/zarr/indexing.py
@@ -208,4 +208,4 @@ def is_total_slice(item: Selection, shape: ChunkCoords) -> bool:
             for dim_sel, dim_len in zip(item, shape, strict=False)
         )
     else:
-        raise TypeError("expected slice or tuple of slices, found %r" % item)
+        raise TypeError(f"expected slice or tuple of slices, found {item!r}")
diff --git a/src/zarr/store/core.py b/src/zarr/store/core.py
index fac2d87e41..31cce65095 100644
--- a/src/zarr/store/core.py
+++ b/src/zarr/store/core.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any, Union
+from typing import Any
 
 from zarr.abc.store import Store
 from zarr.buffer import Buffer
@@ -57,7 +57,7 @@ def __eq__(self, other: Any) -> bool:
         return False
 
 
-StoreLike = Union[Store, StorePath, Path, str]
+StoreLike = Store | StorePath | Path | str
 
 
 def make_store_path(store_like: StoreLike) -> StorePath:
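The `# noqa: B006, FIXME` markers above silence flake8-bugbear's mutable-default-argument rule rather than fix it. A sketch of the bug B006 guards against; `create_bad` and `create_good` are hypothetical, not from `group.py`, and the bare `dict | None` annotation assumes Python 3.10+:

def create_bad(attributes: dict = {}):  # noqa: B006 -- the pattern B006 flags
    # The default dict is built once, at definition time, and shared.
    attributes["touched"] = True
    return attributes


def create_good(attributes: dict | None = None):
    # The usual fix the FIXME defers: build a fresh dict per call.
    attributes = {} if attributes is None else attributes
    attributes["touched"] = True
    return attributes


a, b = create_bad(), create_bad()
assert a is b  # both calls mutated one shared dict
c, d = create_good(), create_good()
assert c is not d  # each call got its own dict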
diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py
index f27b832a39..60d0022f94 100644
--- a/src/zarr/store/local.py
+++ b/src/zarr/store/local.py
@@ -122,7 +122,7 @@ async def get_partial_values(
 
     async def set(self, key: str, value: Buffer) -> None:
         assert isinstance(key, str)
-        if isinstance(value, (bytes, bytearray)):
+        if isinstance(value, bytes | bytearray):
             # TODO: to support the v2 tests, we convert bytes to Buffer here
             value = Buffer.from_bytes(value)
         if not isinstance(value, Buffer):
diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py
index 467be69048..c6e838417e 100644
--- a/src/zarr/store/memory.py
+++ b/src/zarr/store/memory.py
@@ -48,7 +48,7 @@ async def exists(self, key: str) -> bool:
 
     async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None:
         assert isinstance(key, str)
-        if isinstance(value, (bytes, bytearray)):
+        if isinstance(value, bytes | bytearray):
             # TODO: to support the v2 tests, we convert bytes to Buffer here
             value = Buffer.from_bytes(value)
         if not isinstance(value, Buffer):
diff --git a/src/zarr/testing/__init__.py b/src/zarr/testing/__init__.py
index 9b622b43cd..35b91f9167 100644
--- a/src/zarr/testing/__init__.py
+++ b/src/zarr/testing/__init__.py
@@ -4,6 +4,6 @@
 if importlib.util.find_spec("pytest") is not None:
     from zarr.testing.store import StoreTests
 else:
-    warnings.warn("pytest not installed, skipping test suite")
+    warnings.warn("pytest not installed, skipping test suite", stacklevel=2)
 
 __all__ = ["StoreTests"]
diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py
index 80eaa0dbf7..b6a121520d 100644
--- a/tests/v3/conftest.py
+++ b/tests/v3/conftest.py
@@ -25,7 +25,7 @@ def parse_store(
         return MemoryStore()
     if store == "remote":
         return RemoteStore()
-    assert False
+    raise AssertionError()
 
 
 @pytest.fixture(params=[str, pathlib.Path])
diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py
index 14d7765024..771baddc0b 100644
--- a/tests/v3/test_group.py
+++ b/tests/v3/test_group.py
@@ -203,7 +203,7 @@ async def test_asyncgroup_open_wrong_format(
     elif zarr_format == 2:
         zarr_format_wrong = 3
     else:
-        assert False
+        raise AssertionError()
 
     with pytest.raises(FileNotFoundError):
         await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong)
@@ -278,7 +278,7 @@ async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format:
     elif zarr_format == 3:
         assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json")
     else:
-        assert False
+        raise AssertionError()
 
     sub_group_path = "sub_group"
     _ = await agroup.create_group(sub_group_path, attributes={"foo": 100})
@@ -289,7 +289,7 @@ async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format:
     elif zarr_format == 3:
         assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json")
     else:
-        assert False
+        raise AssertionError()
 
 
 @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"])
From f833d7c0041314a3e0c94071e18f39a02853b99c Mon Sep 17 00:00:00 2001
From: Norman Rzepka
Date: Fri, 17 May 2024 11:38:38 +0200
Subject: [PATCH 4/4] ci
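A closing note on the `assert False` → `raise AssertionError()` changes in PATCH 3 (flake8-bugbear B011): `assert` statements are compiled away when Python runs with `-O`, so an unreachable-branch guard written as `assert False` silently disappears. A hypothetical reduction of `conftest.parse_store`, not the real fixture:

def parse_store_kind(store: str) -> str:
    # Hypothetical stand-in for conftest.parse_store.
    if store == "memory":
        return "MemoryStore"
    if store == "local":
        return "LocalStore"
    # Unlike `assert False`, this still raises under `python -O`.
    raise AssertionError()


print(parse_store_kind("memory"))  # -> MemoryStore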