|
1 | 1 | import abc
|
2 | 2 | import os
|
| 3 | +from collections import defaultdict |
3 | 4 | from collections.abc import MutableMapping
|
| 5 | +from copy import copy |
4 | 6 | from string import ascii_letters, digits
|
5 |
| -from typing import Any, List, Mapping, Optional, Union |
| 7 | +from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Union |
6 | 8 |
|
7 | 9 | from zarr.meta import Metadata2, Metadata3
|
8 | 10 | from zarr.util import normalize_storage_path
|
@@ -254,6 +256,82 @@ def __setitem__(self, key, value):
|
254 | 256 | def __getitem__(self, key):
|
255 | 257 | """Get a value."""
|
256 | 258 |
|
@abc.abstractmethod
def rmdir(self, path=None):
    """Remove the data path ``path`` together with every key below it
    and the metadata related to it.

    ``path`` is expected without the data or meta root prefix.
    """
| 263 | + |
@property
def supports_efficient_get_partial_values(self):
    """Flag telling callers whether partial reads are efficient.

    This implementation always reports ``False``.
    """
    return False
| 267 | + |
def get_partial_values(
    self,
    key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]]
) -> List[Union[bytes, memoryview, bytearray]]:
    """Get multiple partial values.

    Parameters
    ----------
    key_ranges
        Iterable of ``(key, (range_start, range_length))`` pairs.
        ``range_length`` may be None to indicate to read until the end.
        ``range_start`` may be negative to start reading ``range_start``
        bytes from the end of the value. A key may occur multiple times
        with different ranges.

    Returns
    -------
    list
        One entry per requested range, in request order. ``None`` is
        inserted for keys that are missing from the store.
    """
    # Materialize once so one-shot iterables (e.g. generators) are accepted,
    # as the documented contract only asks for an iterable.
    key_ranges = list(key_ranges)
    results: List[Union[bytes, memoryview, bytearray]] = (
        [None] * len(key_ranges)  # type: ignore[list-item]
    )
    # Group the requests by key so that each value is fetched only once,
    # remembering the position each range occupies in the result list.
    indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = (
        defaultdict(list)
    )
    for i, (key, range_) in enumerate(key_ranges):
        indexed_ranges_by_key[key].append((i, range_))
    for key, indexed_ranges in indexed_ranges_by_key.items():
        try:
            value = self[key]
        except KeyError:  # pragma: no cover
            continue
        for i, (range_from, range_length) in indexed_ranges:
            if range_length is None:
                range_to = None
            else:
                range_to = range_from + range_length
                # A negative start whose range reaches (or passes) the end
                # would yield a stop index >= 0, which Python interprets as
                # an offset from the beginning and returns empty/wrong data.
                # Read until the end instead.
                if range_from < 0 <= range_to:
                    range_to = None
            results[i] = value[range_from:range_to]
    return results
| 300 | + |
def supports_efficient_set_partial_values(self):
    """Tell callers whether partial writes are efficient.

    This implementation always answers ``False``. Note that this is a
    regular method and has to be called.
    """
    return False
| 303 | + |
def set_partial_values(self, key_start_values):
    """Set multiple partial values.

    Parameters
    ----------
    key_start_values
        Iterable of ``(key, start, value)`` triplets, where ``start``
        defines the offset in bytes. A key may occur multiple times with
        different starts and non-overlapping values. ``start`` may only be
        beyond the current value if other values fill the gap. ``start``
        may be negative to start writing ``start`` bytes from the current
        end of the value, ending the value with the new data.

    Raises
    ------
    ValueError
        If ``start`` lies beyond the end of the data currently assembled
        for the key.

    Notes
    -----
    Each affected key is read once, patched in memory and written back
    once. An empty ``key_start_values`` is a no-op.
    """
    # Materialize once: the triplets are traversed twice below, which would
    # silently drop all writes for one-shot iterables such as generators.
    key_start_values = list(key_start_values)

    # Load the current content of every affected key into a mutable buffer
    # (None marks a key that does not exist yet).
    values = {}
    for key, _, _ in key_start_values:
        if key not in values:
            old_value = self.get(key)
            values[key] = None if old_value is None else bytearray(old_value)

    for key, start, value in key_start_values:
        if values[key] is None:
            # A key that does not exist yet can only be written from offset 0.
            assert start == 0
            # Copy into a bytearray so that later writes to the same key can
            # patch the buffer in place without failing on immutable input
            # or mutating the caller's object.
            values[key] = bytearray(value)
        else:
            if start > len(values[key]):  # pragma: no cover
                raise ValueError(
                    f"Cannot set value at start {start}, "
                    + f"since it is beyond the data at key {key}, "
                    + f"having length {len(values[key])}."
                )
            if start < 0:
                # Replace the tail: the new value becomes the end of the data.
                values[key][start:] = value
            else:
                values[key][start:start + len(value)] = value

    for key, value in values.items():
        self[key] = value
| 334 | + |
257 | 335 | def clear(self):
|
258 | 336 | """Remove all items from store."""
|
259 | 337 | self.erase_prefix("/")
|
@@ -303,6 +381,151 @@ def _ensure_store(store):
|
303 | 381 | )
|
304 | 382 |
|
305 | 383 |
|
class StorageTransformer(MutableMapping, abc.ABC):
    """Base class for storage transformers.

    The methods simply pass on the data as-is to ``inner_store`` and should
    be overwritten by sub-classes. An instance has no inner store until a
    bound copy is obtained via ``_copy_for_array``.
    """

    _store_version = 3
    _metadata_class = Metadata3

    def __init__(self, _type) -> None:
        """Validate ``_type`` against ``valid_types`` and store it.

        Raises
        ------
        ValueError
            If ``_type`` is not contained in ``valid_types``.
        """
        if _type not in self.valid_types:  # pragma: no cover
            raise ValueError(
                f"Storage transformer cannot be initialized with type {_type}, "
                + f"must be one of {list(self.valid_types)}."
            )
        self.type = _type
        # Set on the copy returned by _copy_for_array; None until then.
        self._inner_store = None

    def _copy_for_array(self, array, inner_store):
        """Return a shallow copy of this transformer bound to ``inner_store``.

        ``array`` is not used by this base implementation.
        """
        transformer_copy = copy(self)
        transformer_copy._inner_store = inner_store
        return transformer_copy

    # ``abc.abstractproperty`` is deprecated since Python 3.3; stacking
    # ``property`` on ``abc.abstractmethod`` is the supported equivalent.
    @property
    @abc.abstractmethod
    def extension_uri(self):
        pass  # pragma: no cover

    @property
    @abc.abstractmethod
    def valid_types(self):
        pass  # pragma: no cover

    def get_config(self):
        """Return a dictionary holding configuration parameters for this
        storage transformer. All values must be compatible with JSON encoding."""
        # Override in sub-class if need special encoding of config values.
        # By default, assume all non-private members are configuration
        # parameters except for type.
        return {
            k: v for k, v in self.__dict__.items()
            if not k.startswith('_') and k != "type"
        }

    @classmethod
    def from_config(cls, _type, config):
        """Instantiate storage transformer from a configuration object."""
        # override in sub-class if need special decoding of config values

        # by default, assume constructor accepts configuration parameters as
        # keyword arguments without any special decoding
        return cls(_type, **config)

    @property
    def inner_store(self) -> Union["StorageTransformer", StoreV3]:
        """The wrapped store (or transformer) all operations delegate to.

        Fails with an assertion error if no inner store has been set yet.
        """
        assert self._inner_store is not None, (
            "inner_store is not initialized, first get a copy via _copy_for_array."
        )
        return self._inner_store

    # The following implementations are usually fine to keep as-is:

    def __eq__(self, other):
        """Equal if of the exact same class, with equal inner store and config."""
        return (
            type(self) is type(other) and
            self._inner_store == other._inner_store and
            self.get_config() == other.get_config()
        )

    def erase(self, key):
        """Delete ``key`` via ``__delitem__``."""
        self.__delitem__(key)

    def list(self):
        """Return all keys as a list."""
        return list(self.keys())

    def list_dir(self, prefix):
        """List ``prefix`` using the ``StoreV3.list_dir`` implementation."""
        return StoreV3.list_dir(self, prefix)

    def is_readable(self):
        return self.inner_store.is_readable()

    def is_writeable(self):
        return self.inner_store.is_writeable()

    def is_listable(self):
        return self.inner_store.is_listable()

    def is_erasable(self):
        return self.inner_store.is_erasable()

    def clear(self):
        return self.inner_store.clear()

    def __enter__(self):
        # Delegates to the inner store, so the value bound by ``with ... as``
        # is whatever the inner store's ``__enter__`` returns.
        return self.inner_store.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        return self.inner_store.__exit__(exc_type, exc_value, traceback)

    def close(self) -> None:
        return self.inner_store.close()

    # The following implementations might need to be re-implemented
    # by subclasses implementing storage transformers:

    def rename(self, src_path: str, dst_path: str) -> None:
        return self.inner_store.rename(src_path, dst_path)

    def list_prefix(self, prefix):
        return self.inner_store.list_prefix(prefix)

    def erase_prefix(self, prefix):
        return self.inner_store.erase_prefix(prefix)

    def rmdir(self, path=None):
        return self.inner_store.rmdir(path)

    def __contains__(self, key):
        return self.inner_store.__contains__(key)

    def __setitem__(self, key, value):
        return self.inner_store.__setitem__(key, value)

    def __getitem__(self, key):
        return self.inner_store.__getitem__(key)

    def __delitem__(self, key):
        return self.inner_store.__delitem__(key)

    def __iter__(self):
        return self.inner_store.__iter__()

    def __len__(self):
        return self.inner_store.__len__()

    @property
    def supports_efficient_get_partial_values(self):
        return self.inner_store.supports_efficient_get_partial_values

    def get_partial_values(self, key_ranges):
        return self.inner_store.get_partial_values(key_ranges)

    # NOTE: unlike the "get" counterpart above, this is a regular method and
    # is called as such on the inner store.
    def supports_efficient_set_partial_values(self):
        return self.inner_store.supports_efficient_set_partial_values()

    def set_partial_values(self, key_start_values):
        return self.inner_store.set_partial_values(key_start_values)
| 527 | + |
| 528 | + |
306 | 529 | # allow MutableMapping for backwards compatibility
|
307 | 530 | StoreLike = Union[BaseStore, MutableMapping]
|
308 | 531 |
|
|
0 commit comments