From d0d352c96a7fa3792b09f2893d55c668cf944834 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 29 Mar 2023 16:13:28 -0400 Subject: [PATCH 1/4] RF: add a helper add_cache to avoid duplication (.name) and allow for "formalized" addition of caches Aiming to possibly to expand with another cache without (initially) contributing to fsspec, thought to define helper. --- fsspec/caching.py | 54 ++++++++++++++++++++++++++++--------- fsspec/tests/test_caches.py | 9 ++++++- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/fsspec/caching.py b/fsspec/caching.py index b6d92587e..c3aacdbe1 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -766,15 +766,45 @@ def _read_cache(self, start, end, start_block_number, end_block_number): return b"".join(out) -caches = { - "none": BaseCache, - None: BaseCache, - "mmap": MMapCache, - "bytes": BytesCache, - "readahead": ReadAheadCache, - "block": BlockCache, - "first": FirstChunkCache, - "all": AllBytes, - "parts": KnownPartsOfAFile, - "background": BackgroundBlockCache, -} +caches = {} + + +def add_cache(cls, overload=False): + """'Register' cache implementation. + + Parameters + ---------- + overload: bool, optional + If overload is set to True (default is False) - allow to overload existing + entry. + + Raises + ------ + ValueError + """ + name = cls.name + if not overload and name in caches: + raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}") + caches[name] = cls + # custom handling for None + if name == "none": + if not overload: + if None in caches: + raise ValueError(f"Cache for None is already known: {caches[None]}") + caches[None] = cls + + +[ + add_cache(c) + for c in ( + BaseCache, + MMapCache, + BytesCache, + ReadAheadCache, + BlockCache, + FirstChunkCache, + AllBytes, + KnownPartsOfAFile, + BackgroundBlockCache, + ) +] diff --git a/fsspec/tests/test_caches.py b/fsspec/tests/test_caches.py index c655e2ae7..566a42c78 100644 --- a/fsspec/tests/test_caches.py +++ b/fsspec/tests/test_caches.py @@ -3,7 +3,7 @@ import pytest -from fsspec.caching import BlockCache, FirstChunkCache, caches +from fsspec.caching import BlockCache, FirstChunkCache, add_cache, caches def test_cache_getitem(Cache_imp): @@ -138,3 +138,10 @@ def wrapped(*a, **kw): f.read(1) time.sleep(0.1) # second block is loading assert len(thread_ids) == 2 + + +def test_add_cache(): + # just test that we have them populated and fail to re-add again unless overload + with pytest.raises(ValueError): + add_cache(BlockCache) + add_cache(BlockCache, overload=True) From 1bd592216fdfee47366896c0c53616a151b2620e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 3 Apr 2023 17:25:37 -0400 Subject: [PATCH 2/4] RF: adjust based on code review - harmonize to use clobber and register_cache --- fsspec/caching.py | 45 +++++++++++++++++-------------------- fsspec/tests/test_caches.py | 8 +++---- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/fsspec/caching.py b/fsspec/caching.py index c3aacdbe1..b48f43198 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -766,10 +766,13 @@ def _read_cache(self, start, end, start_block_number, end_block_number): return b"".join(out) -caches = {} +caches = { + # one custom case + None: BaseCache, +} -def add_cache(cls, overload=False): +def register_cache(cls, clobber=False): """'Register' cache implementation. Parameters @@ -783,28 +786,20 @@ def add_cache(cls, overload=False): ValueError """ name = cls.name - if not overload and name in caches: + if not clobber and name in caches: raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}") caches[name] = cls - # custom handling for None - if name == "none": - if not overload: - if None in caches: - raise ValueError(f"Cache for None is already known: {caches[None]}") - caches[None] = cls - - -[ - add_cache(c) - for c in ( - BaseCache, - MMapCache, - BytesCache, - ReadAheadCache, - BlockCache, - FirstChunkCache, - AllBytes, - KnownPartsOfAFile, - BackgroundBlockCache, - ) -] + + +for c in ( + BaseCache, + MMapCache, + BytesCache, + ReadAheadCache, + BlockCache, + FirstChunkCache, + AllBytes, + KnownPartsOfAFile, + BackgroundBlockCache, +): + register_cache(c) diff --git a/fsspec/tests/test_caches.py b/fsspec/tests/test_caches.py index 566a42c78..65c80cdb4 100644 --- a/fsspec/tests/test_caches.py +++ b/fsspec/tests/test_caches.py @@ -3,7 +3,7 @@ import pytest -from fsspec.caching import BlockCache, FirstChunkCache, add_cache, caches +from fsspec.caching import BlockCache, FirstChunkCache, caches, register_cache def test_cache_getitem(Cache_imp): @@ -140,8 +140,8 @@ def wrapped(*a, **kw): assert len(thread_ids) == 2 -def test_add_cache(): +def test_register_cache(): # just test that we have them populated and fail to re-add again unless overload with pytest.raises(ValueError): - add_cache(BlockCache) - add_cache(BlockCache, overload=True) + register_cache(BlockCache) + register_cache(BlockCache, clobber=True) From 63f06ccee74deeddf46892d0581cb8c801b7f8d7 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 4 Apr 2023 13:38:50 -0400 Subject: [PATCH 3/4] Update fsspec/caching.py --- fsspec/caching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/caching.py b/fsspec/caching.py index b48f43198..765dcf4e6 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -777,7 +777,7 @@ def register_cache(cls, clobber=False): Parameters ---------- - overload: bool, optional + clobber: bool, optional If overload is set to True (default is False) - allow to overload existing entry. From 0f3f2d3342ec5ccecf08bca259aad9c2a8c190d7 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 4 Apr 2023 13:38:57 -0400 Subject: [PATCH 4/4] Update fsspec/caching.py --- fsspec/caching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/caching.py b/fsspec/caching.py index 765dcf4e6..828a67061 100644 --- a/fsspec/caching.py +++ b/fsspec/caching.py @@ -778,7 +778,7 @@ def register_cache(cls, clobber=False): Parameters ---------- clobber: bool, optional - If overload is set to True (default is False) - allow to overload existing + If set to True (default is False) - allow to overwrite existing entry. Raises