From 10d1ba5a4486c14aef8d27436689ea4bc0d384b9 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 7 Dec 2023 09:37:41 -0600 Subject: [PATCH 1/9] add fix from https://github.com/fsspec/filesystem_spec/issues/1447#issuecomment-1845448026 --- fsspec/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsspec/core.py b/fsspec/core.py index a1e15b2eb..8bc8e8e9b 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -521,7 +521,7 @@ def split_protocol(urlpath): if len(protocol) > 1: # excludes Windows paths return protocol, path - if ":" in urlpath and urlpath.find(":") > 1: + if urlpath.startswith("data:"): return urlpath.split(":", 1) return None, urlpath From 19b038ec829043ca20f3d4ab0b0a18bbc71c33ca Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 7 Dec 2023 09:46:04 -0600 Subject: [PATCH 2/9] test: filetexts now creates parent directories if needed --- fsspec/implementations/tests/test_local.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 82eba0f02..52b51289b 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -56,6 +56,8 @@ def filetexts(d, open=open, mode="t"): try: os.chdir(dirname) for filename, text in d.items(): + if dirname := os.path.dirname(filename): + os.makedirs(dirname, exist_ok=True) f = open(filename, f"w{mode}") try: f.write(text) From 118200646ceb37900006a42ca49368c1158fb5b0 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 7 Dec 2023 09:46:41 -0600 Subject: [PATCH 3/9] add tests for https://github.com/fsspec/filesystem_spec/issues/1447 --- fsspec/implementations/tests/test_local.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 52b51289b..a1e1343fb 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -993,3 +993,20 @@ def test_cp_two_files(tmpdir): make_path_posix(os.path.join(target, "file0")), make_path_posix(os.path.join(target, "file1")), ] + + +def test_issue_1447(): + files_with_colons = { + ".local:file:with:colons.txt": b"content1", + ".colons-after-extension.txt:after": b"content3", + ".colons-after-extension/file:colon.txt:before/after": b"content2", + } + with filetexts(files_with_colons, mode="b"): + for file, contents in files_with_colons.items(): + with fsspec.filesystem("file").open(file, "rb") as f: + assert f.read() == contents + + fs, urlpath = fsspec.core.url_to_fs(file) + assert isinstance(fs, fsspec.implementations.local.LocalFileSystem) + with fs.open(urlpath, "rb") as f: + assert f.read() == contents From 99b61c316c7f6c9451eacbdae7e49121c2434aa8 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 7 Dec 2023 10:16:57 -0600 Subject: [PATCH 4/9] correctly delete files --- fsspec/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fsspec/generic.py b/fsspec/generic.py index 290bb436a..54d376653 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -136,7 +136,8 @@ def rsync( fs.cp(source_files, target_files, **kwargs) logger.debug(f"{len(to_delete)} files to delete") if delete_missing: - fs.rm(to_delete) + for file in to_delete: + fs.rm(file) class GenericFileSystem(AsyncFileSystem): From 58939569ccf6affbc99ce5764060dff08cd6bf06 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 7 Dec 2023 10:28:50 -0600 Subject: [PATCH 5/9] skip test in Windows --- fsspec/implementations/tests/test_local.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index a1e1343fb..2980eb7c7 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -33,7 +33,6 @@ ), } - csv_files = { ".test.fakedata.1.csv": (b"a,b\n" b"1,2\n"), ".test.fakedata.2.csv": (b"a,b\n" b"3,4\n"), @@ -995,11 +994,12 @@ def test_cp_two_files(tmpdir): ] +@pytest.mark.skipif(WIN, reason="Windows does not support colons in filenames") def test_issue_1447(): files_with_colons = { ".local:file:with:colons.txt": b"content1", - ".colons-after-extension.txt:after": b"content3", - ".colons-after-extension/file:colon.txt:before/after": b"content2", + ".colons-after-extension.txt:after": b"content2", + ".colons-after-extension/file:colon.txt:before/after": b"content3", } with filetexts(files_with_colons, mode="b"): for file, contents in files_with_colons.items(): From 0635e9103c93499e078d7db8727199015f2e4dd5 Mon Sep 17 00:00:00 2001 From: Luis Antonio Obis Aparicio Date: Thu, 7 Dec 2023 13:44:38 -0600 Subject: [PATCH 6/9] generic file system batch rm --- fsspec/generic.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fsspec/generic.py b/fsspec/generic.py index 54d376653..8dbb41c80 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -251,11 +251,15 @@ async def _pipe_file( return fs.pipe_file(path, value, **kwargs) async def _rm(self, url, **kwargs): - fs = _resolve_fs(url, self.method) - if fs.async_impl: - await fs._rm(url, **kwargs) - else: - fs.rm(url, **kwargs) + urls = url + if isinstance(urls, str): + urls = [urls] + for url in urls: + fs = _resolve_fs(url, self.method) + if fs.async_impl: + await fs._rm(url, **kwargs) + else: + fs.rm(url, **kwargs) async def _makedirs(self, path, exist_ok=False): logger.debug("Make dir %s", path) From 5b12e1cee807f7080acdf4779a854d667d6a0e42 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 7 Dec 2023 14:51:44 -0500 Subject: [PATCH 7/9] Update fsspec/generic.py --- fsspec/generic.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fsspec/generic.py b/fsspec/generic.py index 8dbb41c80..6700377cd 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -254,12 +254,11 @@ async def _rm(self, url, **kwargs): urls = url if isinstance(urls, str): urls = [urls] - for url in urls: - fs = _resolve_fs(url, self.method) - if fs.async_impl: - await fs._rm(url, **kwargs) - else: - fs.rm(url, **kwargs) + fs = _resolve_fs(url[0], self.method) + if fs.async_impl: + await fs._rm(url, **kwargs) + else: + fs.rm(url, **kwargs) async def _makedirs(self, path, exist_ok=False): logger.debug("Make dir %s", path) From cfc1e1f0cf146248fcb14027126b98afa999003e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 7 Dec 2023 14:51:50 -0500 Subject: [PATCH 8/9] Update fsspec/generic.py --- fsspec/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fsspec/generic.py b/fsspec/generic.py index 6700377cd..f38fc300a 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -136,8 +136,7 @@ def rsync( fs.cp(source_files, target_files, **kwargs) logger.debug(f"{len(to_delete)} files to delete") if delete_missing: - for file in to_delete: - fs.rm(file) + fs.rm(to_delete) class GenericFileSystem(AsyncFileSystem): From 379db0f11be0f735d1d129fce15264cf8f574148 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 7 Dec 2023 15:15:14 -0500 Subject: [PATCH 9/9] Update fsspec/generic.py --- fsspec/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/generic.py b/fsspec/generic.py index f38fc300a..20534cf40 100644 --- a/fsspec/generic.py +++ b/fsspec/generic.py @@ -253,9 +253,9 @@ async def _rm(self, url, **kwargs): urls = url if isinstance(urls, str): urls = [urls] - fs = _resolve_fs(url[0], self.method) + fs = _resolve_fs(urls[0], self.method) if fs.async_impl: - await fs._rm(url, **kwargs) + await fs._rm(urls, **kwargs) else: fs.rm(url, **kwargs)