From f29b789c942a861aa39c87624a96ddf5f09601ee Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 1 Jun 2021 07:58:08 -0500 Subject: [PATCH 1/2] Raise FileNotFoundError from HTTP.ls Closes https://github.com/intake/filesystem_spec/issues/648 --- fsspec/implementations/http.py | 18 +++++++++++------- fsspec/implementations/tests/test_http.py | 7 +++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index bbbe5e60f..31597c2dd 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -138,7 +138,7 @@ async def _ls_real(self, url, detail=True, **kwargs): logger.debug(url) session = await self.set_session() async with session.get(url, **self.kwargs) as r: - r.raise_for_status() + self._raise_not_found_for_status(r, url) text = await r.text() if self.simple_links: links = ex2.findall(text) + [u[2] for u in ex.findall(text)] @@ -190,6 +190,14 @@ async def _ls(self, url, detail=True, **kwargs): ls = sync_wrapper(_ls) + def _raise_not_found_for_status(self, response, url): + """ + Raises FileNotFoundError if the status code is a 404, otherwise uses raise_for_status. + """ + if response.status == 404: + raise FileNotFoundError(url) + response.raise_for_status() + async def _cat_file(self, url, start=None, end=None, **kwargs): kw = self.kwargs.copy() kw.update(kwargs) @@ -225,9 +233,7 @@ async def _cat_file(self, url, start=None, end=None, **kwargs): kw["headers"] = headers session = await self.set_session() async with session.get(url, **kw) as r: - if r.status == 404: - raise FileNotFoundError(url) - r.raise_for_status() + self._raise_not_found_for_status(r, url) out = await r.read() return out @@ -237,9 +243,7 @@ async def _get_file(self, rpath, lpath, chunk_size=5 * 2 ** 20, **kwargs): logger.debug(rpath) session = await self.set_session() async with session.get(rpath, **self.kwargs) as r: - if r.status == 404: - raise FileNotFoundError(rpath) - r.raise_for_status() + self._raise_not_found_for_status(r, url) with open(lpath, "wb") as fd: chunk = True while chunk: diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py index fae174c63..6b526ed43 100644 --- a/fsspec/implementations/tests/test_http.py +++ b/fsspec/implementations/tests/test_http.py @@ -201,6 +201,13 @@ def test_list_cache_reuse(server): assert len(h.dircache) == 0 +def test_ls_raises_filenotfound(server): + h = fsspec.filesystem("http") + + with pytest.raises(FileNotFoundError): + h.ls(server + "/not-a-key") + + def test_list_cache_with_max_paths(server): h = fsspec.filesystem("http", use_listings_cache=True, max_paths=5) out = h.glob(server + "/index/*") From 2ad392efd7cc4a3424bf1dd1e1078f530f0511d7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 1 Jun 2021 08:00:41 -0500 Subject: [PATCH 2/2] fixup --- fsspec/implementations/http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 31597c2dd..25a85fd4e 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -192,7 +192,7 @@ async def _ls(self, url, detail=True, **kwargs): def _raise_not_found_for_status(self, response, url): """ - Raises FileNotFoundError if the status code is a 404, otherwise uses raise_for_status. + Raises FileNotFoundError for 404s, otherwise uses raise_for_status. """ if response.status == 404: raise FileNotFoundError(url) @@ -243,7 +243,7 @@ async def _get_file(self, rpath, lpath, chunk_size=5 * 2 ** 20, **kwargs): logger.debug(rpath) session = await self.set_session() async with session.get(rpath, **self.kwargs) as r: - self._raise_not_found_for_status(r, url) + self._raise_not_found_for_status(r, rpath) with open(lpath, "wb") as fd: chunk = True while chunk: