From 3d1bbe1f35946c880056252c1e10f86e802d5070 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Apr 2024 01:55:27 +0100 Subject: [PATCH 1/6] GH-115060: Speed up `pathlib.Path.glob()` by omitting initial `stat()` Since 6258844c, paths that might not exist can be fed into pathlib's globbing implementation, which will call `os.scandir()` / `os.lstat()` only when strictly necessary. This allows us to drop an initial `self.is_dir()` call, which saves a `stat()`. --- Doc/library/pathlib.rst | 4 +--- Lib/pathlib/__init__.py | 4 +--- Lib/pathlib/_abc.py | 4 +--- .../Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst | 3 +++ 4 files changed, 6 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f4ed479401f65c..74aadeaa935c47 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1004,9 +1004,7 @@ call fails (for example because the path doesn't exist). .. seealso:: :ref:`pathlib-pattern-language` documentation. - This method calls :meth:`Path.is_dir` on the top-level directory and - propagates any :exc:`OSError` exception that is raised. Subsequent - :exc:`OSError` exceptions from scanning directories are suppressed. + This method suppresses any :exc:`OSError` exceptions raised while scanning. By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 66eb08a45b1bb3..a4721fbe813962 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -607,11 +607,9 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): if raw[-1] in (self.parser.sep, self.parser.altsep): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. 
parts.append('') - if not self.is_dir(): - return iter([]) select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks) root = str(self) - paths = select(root, exists=True) + paths = select(root) # Normalize results if root == '.': diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b51ad6f46d292a..05698d5de24afb 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -705,10 +705,8 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): anchor, parts = pattern._stack if anchor: raise NotImplementedError("Non-relative patterns are unsupported") - if not self.is_dir(): - return iter([]) select = self._glob_selector(parts, case_sensitive, recurse_symlinks) - return select(self, exists=True) + return select(self) def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): """Recursively yield all existing files (of any kind, including diff --git a/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst new file mode 100644 index 00000000000000..6500b9caa251db --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst @@ -0,0 +1,3 @@ +Speed up :meth:`pathlib.Path.glob` my omitting an initial +:meth:`~pathlib.Path.is_dir` call. As a result of this change, +:meth:`~pathlib.Path.glob` can no longer raise :exc:`OSError`. 
From ed3f478f789d9f43ce73ee7cd9b5b7309ab7cf51 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Apr 2024 02:52:52 +0100 Subject: [PATCH 2/6] Skip test_glob_dotdot on WASI --- Lib/test/test_pathlib/test_pathlib_abc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 6656b032cde28e..aadecbc142cca6 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -8,6 +8,7 @@ from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath +from test.support import is_wasi from test.support.os_helper import TESTFN @@ -1920,6 +1921,8 @@ def test_rglob_symlink_loop(self): } self.assertEqual(given, {p / x for x in expect}) + # See https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") def test_glob_dotdot(self): # ".." is not special in globs. P = self.cls From 2df517558b158d1925634ed50161691a05be0f18 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 13 Apr 2024 20:06:48 +0100 Subject: [PATCH 3/6] Update Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> --- .../next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst index 6500b9caa251db..50b374acb90ad0 100644 --- a/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst +++ b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst @@ -1,3 +1,3 @@ -Speed up :meth:`pathlib.Path.glob` my omitting an initial +Speed up :meth:`pathlib.Path.glob` by omitting an initial :meth:`~pathlib.Path.is_dir` call. 
As a result of this change, :meth:`~pathlib.Path.glob` can no longer raise :exc:`OSError`. From 2bf505e67a8efb2bd0652964848523fb191ccf81 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Apr 2024 20:17:04 +0100 Subject: [PATCH 4/6] Test that PermissionError is no longer raised --- Lib/test/test_pathlib/test_pathlib.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 651d66656cbd61..5fd1a41cbee17b 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1263,6 +1263,13 @@ def test_glob_dot(self): self.assertEqual( set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + def test_glob_inaccessible(self): + P = self.cls + p = P(self.base, "mydir1", "mydir2") + p.mkdir(parents=True) + p.parent.chmod(0) + self.assertEqual(set(p.glob('*')), set()) + def test_rglob_pathlike(self): P = self.cls p = P(self.base, "dirC") From 85bd81ea733e0334f67e1e89e83ae32914f0d687 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Apr 2024 20:23:17 +0100 Subject: [PATCH 5/6] Clarify docs --- Doc/library/pathlib.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 74aadeaa935c47..7f984e25824e3e 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1004,7 +1004,8 @@ call fails (for example because the path doesn't exist). .. seealso:: :ref:`pathlib-pattern-language` documentation. - This method suppresses any :exc:`OSError` exceptions raised while scanning. + This method suppresses any :exc:`OSError` exceptions that are raised while + scanning the filesystem. By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: From ef775e34fed798a3eb8b6c90c1ebaf7002de089d Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Apr 2024 21:07:54 +0100 Subject: [PATCH 6/6] Switch to `.. 
versionchanged::` --- Doc/library/pathlib.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 7f984e25824e3e..2e18e41869376e 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1004,9 +1004,6 @@ call fails (for example because the path doesn't exist). .. seealso:: :ref:`pathlib-pattern-language` documentation. - This method suppresses any :exc:`OSError` exceptions that are raised while - scanning the filesystem. - By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: typically, case-sensitive on POSIX, and case-insensitive on Windows. @@ -1027,6 +1024,11 @@ call fails (for example because the path doesn't exist). .. versionchanged:: 3.13 The *pattern* parameter accepts a :term:`path-like object`. + .. versionchanged:: 3.13 + Any :exc:`OSError` exceptions raised from scanning the filesystem are + suppressed. In previous versions, such exceptions were suppressed in many + cases, but not all. + .. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False)