From ac622b7fbbd245a29d501ba91c59b4ff44aeac03 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 8 May 2022 18:23:19 -0400 Subject: [PATCH 01/32] Add Path.walk and Path.walk_bottom_up methods --- Lib/pathlib.py | 126 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1f098fe6bd5f37..6eb12108734afb 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1385,6 +1385,132 @@ def expanduser(self): return self + def walk(self, on_error=None, follow_links=False): + """Generate a top-down directory tree from this directory + + For each directory in the directory tree rooted at self (including + self but excluding '.' and '..'), yields a 3-tuple + + root_directory, child_directory_names, child_file_names + + The caller can modify the child_directory_names list in-place + (e.g., via del or slice assignment), and walk will only recurse into + the subdirectories whose names remain in child_directory_names; this + can be used to prune the search, or to impose a specific order of + visiting. + + By default errors from Path._scandir() call are ignored. If + optional arg 'on_error' is specified, it should be a function; it + will be called with one argument, an OSError instance. It can + report the error to continue with the walk, or raise the exception + to abort the walk. Note that the filename is available as the + filename attribute of the exception object. + + By default, Path.walk does not follow symbolic links to subdirectories + on systems that support them. In order to get this functionality, set + the optional argument 'follow_links' to true. + + Caution: if self is a relative Path, don't change the + current working directory between resumptions of walk. walk never + changes the current directory, and assumes that the client doesn't + either. + + Example: + + from pathlib import Path + for root, dirs, files in Path('python/Lib/email'): + print(root, "consumes", end="") + print(sum((root / file).stat().st_size for file in files), end="") + print("bytes in", len(files), "non-directory files") + if 'CVS' in dirs: + dirs.remove('CVS') # don't visit CVS directories + """ + sys.audit("pathlib.Path.walk", self, on_error, follow_links) + return self._walk(True, on_error, follow_links) + + def walk_bottom_up(self, on_error=None, follow_links=False): + """Generate a bottom up directory tree from this directory + + The return type and arguments are identical to Path.walk. + However, the caller cannot modify the child_directory_names to prune + the search or to impose a specific order of visiting because the list + of directories to visit is calculated before we yield it. + """ + sys.audit("pathlib.Path.walk_bottom_up", self, on_error, follow_links) + return self._walk(False, on_error, follow_links) + + def _walk(self, topdown, on_error, follow_links): + dirs = [] + nondirs = [] + walk_dirs = [] + + # We may not have read permission for self, in which case we can't + # get a list of the files the directory contains. os.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. + try: + scandir_it = self._scandir() + except OSError as error: + if on_error is not None: + on_error(error) + return + + with scandir_it: + while True: + try: + entry = next(scandir_it, ...) + if entry is ...: + break + except OSError as error: + if on_error is not None: + on_error(error) + return + + try: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider that the entry + # is not a directory, same behavior as os.path.isdir() + is_dir = False + + if is_dir: + dirs.append(entry.name) + else: + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: recurse into sub-directory, but exclude symlinks to + # directories if follow_links is False + if follow_links: + walk_into = True + else: + try: + is_symlink = entry.is_symlink() + except OSError: + # If is_symlink() raises an OSError, consider that + # the entry is not a symbolic link, same behavior + # as os.path.islink() + is_symlink = False + walk_into = not is_symlink + + if walk_into: + walk_dirs.append(entry) + if topdown: + for raw_new_path in dirs: + # Path.is_symlink() is used instead of caching entry.is_symlink() + # result during the loop on Path._scandir() because the caller + # can replace the directory entry during the "yield" above. + new_path = self._make_child_relpath(raw_new_path) + if follow_links or not new_path.is_symlink(): + yield from new_path._walk(topdown, on_error, follow_links) + else: + for raw_new_path in walk_dirs: + new_path = self._make_child_relpath(raw_new_path.name) + yield from new_path._walk(topdown, on_error, follow_links) + # Yield after recursion if going bottom up + yield self, dirs, nondirs + class PosixPath(Path, PurePosixPath): """Path subclass for non-Windows systems. From 14f031af907602d56e396cd4f5ffccb3df39245f Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Mon, 9 May 2022 06:15:57 -0400 Subject: [PATCH 02/32] Fix errors in Path.walk docstrings and add caching of entries --- Lib/pathlib.py | 79 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 6eb12108734afb..48cd4f0cb6a8cb 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1385,22 +1385,22 @@ def expanduser(self): return self - def walk(self, on_error=None, follow_links=False): + def walk(self, on_error=None, follow_symlinks=False): """Generate a top-down directory tree from this directory For each directory in the directory tree rooted at self (including self but excluding '.' and '..'), yields a 3-tuple - root_directory, child_directory_names, child_file_names + dirpath, dirnames, filenames - The caller can modify the child_directory_names list in-place - (e.g., via del or slice assignment), and walk will only recurse into - the subdirectories whose names remain in child_directory_names; this + The caller can modify the dirnames list in-place + (e.g., via del or slice assignment), and walk will only recurse + into the subdirectories whose names remain in dirnames; this can be used to prune the search, or to impose a specific order of visiting. By default errors from Path._scandir() call are ignored. If - optional arg 'on_error' is specified, it should be a function; it + optional arg 'on_error' is specified, it should be a callable; it will be called with one argument, an OSError instance. It can report the error to continue with the walk, or raise the exception to abort the walk. Note that the filename is available as the @@ -1408,41 +1408,56 @@ def walk(self, on_error=None, follow_links=False): By default, Path.walk does not follow symbolic links to subdirectories on systems that support them. In order to get this functionality, set - the optional argument 'follow_links' to true. + the optional argument 'follow_symlinks' to true. Caution: if self is a relative Path, don't change the current working directory between resumptions of walk. walk never changes the current directory, and assumes that the client doesn't either. + + Caution: walk assumes the directories have not been modified between + its resumptions. I.e. If a directory from dirnames has been replaced + with a symlink and follow_symlinks=False, walk will still try to + descend into it. To prevent such behavior, remove directories from + dirnames if they have been modified and you do not want walk to + descend into them anymore. Example: from pathlib import Path - for root, dirs, files in Path('python/Lib/email'): - print(root, "consumes", end="") - print(sum((root / file).stat().st_size for file in files), end="") - print("bytes in", len(files), "non-directory files") - if 'CVS' in dirs: - dirs.remove('CVS') # don't visit CVS directories - """ - sys.audit("pathlib.Path.walk", self, on_error, follow_links) - return self._walk(True, on_error, follow_links) - - def walk_bottom_up(self, on_error=None, follow_links=False): + for root, dirs, files in Path('Lib/concurrent').walk(on_error=print): + print( + root, + "consumes", + sum((root / file).stat().st_size for file in files), + "bytes in", + len(files), + "non-directory files" + ) + # don't visit __pycache__ directories + if '__pycache__' in dirs: + dirs.remove('__pycache__') + """ + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) + return self._walk(True, on_error, follow_symlinks) + + def walk_bottom_up(self, on_error=None, follow_symlinks=False): """Generate a bottom up directory tree from this directory The return type and arguments are identical to Path.walk. - However, the caller cannot modify the child_directory_names to prune - the search or to impose a specific order of visiting because the list - of directories to visit is calculated before we yield it. + + Unline walk, the caller cannot modify the dirnames to prune the + search or to impose a specific order of visiting because the + list of directories to visit is calculated before yielding it. """ - sys.audit("pathlib.Path.walk_bottom_up", self, on_error, follow_links) - return self._walk(False, on_error, follow_links) + sys.audit("pathlib.Path.walk_bottom_up", self, on_error, follow_symlinks) + return self._walk(False, on_error, follow_symlinks) def _walk(self, topdown, on_error, follow_links): dirs = [] nondirs = [] walk_dirs = [] + walk_dirs_map = {} # We may not have read permission for self, in which case we can't # get a list of the files the directory contains. os.walk @@ -1476,6 +1491,8 @@ def _walk(self, topdown, on_error, follow_links): if is_dir: dirs.append(entry.name) + walk_dirs_map[entry.name] = entry + else: nondirs.append(entry.name) @@ -1497,16 +1514,16 @@ def _walk(self, topdown, on_error, follow_links): if walk_into: walk_dirs.append(entry) if topdown: - for raw_new_path in dirs: - # Path.is_symlink() is used instead of caching entry.is_symlink() - # result during the loop on Path._scandir() because the caller - # can replace the directory entry during the "yield" above. - new_path = self._make_child_relpath(raw_new_path) - if follow_links or not new_path.is_symlink(): + yield self, dirs, nondirs + + for dir_name in dirs: + new_path = self._make_child_relpath(dir_name) + + if follow_links or not walk_dirs_map[dir_name].is_symlink(): yield from new_path._walk(topdown, on_error, follow_links) else: - for raw_new_path in walk_dirs: - new_path = self._make_child_relpath(raw_new_path.name) + for dir_entry in walk_dirs: + new_path = self._make_child_relpath(dir_entry.name) yield from new_path._walk(topdown, on_error, follow_links) # Yield after recursion if going bottom up yield self, dirs, nondirs From 3ad60a9f41780aa79fe182a71f48d7c97acdc262 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Mon, 9 May 2022 17:51:37 -0400 Subject: [PATCH 03/32] Refactor symlink handling --- Lib/pathlib.py | 56 +++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 48cd4f0cb6a8cb..14d62ef7ba926f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1,3 +1,4 @@ +from collections import deque import fnmatch import functools import io @@ -1453,11 +1454,10 @@ def walk_bottom_up(self, on_error=None, follow_symlinks=False): sys.audit("pathlib.Path.walk_bottom_up", self, on_error, follow_symlinks) return self._walk(False, on_error, follow_symlinks) - def _walk(self, topdown, on_error, follow_links): + def _walk(self, top_down, on_error, follow_symlinks): dirs = [] nondirs = [] - walk_dirs = [] - walk_dirs_map = {} + dir_entry_cache = {} # We may not have read permission for self, in which case we can't # get a list of the files the directory contains. os.walk @@ -1472,18 +1472,9 @@ def _walk(self, topdown, on_error, follow_links): return with scandir_it: - while True: + for entry in scandir_it: try: - entry = next(scandir_it, ...) - if entry is ...: - break - except OSError as error: - if on_error is not None: - on_error(error) - return - - try: - is_dir = entry.is_dir() + is_dir = entry.is_dir(follow_symlinks=follow_symlinks) except OSError: # If is_dir() raises an OSError, consider that the entry # is not a directory, same behavior as os.path.isdir() @@ -1491,41 +1482,18 @@ def _walk(self, topdown, on_error, follow_links): if is_dir: dirs.append(entry.name) - walk_dirs_map[entry.name] = entry - + dir_entry_cache[entry.name] = entry else: nondirs.append(entry.name) - if not topdown and is_dir: - # Bottom-up: recurse into sub-directory, but exclude symlinks to - # directories if follow_links is False - if follow_links: - walk_into = True - else: - try: - is_symlink = entry.is_symlink() - except OSError: - # If is_symlink() raises an OSError, consider that - # the entry is not a symbolic link, same behavior - # as os.path.islink() - is_symlink = False - walk_into = not is_symlink - - if walk_into: - walk_dirs.append(entry) - if topdown: + if top_down: yield self, dirs, nondirs - for dir_name in dirs: - new_path = self._make_child_relpath(dir_name) - - if follow_links or not walk_dirs_map[dir_name].is_symlink(): - yield from new_path._walk(topdown, on_error, follow_links) - else: - for dir_entry in walk_dirs: - new_path = self._make_child_relpath(dir_entry.name) - yield from new_path._walk(topdown, on_error, follow_links) - # Yield after recursion if going bottom up + for dir_name in dirs: + new_path = self._make_child_relpath(dir_name) + yield from new_path._walk(top_down, on_error, follow_symlinks) + + if not top_down: yield self, dirs, nondirs From 2f9882333bf9710ed39241e75760985c4bf3763e Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 10 May 2022 01:55:16 -0400 Subject: [PATCH 04/32] Add Path.walk docs and unite Path.walk interfaces --- Doc/library/pathlib.rst | 117 ++++++++++++++++++++++++++++++++++++++++ Lib/pathlib.py | 59 ++++++++++---------- 2 files changed, 149 insertions(+), 27 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index ab26e2f1719fe9..a6d297c2e1001c 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -920,6 +920,123 @@ call fails (for example because the path doesn't exist). to the directory after creating the iterator, whether a path object for that file be included is unspecified. +.. method:: Path.walk(top_down=True, on_error=None, follow_symlinks=False) + + Generate the file names in a directory tree by walking the tree + either top-down or bottom-up. + + For each directory in the directory tree rooted at *self* (including + *self* but excluding '.' and '..'), yields a 3-tuple + ``(dirpath, dirnames, filenames)`` + + *dirpath* is a Path to the directory, *dirnames* is a list of the names + of the subdirectories in *dirpath* (excluding ``'.'`` and ``'..'``), and + *filenames* is a list of the names of the non-directory files in *dirpath*. + Note that the names in the lists contain no path components. To get a full + path (which begins with *self*) to a file or directory in *dirpath*, do + ``dirpath / name``. Whether or not the lists are sorted depends on the file + system. If a file or a directory is removed from or added to the *dirpath* + during the generation of *dirnames* and *filenames*, it is uncertain whether + the new entry will appear in the generated lists. + + If optional argument *top_down* is ``True`` or not specified, the triple for a + directory is generated before the triples for any of its subdirectories + (directories are generated top-down). If *top_down* is ``False``, the triple + for a directory is generated after the triples for all of its subdirectories + (directories are generated bottom-up). No matter the value of *top_down*, the + list of subdirectories is retrieved before the tuples for the directory and + its subdirectories are generated. + + When *top_down* is True, the caller can modify the *dirnames* list in-place + (For example, using :keyword:`del` or slice assignment), and :meth:`Path.walk` + will only recurse into the subdirectories whose names remain in *dirnames*; + this can be used to prune the search, or to impose a specific order of visiting, + or even to inform :meth:`Path.walk` about directories the caller creates or + renames before it resumes :meth:`Path.walk` again. Modifying *dirnames* when + *top_down* is False has no effect on the behavior of :meth:`Path.walk()`, since the + directories in *dirnames* have already been generated by the time *dirnames* + is yielded to the caller. + + By default errors from :meth:`Path._scandir` call are ignored. If + optional argument *on_error* is specified, it should be a callable; it + will be called with one argument, an :exc:`OSError` instance. It can + report the error to continue with the walk, or raise the exception + to abort the walk. Note that the filename is available as the + ``filename`` attribute of the exception object. + + By default, :meth:`Path.walk` will not walk down into symbolic links that + resolve to directories. Set *follow_symlinks* to ``True`` to visit directories + pointed to by symlinks, on systems that support them. + + .. note:: + + Be aware that setting *follow_symlinks* to ``True`` can lead to infinite + recursion if a link points to a parent directory of itself. :meth:`Path.walk` + does not keep track of the directories it visited already. + + .. note:: + + If self is a relative Path, don't change the current working directory between + resumptions of :meth:`Path.walk`. :meth:`Path.walk` never changes the current + directory, and assumes that the caller doesn't either. + + .. note:: + + :meth:`Path.walk` assumes the directories have not been modified between + its resumptions. I.e. If a directory from *dirnames* has been replaced + with a symlink and *follow_symlinks* = ``False``, :meth:`Path.walk` will + still try to descend into it. To prevent such behavior, remove directories + from *dirnames* if they have been modified and you do not want to + descend into them anymore. + + .. note:: + + Unlike :func:`os.walk`, :meth:`Path.walk` adds symlinks to directories into *filenames* + + This example displays the number of bytes taken by non-directory files in each + directory under the starting directory, except that it doesn't look under any + __pycache__ subdirectory:: + + from pathlib import Path + for root, dirs, files in Path("cpython/Lib/concurrent").walk(on_error=print): + print( + root, + "consumes", + sum((root / file).stat().st_size for file in files), + "bytes in", + len(files), + "non-directory files" + ) + if '__pycache__' in dirs: + dirs.remove('__pycache__') + + In the next example (simple implementation of :func:`shutil.rmtree`), + walking the tree bottom-up is essential, :func:`rmdir` doesn't allow + deleting a directory before the directory is empty:: + + # Delete everything reachable from the directory "top", + # assuming there are no symbolic links. + # CAUTION: This is dangerous! For example, if top == Path('/'), + # it could delete all your disk files. + for root, dirs, files in top.walk(topdown=False): + for name in files: + (root / name).unlink() + for name in dirs: + (root / name).rmdir() + + .. versionadded:: 3.12 + +.. method:: Path.walk_bottom_up(on_error=None, follow_symlinks=False) + + Generate a bottom up directory tree from this directory + + The return type and arguments are identical to :meth:`Path.walk`. + + Unline :meth:`Path.walk`, the caller cannot modify the dirnames to prune the + search or to impose a specific order of visiting because the + list of directories to visit is calculated before yielding it. + + .. method:: Path.lchmod(mode) Like :meth:`Path.chmod` but, if the path points to a symbolic link, the diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 14d62ef7ba926f..3ec09dbb8b95f2 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1386,19 +1386,37 @@ def expanduser(self): return self - def walk(self, on_error=None, follow_symlinks=False): + def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Generate a top-down directory tree from this directory For each directory in the directory tree rooted at self (including self but excluding '.' and '..'), yields a 3-tuple dirpath, dirnames, filenames + + dirpath is the Path to the directory. dirnames is a list of + the names of the subdirectories in dirpath (excluding '.' and '..'). + filenames is a list of the names of the non-directory files in dirpath. + Note that the names in the lists are just names, with no path components. + To get a full path (which begins with top) to a file or directory in + dirpath, do dirpath / name. + + If optional arg 'top_down' is true or not specified, the triple for a + directory is generated before the triples for any of its subdirectories + (directories are generated top down). If top_down is false, the triple + for a directory is generated after the triples for all of its + subdirectories (directories are generated bottom up). - The caller can modify the dirnames list in-place + When top_down is True, the caller can modify the dirnames list in-place (e.g., via del or slice assignment), and walk will only recurse into the subdirectories whose names remain in dirnames; this can be used to prune the search, or to impose a specific order of - visiting. + visiting. Modifying dirnames when + top_down is False has no effect on the behavior of Path.walk(), since the + directories in dirnames have already been generated by the time dirnames + itself is generated. No matter the value of top_down, the list of + subdirectories is retrieved before the tuples for the directory and its + subdirectories are generated. By default errors from Path._scandir() call are ignored. If optional arg 'on_error' is specified, it should be a callable; it @@ -1409,24 +1427,25 @@ def walk(self, on_error=None, follow_symlinks=False): By default, Path.walk does not follow symbolic links to subdirectories on systems that support them. In order to get this functionality, set - the optional argument 'follow_symlinks' to true. + the optional argument 'follow_symlinks' to true. Unlike os.walk, + Path.walk only adds symbolic links to dirnames if follow_symlinks=True. Caution: if self is a relative Path, don't change the current working directory between resumptions of walk. walk never - changes the current directory, and assumes that the client doesn't + changes the current directory, and assumes that the caller doesn't either. - Caution: walk assumes the directories have not been modified between - its resumptions. I.e. If a directory from dirnames has been replaced - with a symlink and follow_symlinks=False, walk will still try to - descend into it. To prevent such behavior, remove directories from - dirnames if they have been modified and you do not want walk to - descend into them anymore. + Caution: Unlike os.walk, Path.walk assumes the directories have not + been modified between its resumptions. I.e. If a directory from + dirnames has been replaced with a symlink and follow_symlinks=False, + walk will still try to descend into it. To prevent such behavior, + remove directories from dirnames if they have been modified and you + do not want Path.walk to descend into them anymore. Example: from pathlib import Path - for root, dirs, files in Path('Lib/concurrent').walk(on_error=print): + for root, dirs, files in Path().walk(on_error=print): print( root, "consumes", @@ -1440,24 +1459,11 @@ def walk(self, on_error=None, follow_symlinks=False): dirs.remove('__pycache__') """ sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - return self._walk(True, on_error, follow_symlinks) - - def walk_bottom_up(self, on_error=None, follow_symlinks=False): - """Generate a bottom up directory tree from this directory - - The return type and arguments are identical to Path.walk. - - Unline walk, the caller cannot modify the dirnames to prune the - search or to impose a specific order of visiting because the - list of directories to visit is calculated before yielding it. - """ - sys.audit("pathlib.Path.walk_bottom_up", self, on_error, follow_symlinks) - return self._walk(False, on_error, follow_symlinks) + return self._walk(top_down, on_error, follow_symlinks) def _walk(self, top_down, on_error, follow_symlinks): dirs = [] nondirs = [] - dir_entry_cache = {} # We may not have read permission for self, in which case we can't # get a list of the files the directory contains. os.walk @@ -1482,7 +1488,6 @@ def _walk(self, top_down, on_error, follow_symlinks): if is_dir: dirs.append(entry.name) - dir_entry_cache[entry.name] = entry else: nondirs.append(entry.name) From 513030a27cad499cf161bb90761f89d6581d4d61 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 10 May 2022 01:56:36 -0400 Subject: [PATCH 05/32] Remove Path.walk_bottom_up definition --- Doc/library/pathlib.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a6d297c2e1001c..f34fb357033870 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1025,17 +1025,6 @@ call fails (for example because the path doesn't exist). (root / name).rmdir() .. versionadded:: 3.12 - -.. method:: Path.walk_bottom_up(on_error=None, follow_symlinks=False) - - Generate a bottom up directory tree from this directory - - The return type and arguments are identical to :meth:`Path.walk`. - - Unline :meth:`Path.walk`, the caller cannot modify the dirnames to prune the - search or to impose a specific order of visiting because the - list of directories to visit is calculated before yielding it. - .. method:: Path.lchmod(mode) From 5fdd72e3a8911d6724c26f7731d1d3d7cffdbd18 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 10 May 2022 16:30:41 +0000 Subject: [PATCH 06/32] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst diff --git a/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst b/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst new file mode 100644 index 00000000000000..1f06fbbb79f5a3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst @@ -0,0 +1 @@ +Added method walk to :class:`pathlib.Path` objects as a pathlib alternative for :func:`os.walk` From 452f24eb95caa94271e87c7d70ee6e64cbb26f63 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 10 May 2022 13:07:27 -0400 Subject: [PATCH 07/32] Add Path.walk tests --- Doc/library/pathlib.rst | 1 + Lib/test/support/os_helper.py | 2 +- Lib/test/test_pathlib.py | 209 ++++++++++++++++++++++++++++++++++ 3 files changed, 211 insertions(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f34fb357033870..a7362c228ea11c 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -992,6 +992,7 @@ call fails (for example because the path doesn't exist). .. note:: Unlike :func:`os.walk`, :meth:`Path.walk` adds symlinks to directories into *filenames* + if *follow_symlinks* is ``True`` This example displays the number of bytes taken by non-directory files in each directory under the starting directory, except that it doesn't look under any diff --git a/Lib/test/support/os_helper.py b/Lib/test/support/os_helper.py index eee37ef0d5a713..86703edb206207 100644 --- a/Lib/test/support/os_helper.py +++ b/Lib/test/support/os_helper.py @@ -482,7 +482,7 @@ def fs_is_case_insensitive(directory): class FakePath: - """Simple implementing of the path protocol. + """Simple implementation of the path protocol. """ def __init__(self, path): self.path = path diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 6737068c0ff6d6..1668586e272722 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2483,6 +2483,215 @@ def test_complex_symlinks_relative(self): def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) +class WalkTests(unittest.TestCase): + """Tests for Path.walk() + + They are mostly just copies of os.walk tests converted to use Paths + because os.walk tests are already mature and cover many edge cases + that we could miss writing new tests. + + It is not combined with Path* tests because its setup is aimed at + testing a bit more complex cases than Path's setup. + """ + + # Wrapper to hide minor differences between different implementations + # of Path.walk (for example, Path.fwalk) + def walk(self, top, **kwargs): + return top.walk(**kwargs) + + def setUp(self): + P = pathlib.Path + join = os.path.join + self.addCleanup(os_helper.rmtree, os_helper.TESTFN) + + # Build: + # TESTFN/ + # TEST1/ a file kid and two directory kids + # tmp1 + # SUB1/ a file kid and a directory kid + # tmp2 + # SUB11/ no kids + # SUB2/ a file kid and a dirsymlink kid + # tmp3 + # SUB21/ not readable + # tmp5 + # link/ a symlink to TESTFN.2 + # broken_link + # broken_link2 + # broken_link3 + # TEST2/ + # tmp4 a lone file + self.walk_path = P(os_helper.TESTFN, "TEST1") + self.sub1_path = self.walk_path / "SUB1" + self.sub11_path = self.sub1_path / "SUB11" + sub2_path = self.walk_path / "SUB2" + sub21_path= sub2_path / "SUB21" + tmp1_path = self.walk_path / "tmp1" + tmp2_path = self.sub1_path / "tmp2" + tmp3_path = sub2_path / "tmp3" + tmp5_path = sub21_path / "tmp3" + self.link_path = sub2_path / "link" + t2_path = P(os_helper.TESTFN, "TEST2") + tmp4_path = P(os_helper.TESTFN, "TEST2", "tmp4") + broken_link_path = sub2_path / "broken_link" + broken_link2_path = sub2_path / "broken_link2" + broken_link3_path = sub2_path / "broken_link3" + + # Create stuff. + os.makedirs(self.sub11_path) + os.makedirs(sub2_path) + os.makedirs(sub21_path) + os.makedirs(t2_path) + + for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path, tmp5_path: + with open(path, "x", encoding='utf-8') as f: + f.write(f"I'm {path} and proud of it. Blame test_os.\n") + + if os_helper.can_symlink(): + os.symlink(os.path.abspath(t2_path), self.link_path) + os.symlink('broken', broken_link_path, True) + os.symlink(join('tmp3', 'broken'), broken_link2_path, True) + os.symlink(join('SUB21', 'tmp5'), broken_link3_path, True) + self.sub2_tree = (sub2_path, ["SUB21"], # FIXME + ["broken_link", "broken_link2", "broken_link3", + "link", + "tmp3"]) + else: + self.sub2_tree = (sub2_path, ["SUB21"], ["tmp3"]) + + if not is_emscripten: + # Emscripten fails with inaccessible directory + os.chmod(sub21_path, 0) + try: + os.listdir(sub21_path) + except PermissionError: + self.addCleanup(os.chmod, sub21_path, stat.S_IRWXU) + else: + os.chmod(sub21_path, stat.S_IRWXU) + os.unlink(tmp5_path) + os.rmdir(sub21_path) + del self.sub2_tree[1][:1] + + def test_walk_topdown(self): + if 'unittest.util' in __import__('sys').modules: + # Show full diff in self.assertEqual. + __import__('sys').modules['unittest.util']._MAX_LENGTH = 999999999 + + # Walk top-down. + P = pathlib.Path + all = list(self.walk(self.walk_path)) + + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: TESTFN, SUB1, SUB11, SUB2 + # flipped: TESTFN, SUB2, SUB1, SUB11 + flipped = all[0][1][0] != "SUB1" + all[0][1].sort() + all[3 - 2 * flipped][-1].sort() + all[3 - 2 * flipped][1].sort() + self.assertEqual(all[0], (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[1 + flipped], (self.sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 + flipped], (self.sub11_path, [], [])) + self.assertEqual(all[3 - 2 * flipped], self.sub2_tree) + + def test_walk_prune(self, walk_path=None): + if walk_path is None: + walk_path = self.walk_path + # Prune the search. + all = [] + for root, dirs, files in self.walk(walk_path): + all.append((root, dirs, files)) + # Don't descend into SUB1. + if 'SUB1' in dirs: + # Note that this also mutates the dirs we appended to all! + dirs.remove('SUB1') + + self.assertEqual(len(all), 2) + self.assertEqual(all[0], (self.walk_path, ["SUB2"], ["tmp1"])) + + all[1][-1].sort() + all[1][1].sort() + self.assertEqual(all[1], self.sub2_tree) + + def test_file_like_path(self): + self.test_walk_prune(FakePath(self.walk_path).__fspath__()) + + def test_walk_bottom_up(self): + # Walk bottom-up. + all = list(self.walk(self.walk_path, top_down=False)) + + self.assertEqual(len(all), 4, all) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: SUB11, SUB1, SUB2, TESTFN + # flipped: SUB2, SUB11, SUB1, TESTFN + flipped = all[3][1][0] != "SUB1" + all[3][1].sort() + all[2 - 2 * flipped][-1].sort() + all[2 - 2 * flipped][1].sort() + self.assertEqual(all[3], + (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[flipped], + (self.sub11_path, [], [])) + self.assertEqual(all[flipped + 1], + (self.sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 - 2 * flipped], + self.sub2_tree) + + @os_helper.skip_unless_symlink + def test_walk_symlink(self): + # Walk, following symlinks. + walk_it = self.walk(self.walk_path, follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with follow_symlinks=True") + + def test_walk_bad_dir(self): + # Walk top-down. + errors = [] + walk_it = self.walk(self.walk_path, on_error=errors.append) + root, dirs, files = next(walk_it) + self.assertEqual(errors, []) + dir1 = 'SUB1' + path1 = root / dir1 + path1new = (root / dir1).with_suffix(".new") + path1.rename(path1new) + try: + roots = [r for r, _, _ in walk_it] + self.assertTrue(errors) + self.assertNotIn(path1, roots) + self.assertNotIn(path1new, roots) + for dir2 in dirs: + if dir2 != dir1: + self.assertIn(root / dir2, roots) + finally: + path1new.rename(path1) + + def test_walk_many_open_files(self): + P = pathlib.Path + depth = 30 + base = P(os_helper.TESTFN, 'deep') + p = P(base, *(['d']*depth)) + p.mkdir(parents=True) + + iters = [self.walk(base, top_down=False) for _ in range(100)] + for i in range(depth + 1): + expected = (p, ['d'] if i else [], []) + for it in iters: + self.assertEqual(next(it), expected) + p = p.parent + + iters = [self.walk(base, top_down=True) for _ in range(100)] + p = base + for i in range(depth + 1): + expected = (p, ['d'] if i < depth else [], []) + for it in iters: + self.assertEqual(next(it), expected) + p = p / 'd' + class PathTest(_BasePathTest, unittest.TestCase): cls = pathlib.Path From 3702a1262fd941cb13e67b137be01b52f2d32015 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 10 May 2022 13:11:20 -0400 Subject: [PATCH 08/32] Make Path.walk variable naming consistent --- Lib/pathlib.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 3ec09dbb8b95f2..e46c248d57389e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1,4 +1,3 @@ -from collections import deque import fnmatch import functools import io @@ -1462,8 +1461,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): return self._walk(top_down, on_error, follow_symlinks) def _walk(self, top_down, on_error, follow_symlinks): - dirs = [] - nondirs = [] + dirnames = [] + filenames = [] # We may not have read permission for self, in which case we can't # get a list of the files the directory contains. os.walk @@ -1482,24 +1481,23 @@ def _walk(self, top_down, on_error, follow_symlinks): try: is_dir = entry.is_dir(follow_symlinks=follow_symlinks) except OSError: - # If is_dir() raises an OSError, consider that the entry - # is not a directory, same behavior as os.path.isdir() + # same behavior as os.path.isdir() is_dir = False if is_dir: - dirs.append(entry.name) + dirnames.append(entry.name) else: - nondirs.append(entry.name) + filenames.append(entry.name) if top_down: - yield self, dirs, nondirs + yield self, dirnames, filenames - for dir_name in dirs: + for dir_name in dirnames: new_path = self._make_child_relpath(dir_name) yield from new_path._walk(top_down, on_error, follow_symlinks) if not top_down: - yield self, dirs, nondirs + yield self, dirnames, filenames class PosixPath(Path, PurePosixPath): From fabc925c2f3386d57e203b059cc9775738905e8a Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 10 May 2022 13:16:29 -0400 Subject: [PATCH 09/32] Remove redundant FIXME --- Lib/test/test_pathlib.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 1668586e272722..0770538d87fac6 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2552,10 +2552,9 @@ def setUp(self): os.symlink('broken', broken_link_path, True) os.symlink(join('tmp3', 'broken'), broken_link2_path, True) os.symlink(join('SUB21', 'tmp5'), broken_link3_path, True) - self.sub2_tree = (sub2_path, ["SUB21"], # FIXME + self.sub2_tree = (sub2_path, ["SUB21"], ["broken_link", "broken_link2", "broken_link3", - "link", - "tmp3"]) + "link", "tmp3"]) else: self.sub2_tree = (sub2_path, ["SUB21"], ["tmp3"]) From b387b5411ead8dc8f9b7da9e46c71e32c172fb06 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 10 May 2022 18:08:54 -0400 Subject: [PATCH 10/32] Minor Path.walk docs and tests fixes --- Doc/library/pathlib.rst | 1 + Lib/test/test_pathlib.py | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a7362c228ea11c..5d24e113eec2b8 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1384,6 +1384,7 @@ Below is a table mapping various :mod:`os` functions to their corresponding :func:`os.path.expanduser` :meth:`Path.expanduser` and :meth:`Path.home` :func:`os.listdir` :meth:`Path.iterdir` +:func:`os.walk` :meth:`Path.walk` :func:`os.path.isdir` :meth:`Path.is_dir` :func:`os.path.isfile` :meth:`Path.is_file` :func:`os.path.islink` :meth:`Path.is_symlink` diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 0770538d87fac6..41b55d86863f83 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2572,10 +2572,6 @@ def setUp(self): del self.sub2_tree[1][:1] def test_walk_topdown(self): - if 'unittest.util' in __import__('sys').modules: - # Show full diff in self.assertEqual. - __import__('sys').modules['unittest.util']._MAX_LENGTH = 999999999 - # Walk top-down. P = pathlib.Path all = list(self.walk(self.walk_path)) From 76fadfc3ccd8f9175e685ad216c4677fe418528e Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:32:14 -0400 Subject: [PATCH 11/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 3bb0d10bc5481f..4fa3d5d8c9dddc 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -953,7 +953,7 @@ call fails (for example because the path doesn't exist). For each directory in the directory tree rooted at *self* (including *self* but excluding '.' and '..'), yields a 3-tuple - ``(dirpath, dirnames, filenames)`` + ``(dirpath, dirnames, filenames)`` *dirpath* is a Path to the directory, *dirnames* is a list of the names of the subdirectories in *dirpath* (excluding ``'.'`` and ``'..'``), and From 0c198714fa51c33490fab4b831a46afabaf7ffc8 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:32:27 -0400 Subject: [PATCH 12/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 4fa3d5d8c9dddc..cf75122cefad12 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1001,7 +1001,6 @@ call fails (for example because the path doesn't exist). does not keep track of the directories it visited already. .. note:: - If self is a relative Path, don't change the current working directory between resumptions of :meth:`Path.walk`. :meth:`Path.walk` never changes the current directory, and assumes that the caller doesn't either. From 50b4a2b71d7616fae249a0508b7468e44c1f0b17 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:32:39 -0400 Subject: [PATCH 13/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index cf75122cefad12..9a4f1d41d4a2d1 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1049,7 +1049,6 @@ call fails (for example because the path doesn't exist). (root / name).unlink() for name in dirs: (root / name).rmdir() - .. versionadded:: 3.12 .. method:: Path.lchmod(mode) From cade3e934fec6d5bb7eb227f6a7226ad289df44f Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:32:47 -0400 Subject: [PATCH 14/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 9a4f1d41d4a2d1..b469b566ec4bc6 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1018,7 +1018,6 @@ call fails (for example because the path doesn't exist). Unlike :func:`os.walk`, :meth:`Path.walk` adds symlinks to directories into *filenames* if *follow_symlinks* is ``True`` - This example displays the number of bytes taken by non-directory files in each directory under the starting directory, except that it doesn't look under any __pycache__ subdirectory:: From b32627c4ca515c046650a6b4886c586aa6ac57d7 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:32:55 -0400 Subject: [PATCH 15/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index b469b566ec4bc6..2d00d3ee3308be 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1034,7 +1034,6 @@ call fails (for example because the path doesn't exist). ) if '__pycache__' in dirs: dirs.remove('__pycache__') - In the next example (simple implementation of :func:`shutil.rmtree`), walking the tree bottom-up is essential, :func:`rmdir` doesn't allow deleting a directory before the directory is empty:: From d1a083338653378a406704d1c5f8081748451999 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:33:04 -0400 Subject: [PATCH 16/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 2d00d3ee3308be..ac45b72801d1bd 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1011,7 +1011,7 @@ call fails (for example because the path doesn't exist). its resumptions. I.e. If a directory from *dirnames* has been replaced with a symlink and *follow_symlinks* = ``False``, :meth:`Path.walk` will still try to descend into it. To prevent such behavior, remove directories - from *dirnames* if they have been modified and you do not want to + from *dirnames* if they have been modified and you do not want to descend into them anymore. .. note:: From e367f1f9ea99291b134e174e01a768d836b1412e Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 07:33:14 -0400 Subject: [PATCH 17/32] Update Doc/library/pathlib.rst Co-authored-by: Oleg Iarygin --- Doc/library/pathlib.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index ac45b72801d1bd..ad9e7b0ebea856 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1004,7 +1004,6 @@ call fails (for example because the path doesn't exist). If self is a relative Path, don't change the current working directory between resumptions of :meth:`Path.walk`. :meth:`Path.walk` never changes the current directory, and assumes that the caller doesn't either. - .. note:: :meth:`Path.walk` assumes the directories have not been modified between From bf8b0eb5208ec8326a0a93120aa88c5d3143e62d Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Thu, 30 Jun 2022 09:40:00 -0400 Subject: [PATCH 18/32] Fix 'no blank lines' error --- Doc/library/pathlib.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index ad9e7b0ebea856..d3d0667c33cd4b 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1017,6 +1017,7 @@ call fails (for example because the path doesn't exist). Unlike :func:`os.walk`, :meth:`Path.walk` adds symlinks to directories into *filenames* if *follow_symlinks* is ``True`` + This example displays the number of bytes taken by non-directory files in each directory under the starting directory, except that it doesn't look under any __pycache__ subdirectory:: @@ -1033,6 +1034,7 @@ call fails (for example because the path doesn't exist). ) if '__pycache__' in dirs: dirs.remove('__pycache__') + In the next example (simple implementation of :func:`shutil.rmtree`), walking the tree bottom-up is essential, :func:`rmdir` doesn't allow deleting a directory before the directory is empty:: @@ -1046,6 +1048,7 @@ call fails (for example because the path doesn't exist). (root / name).unlink() for name in dirs: (root / name).rmdir() + .. versionadded:: 3.12 .. method:: Path.lchmod(mode) From d8667c7ed45ab138e49b54449c0eafa43cad20d8 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 3 Jul 2022 13:09:42 -0400 Subject: [PATCH 19/32] Apply suggestions from code review Co-authored-by: Barney Gale --- Doc/library/pathlib.rst | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index d3d0667c33cd4b..bbb275fa85814b 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -955,7 +955,7 @@ call fails (for example because the path doesn't exist). *self* but excluding '.' and '..'), yields a 3-tuple ``(dirpath, dirnames, filenames)`` - *dirpath* is a Path to the directory, *dirnames* is a list of the names + *dirpath* is a :class:`Path` to the directory, *dirnames* is a list of the names of the subdirectories in *dirpath* (excluding ``'.'`` and ``'..'``), and *filenames* is a list of the names of the non-directory files in *dirpath*. Note that the names in the lists contain no path components. To get a full @@ -983,7 +983,7 @@ call fails (for example because the path doesn't exist). directories in *dirnames* have already been generated by the time *dirnames* is yielded to the caller. - By default errors from :meth:`Path._scandir` call are ignored. If + By default errors from :func:`os.scandir` call are ignored. If optional argument *on_error* is specified, it should be a callable; it will be called with one argument, an :exc:`OSError` instance. It can report the error to continue with the walk, or raise the exception @@ -1001,13 +1001,12 @@ call fails (for example because the path doesn't exist). does not keep track of the directories it visited already. .. note:: - If self is a relative Path, don't change the current working directory between - resumptions of :meth:`Path.walk`. :meth:`Path.walk` never changes the current - directory, and assumes that the caller doesn't either. + This method assumes that the current working directory is not changing while + walking relative paths. .. note:: :meth:`Path.walk` assumes the directories have not been modified between - its resumptions. I.e. If a directory from *dirnames* has been replaced + its resumptions. For example, if a directory from *dirnames* has been replaced with a symlink and *follow_symlinks* = ``False``, :meth:`Path.walk` will still try to descend into it. To prevent such behavior, remove directories from *dirnames* if they have been modified and you do not want to @@ -1020,7 +1019,7 @@ call fails (for example because the path doesn't exist). This example displays the number of bytes taken by non-directory files in each directory under the starting directory, except that it doesn't look under any - __pycache__ subdirectory:: + :file:`__pycache__` subdirectory:: from pathlib import Path for root, dirs, files in Path("cpython/Lib/concurrent").walk(on_error=print): From 4509797b2b1e774953216893c2c17cfea821585e Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 3 Jul 2022 13:30:59 -0400 Subject: [PATCH 20/32] More code review fixes for Path.walk --- Doc/library/pathlib.rst | 4 +-- Lib/pathlib.py | 77 ++--------------------------------------- 2 files changed, 4 insertions(+), 77 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index bbb275fa85814b..4d4a52f93b539b 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -961,9 +961,7 @@ call fails (for example because the path doesn't exist). Note that the names in the lists contain no path components. To get a full path (which begins with *self*) to a file or directory in *dirpath*, do ``dirpath / name``. Whether or not the lists are sorted depends on the file - system. If a file or a directory is removed from or added to the *dirpath* - during the generation of *dirnames* and *filenames*, it is uncertain whether - the new entry will appear in the generated lists. + system. If optional argument *top_down* is ``True`` or not specified, the triple for a directory is generated before the triples for any of its subdirectories diff --git a/Lib/pathlib.py b/Lib/pathlib.py index fe820bc7189f8f..36b04b760f4f7e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1311,84 +1311,11 @@ def expanduser(self): return self def walk(self, top_down=True, on_error=None, follow_symlinks=False): - """Generate a top-down directory tree from this directory - - For each directory in the directory tree rooted at self (including - self but excluding '.' and '..'), yields a 3-tuple - - dirpath, dirnames, filenames - - dirpath is the Path to the directory. dirnames is a list of - the names of the subdirectories in dirpath (excluding '.' and '..'). - filenames is a list of the names of the non-directory files in dirpath. - Note that the names in the lists are just names, with no path components. - To get a full path (which begins with top) to a file or directory in - dirpath, do dirpath / name. - - If optional arg 'top_down' is true or not specified, the triple for a - directory is generated before the triples for any of its subdirectories - (directories are generated top down). If top_down is false, the triple - for a directory is generated after the triples for all of its - subdirectories (directories are generated bottom up). - - When top_down is True, the caller can modify the dirnames list in-place - (e.g., via del or slice assignment), and walk will only recurse - into the subdirectories whose names remain in dirnames; this - can be used to prune the search, or to impose a specific order of - visiting. Modifying dirnames when - top_down is False has no effect on the behavior of Path.walk(), since the - directories in dirnames have already been generated by the time dirnames - itself is generated. No matter the value of top_down, the list of - subdirectories is retrieved before the tuples for the directory and its - subdirectories are generated. - - By default errors from Path._scandir() call are ignored. If - optional arg 'on_error' is specified, it should be a callable; it - will be called with one argument, an OSError instance. It can - report the error to continue with the walk, or raise the exception - to abort the walk. Note that the filename is available as the - filename attribute of the exception object. - - By default, Path.walk does not follow symbolic links to subdirectories - on systems that support them. In order to get this functionality, set - the optional argument 'follow_symlinks' to true. Unlike os.walk, - Path.walk only adds symbolic links to dirnames if follow_symlinks=True. - - Caution: if self is a relative Path, don't change the - current working directory between resumptions of walk. walk never - changes the current directory, and assumes that the caller doesn't - either. - - Caution: Unlike os.walk, Path.walk assumes the directories have not - been modified between its resumptions. I.e. If a directory from - dirnames has been replaced with a symlink and follow_symlinks=False, - walk will still try to descend into it. To prevent such behavior, - remove directories from dirnames if they have been modified and you - do not want Path.walk to descend into them anymore. - - Example: - - from pathlib import Path - for root, dirs, files in Path().walk(on_error=print): - print( - root, - "consumes", - sum((root / file).stat().st_size for file in files), - "bytes in", - len(files), - "non-directory files" - ) - # don't visit __pycache__ directories - if '__pycache__' in dirs: - dirs.remove('__pycache__') - """ + """Generate a top-down directory tree from this directory, similar to os.walk()""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) return self._walk(top_down, on_error, follow_symlinks) def _walk(self, top_down, on_error, follow_symlinks): - dirnames = [] - filenames = [] - # We may not have read permission for self, in which case we can't # get a list of the files the directory contains. os.walk # always suppressed the exception then, rather than blow up for a @@ -1402,6 +1329,8 @@ def _walk(self, top_down, on_error, follow_symlinks): return with scandir_it: + dirnames = [] + filenames = [] for entry in scandir_it: try: is_dir = entry.is_dir(follow_symlinks=follow_symlinks) From 15d96b99db69e65f6210f813335fab808bdeda12 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 10 Jul 2022 03:51:08 +0400 Subject: [PATCH 21/32] Apply suggestions from code review Co-authored-by: Brett Cannon --- Doc/library/pathlib.rst | 55 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 4d4a52f93b539b..82319e97e895c3 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -952,24 +952,24 @@ call fails (for example because the path doesn't exist). either top-down or bottom-up. For each directory in the directory tree rooted at *self* (including - *self* but excluding '.' and '..'), yields a 3-tuple + *self* but excluding '.' and '..'), the method yields a 3-tuple of ``(dirpath, dirnames, filenames)`` - *dirpath* is a :class:`Path` to the directory, *dirnames* is a list of the names - of the subdirectories in *dirpath* (excluding ``'.'`` and ``'..'``), and - *filenames* is a list of the names of the non-directory files in *dirpath*. - Note that the names in the lists contain no path components. To get a full - path (which begins with *self*) to a file or directory in *dirpath*, do - ``dirpath / name``. Whether or not the lists are sorted depends on the file - system. + *dirpath* is a :class:`Path` to the directory currently being walked, + *dirnames* is a list of strings for the names of subdirectories in *dirpath* + (excluding ``'.'`` and ``'..'``), and *filenames* is a list of strings for + the names of the non-directory files in *dirpath*. To get a full path + (which begins with *self*) to a file or directory in *dirpath*, do + ``dirpath / name``. Whether or not the lists are sorted is file + system-dependent. If optional argument *top_down* is ``True`` or not specified, the triple for a directory is generated before the triples for any of its subdirectories - (directories are generated top-down). If *top_down* is ``False``, the triple + (directories are walked top-down). If *top_down* is ``False``, the triple for a directory is generated after the triples for all of its subdirectories (directories are generated bottom-up). No matter the value of *top_down*, the list of subdirectories is retrieved before the tuples for the directory and - its subdirectories are generated. + its subdirectories are walked. When *top_down* is True, the caller can modify the *dirnames* list in-place (For example, using :keyword:`del` or slice assignment), and :meth:`Path.walk` @@ -981,42 +981,41 @@ call fails (for example because the path doesn't exist). directories in *dirnames* have already been generated by the time *dirnames* is yielded to the caller. - By default errors from :func:`os.scandir` call are ignored. If + By default, errors from :func:`os.scandir` are ignored. If the optional argument *on_error* is specified, it should be a callable; it will be called with one argument, an :exc:`OSError` instance. It can report the error to continue with the walk, or raise the exception to abort the walk. Note that the filename is available as the ``filename`` attribute of the exception object. - By default, :meth:`Path.walk` will not walk down into symbolic links that + By default, :meth:`Path.walk` will not follow symbolic links that resolve to directories. Set *follow_symlinks* to ``True`` to visit directories - pointed to by symlinks, on systems that support them. + pointed to by symlinks (where supported). .. note:: Be aware that setting *follow_symlinks* to ``True`` can lead to infinite recursion if a link points to a parent directory of itself. :meth:`Path.walk` - does not keep track of the directories it visited already. + does not keep track of the directories it has already visited. .. note:: - This method assumes that the current working directory is not changing while + This method assumes that the current working directory does not change while walking relative paths. - .. note:: - :meth:`Path.walk` assumes the directories have not been modified between - its resumptions. For example, if a directory from *dirnames* has been replaced + .. note:: + :meth:`Path.walk` assumes the directories it walks are not been modified during + execution. For example, if a directory from *dirnames* has been replaced with a symlink and *follow_symlinks* = ``False``, :meth:`Path.walk` will still try to descend into it. To prevent such behavior, remove directories - from *dirnames* if they have been modified and you do not want to - descend into them anymore. + from *dirnames* as appropriate. .. note:: - Unlike :func:`os.walk`, :meth:`Path.walk` adds symlinks to directories into *filenames* - if *follow_symlinks* is ``True`` + Unlike :func:`os.walk`, :meth:`Path.walk` lists symlinks to directories into + *filenames* if *follow_symlinks* is ``True``. - This example displays the number of bytes taken by non-directory files in each - directory under the starting directory, except that it doesn't look under any + This example displays the number of bytes used by all files in each directory, + while ignoring `__pycache__` directories. :file:`__pycache__` subdirectory:: from pathlib import Path @@ -1032,14 +1031,14 @@ call fails (for example because the path doesn't exist). if '__pycache__' in dirs: dirs.remove('__pycache__') - In the next example (simple implementation of :func:`shutil.rmtree`), - walking the tree bottom-up is essential, :func:`rmdir` doesn't allow - deleting a directory before the directory is empty:: + This next example is asimple implementation of :func:`shutil.rmtree`. + Walking the tree bottom-up is essential as :func:`rmdir` doesn't allow + deleting a directory before it is empty:: # Delete everything reachable from the directory "top", # assuming there are no symbolic links. # CAUTION: This is dangerous! For example, if top == Path('/'), - # it could delete all your disk files. + # it could delete all of your files. for root, dirs, files in top.walk(topdown=False): for name in files: (root / name).unlink() From 92e1a7afb023eddee39ee119c8916810b282b9bd Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 10 Jul 2022 03:51:43 +0400 Subject: [PATCH 22/32] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Brett Cannon Co-authored-by: Éric --- Doc/library/pathlib.rst | 2 +- .../next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 82319e97e895c3..acd2e6af4a7ef0 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -967,7 +967,7 @@ call fails (for example because the path doesn't exist). directory is generated before the triples for any of its subdirectories (directories are walked top-down). If *top_down* is ``False``, the triple for a directory is generated after the triples for all of its subdirectories - (directories are generated bottom-up). No matter the value of *top_down*, the + (directories are walked bottom-up). No matter the value of *top_down*, the list of subdirectories is retrieved before the tuples for the directory and its subdirectories are walked. diff --git a/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst b/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst index 1f06fbbb79f5a3..24aa4403f8a68a 100644 --- a/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst +++ b/Misc/NEWS.d/next/Library/2022-05-10-16-30-40.gh-issue-90385.1_wBRQ.rst @@ -1 +1 @@ -Added method walk to :class:`pathlib.Path` objects as a pathlib alternative for :func:`os.walk` +Add :meth:`pathlib.Path.walk` as an alternative to :func:`os.walk`. From cfa730d48c4ba26dd767433e94213a303c1799a8 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 10 Jul 2022 05:32:55 +0400 Subject: [PATCH 23/32] Code review fixes --- Doc/library/pathlib.rst | 8 ++++---- Lib/test/test_pathlib.py | 44 +++++++++++++++++++++++++++------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index acd2e6af4a7ef0..a428ed0d711050 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -999,7 +999,7 @@ call fails (for example because the path doesn't exist). does not keep track of the directories it has already visited. .. note:: - This method assumes that the current working directory does not change while + :meth:`Path.walk` assumes that the current working directory does not change while walking relative paths. .. note:: @@ -1012,10 +1012,10 @@ call fails (for example because the path doesn't exist). .. note:: Unlike :func:`os.walk`, :meth:`Path.walk` lists symlinks to directories into - *filenames* if *follow_symlinks* is ``True``. + *filenames* if *follow_symlinks* is ``False``. This example displays the number of bytes used by all files in each directory, - while ignoring `__pycache__` directories. + while ignoring ``__pycache__`` directories. :file:`__pycache__` subdirectory:: from pathlib import Path @@ -1031,7 +1031,7 @@ call fails (for example because the path doesn't exist). if '__pycache__' in dirs: dirs.remove('__pycache__') - This next example is asimple implementation of :func:`shutil.rmtree`. + This next example is a simple implementation of :func:`shutil.rmtree`. Walking the tree bottom-up is essential as :func:`rmdir` doesn't allow deleting a directory before it is empty:: diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 62a8872563e5ae..3ada51cc02597c 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2480,11 +2480,11 @@ def test_complex_symlinks_relative_dot_dot(self): class WalkTests(unittest.TestCase): """Tests for Path.walk() - + They are mostly just copies of os.walk tests converted to use Paths because os.walk tests are already mature and cover many edge cases that we could miss writing new tests. - + It is not combined with Path* tests because its setup is aimed at testing a bit more complex cases than Path's setup. """ @@ -2510,7 +2510,7 @@ def setUp(self): # tmp3 # SUB21/ not readable # tmp5 - # link/ a symlink to TESTFN.2 + # link/ a symlink to TEST2 # broken_link # broken_link2 # broken_link3 @@ -2519,22 +2519,22 @@ def setUp(self): self.walk_path = P(os_helper.TESTFN, "TEST1") self.sub1_path = self.walk_path / "SUB1" self.sub11_path = self.sub1_path / "SUB11" - sub2_path = self.walk_path / "SUB2" - sub21_path= sub2_path / "SUB21" + self.sub2_path = self.walk_path / "SUB2" + sub21_path= self.sub2_path / "SUB21" tmp1_path = self.walk_path / "tmp1" tmp2_path = self.sub1_path / "tmp2" - tmp3_path = sub2_path / "tmp3" + tmp3_path = self.sub2_path / "tmp3" tmp5_path = sub21_path / "tmp3" - self.link_path = sub2_path / "link" + self.link_path = self.sub2_path / "link" t2_path = P(os_helper.TESTFN, "TEST2") tmp4_path = P(os_helper.TESTFN, "TEST2", "tmp4") - broken_link_path = sub2_path / "broken_link" - broken_link2_path = sub2_path / "broken_link2" - broken_link3_path = sub2_path / "broken_link3" + broken_link_path = self.sub2_path / "broken_link" + broken_link2_path = self.sub2_path / "broken_link2" + broken_link3_path = self.sub2_path / "broken_link3" # Create stuff. os.makedirs(self.sub11_path) - os.makedirs(sub2_path) + os.makedirs(self.sub2_path) os.makedirs(sub21_path) os.makedirs(t2_path) @@ -2547,11 +2547,11 @@ def setUp(self): os.symlink('broken', broken_link_path, True) os.symlink(join('tmp3', 'broken'), broken_link2_path, True) os.symlink(join('SUB21', 'tmp5'), broken_link3_path, True) - self.sub2_tree = (sub2_path, ["SUB21"], + self.sub2_tree = (self.sub2_path, ["SUB21"], ["broken_link", "broken_link2", "broken_link3", "link", "tmp3"]) else: - self.sub2_tree = (sub2_path, ["SUB21"], ["tmp3"]) + self.sub2_tree = (self.sub2_path, ["SUB21"], ["tmp3"]) if not is_emscripten: # Emscripten fails with inaccessible directory @@ -2628,7 +2628,7 @@ def test_walk_bottom_up(self): self.sub2_tree) @os_helper.skip_unless_symlink - def test_walk_symlink(self): + def test_walk_follow_symlinks(self): # Walk, following symlinks. walk_it = self.walk(self.walk_path, follow_symlinks=True) for root, dirs, files in walk_it: @@ -2639,6 +2639,22 @@ def test_walk_symlink(self): else: self.fail("Didn't follow symlink with follow_symlinks=True") + def test_walk_symlink_location(self): + """ Tests whether symlinks end up in filenames or dirnames depending + on the `follow_symlinks` argument + """ + walk_it = self.walk(self.walk_path, follow_symlinks=False) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", files) + break + + walk_it = self.walk(self.walk_path, follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", dirs) + break + def test_walk_bad_dir(self): # Walk top-down. errors = [] From 7aec96d102d37d8ec75eee219c46d2ed28b4770a Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 10 Jul 2022 16:45:39 +0400 Subject: [PATCH 24/32] Clarify pathlib.Path.walk() error handling --- Doc/library/pathlib.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a428ed0d711050..8c8558eaa16f64 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -981,12 +981,11 @@ call fails (for example because the path doesn't exist). directories in *dirnames* have already been generated by the time *dirnames* is yielded to the caller. - By default, errors from :func:`os.scandir` are ignored. If the - optional argument *on_error* is specified, it should be a callable; it - will be called with one argument, an :exc:`OSError` instance. It can - report the error to continue with the walk, or raise the exception - to abort the walk. Note that the filename is available as the - ``filename`` attribute of the exception object. + By default, errors from :func:`os.scandir` are ignored. If the optional + argument *on_error* is specified, it should be a callable; it will be + called with one argument, an :exc:`OSError` instance. It can handle the + error to continue the walk or re-raise it to stop the walk. Note that the + filename is available as the ``filename`` attribute of the exception object. By default, :meth:`Path.walk` will not follow symbolic links that resolve to directories. Set *follow_symlinks* to ``True`` to visit directories From 38fe1e5011452f97510bf4a625cd54eb1efa33a7 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 10 Jul 2022 21:50:22 +0400 Subject: [PATCH 25/32] Apply suggestions from code review Co-authored-by: Barney Gale --- Doc/library/pathlib.rst | 11 ++++++----- Lib/pathlib.py | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8c8558eaa16f64..ca325ad2d29abe 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -971,13 +971,13 @@ call fails (for example because the path doesn't exist). list of subdirectories is retrieved before the tuples for the directory and its subdirectories are walked. - When *top_down* is True, the caller can modify the *dirnames* list in-place + When *top_down* is ``True``, the caller can modify the *dirnames* list in-place (For example, using :keyword:`del` or slice assignment), and :meth:`Path.walk` will only recurse into the subdirectories whose names remain in *dirnames*; this can be used to prune the search, or to impose a specific order of visiting, or even to inform :meth:`Path.walk` about directories the caller creates or renames before it resumes :meth:`Path.walk` again. Modifying *dirnames* when - *top_down* is False has no effect on the behavior of :meth:`Path.walk()`, since the + *top_down* is ``False`` has no effect on the behavior of :meth:`Path.walk()`, since the directories in *dirnames* have already been generated by the time *dirnames* is yielded to the caller. @@ -987,9 +987,10 @@ call fails (for example because the path doesn't exist). error to continue the walk or re-raise it to stop the walk. Note that the filename is available as the ``filename`` attribute of the exception object. - By default, :meth:`Path.walk` will not follow symbolic links that - resolve to directories. Set *follow_symlinks* to ``True`` to visit directories - pointed to by symlinks (where supported). + By default, :meth:`Path.walk` does not follow symbolic links, and instead adds them + to the *filenames* list. Set *follow_symlinks* to ``True`` to resolve symlinks + and place them in *dirnames* and *filenames* as appropriate for their targets, and + consequently visit directories pointed to by symlinks (where supported). .. note:: diff --git a/Lib/pathlib.py b/Lib/pathlib.py index eec29ba8d25676..a6a5b515657759 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1353,9 +1353,9 @@ def _walk(self, top_down, on_error, follow_symlinks): if top_down: yield self, dirnames, filenames - for dir_name in dirnames: - new_path = self._make_child_relpath(dir_name) - yield from new_path._walk(top_down, on_error, follow_symlinks) + for dirname in dirnames: + dirpath = self._make_child_relpath(dirname) + yield from dirpath._walk(top_down, on_error, follow_symlinks) if not top_down: yield self, dirnames, filenames From eef3ba3c62f393c80ce6b1e2ad730de469904bd2 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Mon, 11 Jul 2022 02:13:08 +0400 Subject: [PATCH 26/32] Code review fixes --- Doc/library/pathlib.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8c8558eaa16f64..06ac0a39a108b8 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -997,10 +997,6 @@ call fails (for example because the path doesn't exist). recursion if a link points to a parent directory of itself. :meth:`Path.walk` does not keep track of the directories it has already visited. - .. note:: - :meth:`Path.walk` assumes that the current working directory does not change while - walking relative paths. - .. note:: :meth:`Path.walk` assumes the directories it walks are not been modified during execution. For example, if a directory from *dirnames* has been replaced @@ -1034,9 +1030,8 @@ call fails (for example because the path doesn't exist). Walking the tree bottom-up is essential as :func:`rmdir` doesn't allow deleting a directory before it is empty:: - # Delete everything reachable from the directory "top", - # assuming there are no symbolic links. - # CAUTION: This is dangerous! For example, if top == Path('/'), + # Delete everything reachable from the directory "top". + # CAUTION: This is dangerous! For example, if top == Path('/'), # it could delete all of your files. for root, dirs, files in top.walk(topdown=False): for name in files: From 8fe3b6263781e082a6e83b5bcdf12606b6009d70 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 12 Jul 2022 10:39:32 +0400 Subject: [PATCH 27/32] Apply suggestions from code review Co-authored-by: Barney Gale --- Lib/test/test_pathlib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 3ada51cc02597c..5f891498d2d302 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2540,13 +2540,13 @@ def setUp(self): for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path, tmp5_path: with open(path, "x", encoding='utf-8') as f: - f.write(f"I'm {path} and proud of it. Blame test_os.\n") + f.write(f"I'm {path} and proud of it. Blame test_pathlib.\n") if os_helper.can_symlink(): os.symlink(os.path.abspath(t2_path), self.link_path) os.symlink('broken', broken_link_path, True) - os.symlink(join('tmp3', 'broken'), broken_link2_path, True) - os.symlink(join('SUB21', 'tmp5'), broken_link3_path, True) + os.symlink(P('tmp3', 'broken'), broken_link2_path, True) + os.symlink(P('SUB21', 'tmp5'), broken_link3_path, True) self.sub2_tree = (self.sub2_path, ["SUB21"], ["broken_link", "broken_link2", "broken_link3", "link", "tmp3"]) From e8ea6ba0a9b2c7d7ba05b68a75771892aa11aea3 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 12 Jul 2022 12:21:12 +0400 Subject: [PATCH 28/32] Code review fixes --- Doc/library/pathlib.rst | 8 ++++---- Lib/test/test_pathlib.py | 24 +++++++++--------------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8871825955bbe2..d2a542c88c0f26 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -963,7 +963,7 @@ call fails (for example because the path doesn't exist). ``dirpath / name``. Whether or not the lists are sorted is file system-dependent. - If optional argument *top_down* is ``True`` or not specified, the triple for a + If optional argument *top_down* is true or not specified, the triple for a directory is generated before the triples for any of its subdirectories (directories are walked top-down). If *top_down* is ``False``, the triple for a directory is generated after the triples for all of its subdirectories @@ -971,7 +971,7 @@ call fails (for example because the path doesn't exist). list of subdirectories is retrieved before the tuples for the directory and its subdirectories are walked. - When *top_down* is ``True``, the caller can modify the *dirnames* list in-place + When *top_down* is true, the caller can modify the *dirnames* list in-place (For example, using :keyword:`del` or slice assignment), and :meth:`Path.walk` will only recurse into the subdirectories whose names remain in *dirnames*; this can be used to prune the search, or to impose a specific order of visiting, @@ -988,13 +988,13 @@ call fails (for example because the path doesn't exist). filename is available as the ``filename`` attribute of the exception object. By default, :meth:`Path.walk` does not follow symbolic links, and instead adds them - to the *filenames* list. Set *follow_symlinks* to ``True`` to resolve symlinks + to the *filenames* list. Set *follow_symlinks* to true to resolve symlinks and place them in *dirnames* and *filenames* as appropriate for their targets, and consequently visit directories pointed to by symlinks (where supported). .. note:: - Be aware that setting *follow_symlinks* to ``True`` can lead to infinite + Be aware that setting *follow_symlinks* to true can lead to infinite recursion if a link points to a parent directory of itself. :meth:`Path.walk` does not keep track of the directories it has already visited. diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 5f891498d2d302..91e12049322fbe 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2489,14 +2489,8 @@ class WalkTests(unittest.TestCase): testing a bit more complex cases than Path's setup. """ - # Wrapper to hide minor differences between different implementations - # of Path.walk (for example, Path.fwalk) - def walk(self, top, **kwargs): - return top.walk(**kwargs) - def setUp(self): P = pathlib.Path - join = os.path.join self.addCleanup(os_helper.rmtree, os_helper.TESTFN) # Build: @@ -2569,7 +2563,7 @@ def setUp(self): def test_walk_topdown(self): # Walk top-down. P = pathlib.Path - all = list(self.walk(self.walk_path)) + all = list(self.walk_path.walk()) self.assertEqual(len(all), 4) # We can't know which order SUB1 and SUB2 will appear in. @@ -2589,7 +2583,7 @@ def test_walk_prune(self, walk_path=None): walk_path = self.walk_path # Prune the search. all = [] - for root, dirs, files in self.walk(walk_path): + for root, dirs, files in walk_path.walk(): all.append((root, dirs, files)) # Don't descend into SUB1. if 'SUB1' in dirs: @@ -2608,7 +2602,7 @@ def test_file_like_path(self): def test_walk_bottom_up(self): # Walk bottom-up. - all = list(self.walk(self.walk_path, top_down=False)) + all = list(self.walk_path.walk( top_down=False)) self.assertEqual(len(all), 4, all) # We can't know which order SUB1 and SUB2 will appear in. @@ -2630,7 +2624,7 @@ def test_walk_bottom_up(self): @os_helper.skip_unless_symlink def test_walk_follow_symlinks(self): # Walk, following symlinks. - walk_it = self.walk(self.walk_path, follow_symlinks=True) + walk_it = self.walk_path.walk(follow_symlinks=True) for root, dirs, files in walk_it: if root == self.link_path: self.assertEqual(dirs, []) @@ -2643,13 +2637,13 @@ def test_walk_symlink_location(self): """ Tests whether symlinks end up in filenames or dirnames depending on the `follow_symlinks` argument """ - walk_it = self.walk(self.walk_path, follow_symlinks=False) + walk_it = self.walk_path.walk(follow_symlinks=False) for root, dirs, files in walk_it: if root == self.sub2_path: self.assertIn("link", files) break - walk_it = self.walk(self.walk_path, follow_symlinks=True) + walk_it = self.walk_path.walk(follow_symlinks=True) for root, dirs, files in walk_it: if root == self.sub2_path: self.assertIn("link", dirs) @@ -2658,7 +2652,7 @@ def test_walk_symlink_location(self): def test_walk_bad_dir(self): # Walk top-down. errors = [] - walk_it = self.walk(self.walk_path, on_error=errors.append) + walk_it = self.walk_path.walk(on_error=errors.append) root, dirs, files = next(walk_it) self.assertEqual(errors, []) dir1 = 'SUB1' @@ -2683,14 +2677,14 @@ def test_walk_many_open_files(self): p = P(base, *(['d']*depth)) p.mkdir(parents=True) - iters = [self.walk(base, top_down=False) for _ in range(100)] + iters = [base.walk(top_down=False) for _ in range(100)] for i in range(depth + 1): expected = (p, ['d'] if i else [], []) for it in iters: self.assertEqual(next(it), expected) p = p.parent - iters = [self.walk(base, top_down=True) for _ in range(100)] + iters = [base.walk(top_down=True) for _ in range(100)] p = base for i in range(depth + 1): expected = (p, ['d'] if i < depth else [], []) From 79cf8fdfbbfd18d7dea6d43c52f02c0600577528 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Wed, 13 Jul 2022 21:44:19 +0400 Subject: [PATCH 29/32] Remove backticks around True and False --- Doc/library/pathlib.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index d2a542c88c0f26..c6b2df1988f3d2 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -965,7 +965,7 @@ call fails (for example because the path doesn't exist). If optional argument *top_down* is true or not specified, the triple for a directory is generated before the triples for any of its subdirectories - (directories are walked top-down). If *top_down* is ``False``, the triple + (directories are walked top-down). If *top_down* is false, the triple for a directory is generated after the triples for all of its subdirectories (directories are walked bottom-up). No matter the value of *top_down*, the list of subdirectories is retrieved before the tuples for the directory and @@ -977,7 +977,7 @@ call fails (for example because the path doesn't exist). this can be used to prune the search, or to impose a specific order of visiting, or even to inform :meth:`Path.walk` about directories the caller creates or renames before it resumes :meth:`Path.walk` again. Modifying *dirnames* when - *top_down* is ``False`` has no effect on the behavior of :meth:`Path.walk()`, since the + *top_down* is false has no effect on the behavior of :meth:`Path.walk()`, since the directories in *dirnames* have already been generated by the time *dirnames* is yielded to the caller. @@ -1001,14 +1001,14 @@ call fails (for example because the path doesn't exist). .. note:: :meth:`Path.walk` assumes the directories it walks are not been modified during execution. For example, if a directory from *dirnames* has been replaced - with a symlink and *follow_symlinks* = ``False``, :meth:`Path.walk` will + with a symlink and *follow_symlinks* is false, :meth:`Path.walk` will still try to descend into it. To prevent such behavior, remove directories from *dirnames* as appropriate. .. note:: Unlike :func:`os.walk`, :meth:`Path.walk` lists symlinks to directories into - *filenames* if *follow_symlinks* is ``False``. + *filenames* if *follow_symlinks* is false. This example displays the number of bytes used by all files in each directory, while ignoring ``__pycache__`` directories. From bed850ebfa67a9debae0d9c98630a314a96f32f6 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 17 Jul 2022 17:31:27 +0400 Subject: [PATCH 30/32] Apply suggestions from code review Co-authored-by: Brett Cannon --- Doc/library/pathlib.rst | 23 +++++++++++------------ Lib/test/test_pathlib.py | 24 +++++------------------- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index c6b2df1988f3d2..36eaa856d88162 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -953,7 +953,7 @@ call fails (for example because the path doesn't exist). For each directory in the directory tree rooted at *self* (including *self* but excluding '.' and '..'), the method yields a 3-tuple of - ``(dirpath, dirnames, filenames)`` + ``(dirpath, dirnames, filenames)``. *dirpath* is a :class:`Path` to the directory currently being walked, *dirnames* is a list of strings for the names of subdirectories in *dirpath* @@ -963,27 +963,27 @@ call fails (for example because the path doesn't exist). ``dirpath / name``. Whether or not the lists are sorted is file system-dependent. - If optional argument *top_down* is true or not specified, the triple for a + If the optional argument *top_down* is true (which is the default), the triple for a directory is generated before the triples for any of its subdirectories (directories are walked top-down). If *top_down* is false, the triple for a directory is generated after the triples for all of its subdirectories (directories are walked bottom-up). No matter the value of *top_down*, the - list of subdirectories is retrieved before the tuples for the directory and + list of subdirectories is retrieved before the triples for the directory and its subdirectories are walked. When *top_down* is true, the caller can modify the *dirnames* list in-place - (For example, using :keyword:`del` or slice assignment), and :meth:`Path.walk` - will only recurse into the subdirectories whose names remain in *dirnames*; - this can be used to prune the search, or to impose a specific order of visiting, + (for example, using :keyword:`del` or slice assignment), and :meth:`Path.walk` + will only recurse into the subdirectories whose names remain in *dirnames*. + This can be used to prune the search, or to impose a specific order of visiting, or even to inform :meth:`Path.walk` about directories the caller creates or renames before it resumes :meth:`Path.walk` again. Modifying *dirnames* when - *top_down* is false has no effect on the behavior of :meth:`Path.walk()`, since the + *top_down* is false has no effect on the behavior of :meth:`Path.walk()` since the directories in *dirnames* have already been generated by the time *dirnames* is yielded to the caller. By default, errors from :func:`os.scandir` are ignored. If the optional argument *on_error* is specified, it should be a callable; it will be - called with one argument, an :exc:`OSError` instance. It can handle the + called with one argument, an :exc:`OSError` instance. The callable can handle the error to continue the walk or re-raise it to stop the walk. Note that the filename is available as the ``filename`` attribute of the exception object. @@ -999,7 +999,7 @@ call fails (for example because the path doesn't exist). does not keep track of the directories it has already visited. .. note:: - :meth:`Path.walk` assumes the directories it walks are not been modified during + :meth:`Path.walk` assumes the directories it walks are not modified during execution. For example, if a directory from *dirnames* has been replaced with a symlink and *follow_symlinks* is false, :meth:`Path.walk` will still try to descend into it. To prevent such behavior, remove directories @@ -1007,12 +1007,11 @@ call fails (for example because the path doesn't exist). .. note:: - Unlike :func:`os.walk`, :meth:`Path.walk` lists symlinks to directories into + Unlike :func:`os.walk`, :meth:`Path.walk` lists symlinks to directories in *filenames* if *follow_symlinks* is false. This example displays the number of bytes used by all files in each directory, - while ignoring ``__pycache__`` directories. - :file:`__pycache__` subdirectory:: + while ignoring ``__pycache__`` directories:: from pathlib import Path for root, dirs, files in Path("cpython/Lib/concurrent").walk(on_error=print): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 91e12049322fbe..1ecc08198fac53 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2479,15 +2479,6 @@ def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) class WalkTests(unittest.TestCase): - """Tests for Path.walk() - - They are mostly just copies of os.walk tests converted to use Paths - because os.walk tests are already mature and cover many edge cases - that we could miss writing new tests. - - It is not combined with Path* tests because its setup is aimed at - testing a bit more complex cases than Path's setup. - """ def setUp(self): P = pathlib.Path @@ -2526,7 +2517,6 @@ def setUp(self): broken_link2_path = self.sub2_path / "broken_link2" broken_link3_path = self.sub2_path / "broken_link3" - # Create stuff. os.makedirs(self.sub11_path) os.makedirs(self.sub2_path) os.makedirs(sub21_path) @@ -2548,7 +2538,7 @@ def setUp(self): self.sub2_tree = (self.sub2_path, ["SUB21"], ["tmp3"]) if not is_emscripten: - # Emscripten fails with inaccessible directory + # Emscripten fails with inaccessible directories. os.chmod(sub21_path, 0) try: os.listdir(sub21_path) @@ -2561,7 +2551,6 @@ def setUp(self): del self.sub2_tree[1][:1] def test_walk_topdown(self): - # Walk top-down. P = pathlib.Path all = list(self.walk_path.walk()) @@ -2585,7 +2574,6 @@ def test_walk_prune(self, walk_path=None): all = [] for root, dirs, files in walk_path.walk(): all.append((root, dirs, files)) - # Don't descend into SUB1. if 'SUB1' in dirs: # Note that this also mutates the dirs we appended to all! dirs.remove('SUB1') @@ -2601,7 +2589,6 @@ def test_file_like_path(self): self.test_walk_prune(FakePath(self.walk_path).__fspath__()) def test_walk_bottom_up(self): - # Walk bottom-up. all = list(self.walk_path.walk( top_down=False)) self.assertEqual(len(all), 4, all) @@ -2623,7 +2610,6 @@ def test_walk_bottom_up(self): @os_helper.skip_unless_symlink def test_walk_follow_symlinks(self): - # Walk, following symlinks. walk_it = self.walk_path.walk(follow_symlinks=True) for root, dirs, files in walk_it: if root == self.link_path: @@ -2634,14 +2620,15 @@ def test_walk_follow_symlinks(self): self.fail("Didn't follow symlink with follow_symlinks=True") def test_walk_symlink_location(self): - """ Tests whether symlinks end up in filenames or dirnames depending - on the `follow_symlinks` argument - """ + # Tests whether symlinks end up in filenames or dirnames depending + # on the `follow_symlinks` argument. walk_it = self.walk_path.walk(follow_symlinks=False) for root, dirs, files in walk_it: if root == self.sub2_path: self.assertIn("link", files) break + else: + self.fail("symlink not found") walk_it = self.walk_path.walk(follow_symlinks=True) for root, dirs, files in walk_it: @@ -2650,7 +2637,6 @@ def test_walk_symlink_location(self): break def test_walk_bad_dir(self): - # Walk top-down. errors = [] walk_it = self.walk_path.walk(on_error=errors.append) root, dirs, files = next(walk_it) From 203ec3d0a3aea92b96df414e82549af1081b09bf Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Mon, 18 Jul 2022 00:09:40 +0400 Subject: [PATCH 31/32] Apply suggestions from code review --- Lib/test/test_pathlib.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 1ecc08198fac53..4cfc3671b2b6d8 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2479,9 +2479,10 @@ def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) class WalkTests(unittest.TestCase): + cls = pathlib.Path def setUp(self): - P = pathlib.Path + P = self.cls self.addCleanup(os_helper.rmtree, os_helper.TESTFN) # Build: @@ -2551,7 +2552,7 @@ def setUp(self): del self.sub2_tree[1][:1] def test_walk_topdown(self): - P = pathlib.Path + P = self.cls all = list(self.walk_path.walk()) self.assertEqual(len(all), 4) @@ -2657,7 +2658,7 @@ def test_walk_bad_dir(self): path1new.rename(path1) def test_walk_many_open_files(self): - P = pathlib.Path + P = self.cls depth = 30 base = P(os_helper.TESTFN, 'deep') p = P(base, *(['d']*depth)) From eef6054b680a6ea06e8dbaf803beabe7c50c8a9f Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 22 Jul 2022 16:29:47 -0700 Subject: [PATCH 32/32] Apply suggestions from code review --- Lib/pathlib.py | 4 ++-- Lib/test/test_pathlib.py | 30 +++++++++++++----------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a6a5b515657759..a4a53f1ee91fe0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1318,7 +1318,7 @@ def expanduser(self): return self def walk(self, top_down=True, on_error=None, follow_symlinks=False): - """Generate a top-down directory tree from this directory, similar to os.walk()""" + """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) return self._walk(top_down, on_error, follow_symlinks) @@ -1342,7 +1342,7 @@ def _walk(self, top_down, on_error, follow_symlinks): try: is_dir = entry.is_dir(follow_symlinks=follow_symlinks) except OSError: - # same behavior as os.path.isdir() + # Carried over from os.path.isdir(). is_dir = False if is_dir: diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 4cfc3671b2b6d8..6f3b2a4df890bf 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2479,10 +2479,8 @@ def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) class WalkTests(unittest.TestCase): - cls = pathlib.Path def setUp(self): - P = self.cls self.addCleanup(os_helper.rmtree, os_helper.TESTFN) # Build: @@ -2502,7 +2500,7 @@ def setUp(self): # broken_link3 # TEST2/ # tmp4 a lone file - self.walk_path = P(os_helper.TESTFN, "TEST1") + self.walk_path = pathlib.Path(os_helper.TESTFN, "TEST1") self.sub1_path = self.walk_path / "SUB1" self.sub11_path = self.sub1_path / "SUB11" self.sub2_path = self.walk_path / "SUB2" @@ -2512,8 +2510,8 @@ def setUp(self): tmp3_path = self.sub2_path / "tmp3" tmp5_path = sub21_path / "tmp3" self.link_path = self.sub2_path / "link" - t2_path = P(os_helper.TESTFN, "TEST2") - tmp4_path = P(os_helper.TESTFN, "TEST2", "tmp4") + t2_path = pathlib.Path(os_helper.TESTFN, "TEST2") + tmp4_path = pathlib.Path(os_helper.TESTFN, "TEST2", "tmp4") broken_link_path = self.sub2_path / "broken_link" broken_link2_path = self.sub2_path / "broken_link2" broken_link3_path = self.sub2_path / "broken_link3" @@ -2530,8 +2528,8 @@ def setUp(self): if os_helper.can_symlink(): os.symlink(os.path.abspath(t2_path), self.link_path) os.symlink('broken', broken_link_path, True) - os.symlink(P('tmp3', 'broken'), broken_link2_path, True) - os.symlink(P('SUB21', 'tmp5'), broken_link3_path, True) + os.symlink(pathlib.Path('tmp3', 'broken'), broken_link2_path, True) + os.symlink(pathlib.Path('SUB21', 'tmp5'), broken_link3_path, True) self.sub2_tree = (self.sub2_path, ["SUB21"], ["broken_link", "broken_link2", "broken_link3", "link", "tmp3"]) @@ -2552,7 +2550,6 @@ def setUp(self): del self.sub2_tree[1][:1] def test_walk_topdown(self): - P = self.cls all = list(self.walk_path.walk()) self.assertEqual(len(all), 4) @@ -2658,26 +2655,25 @@ def test_walk_bad_dir(self): path1new.rename(path1) def test_walk_many_open_files(self): - P = self.cls depth = 30 - base = P(os_helper.TESTFN, 'deep') - p = P(base, *(['d']*depth)) - p.mkdir(parents=True) + base = pathlib.Path(os_helper.TESTFN, 'deep') + path = pathlib.Path(base, *(['d']*depth)) + path.mkdir(parents=True) iters = [base.walk(top_down=False) for _ in range(100)] for i in range(depth + 1): - expected = (p, ['d'] if i else [], []) + expected = (path, ['d'] if i else [], []) for it in iters: self.assertEqual(next(it), expected) - p = p.parent + path = path.parent iters = [base.walk(top_down=True) for _ in range(100)] - p = base + path = base for i in range(depth + 1): - expected = (p, ['d'] if i < depth else [], []) + expected = (path, ['d'] if i < depth else [], []) for it in iters: self.assertEqual(next(it), expected) - p = p / 'd' + path = path / 'd' class PathTest(_BasePathTest, unittest.TestCase):