From 553a5a80bb66549a1519e5ff8852fd199f0ff311 Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Thu, 11 Apr 2019 19:54:30 +1000 Subject: [PATCH 01/13] bpo-36602: pathlib can list recursively Allow pathlib.Path.iterdir to list contents of subdirectories recursively. --- Doc/library/pathlib.rst | 12 +++++++++++- Lib/pathlib.py | 12 +++++++++--- Lib/test/test_pathlib.py | 8 ++++++++ Misc/ACKS | 1 + 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index acbd0e48af2425..6f0caa050145a2 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -844,7 +844,7 @@ call fails (for example because the path doesn't exist). other errors (such as permission errors) are propagated. -.. method:: Path.iterdir() +.. method:: Path.iterdir(recursive=False) When the path points to a directory, yield path objects of the directory contents:: @@ -860,6 +860,16 @@ call fails (for example because the path doesn't exist). PosixPath('docs/_static') PosixPath('docs/Makefile') + Also supports recursing into subdirectories:: + + >>> p = Path('foo') + >>> for child in p.iterdir(recursive=True): child + ... + PosixPath('foo/a.txt') + PosixPath('foo/b.py') + PosixPath('foo/bar/c.txt') + PosixPath('foo/bar/spam/d.rst') + .. method:: Path.lchmod(mode) Like :meth:`Path.chmod` but, if the path points to a symbolic link, the diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 7bd66c10cbdb64..7c997ed6c6cc28 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1098,9 +1098,11 @@ def samefile(self, other_path): other_st = os.stat(other_path) return os.path.samestat(st, other_st) - def iterdir(self): + def iterdir(self, *, recursive=False): """Iterate over the files in this directory. Does not yield any - result for the special paths '.' and '..'. + result for the special paths '.' and '..'. Yields from + subdirectories if recursive=True (but then doesn't yield any + directories). """ if self._closed: self._raise_closed() @@ -1108,7 +1110,11 @@ def iterdir(self): if name in {'.', '..'}: # Yielding a path object for these makes little sense continue - yield self._make_child_relpath(name) + path = self._make_child_relpath(name) + if recursive and path.is_dir(): + yield from path.iterdir(recursive=recursive) + else: + yield path if self._closed: self._raise_closed() diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f3304f01b4bee3..8f68d9e64d76c9 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1480,6 +1480,14 @@ def test_iterdir(self): expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] self.assertEqual(paths, { P(BASE, q) for q in expected }) + def test_iterdir_recursive(self): + P = self.cls + p = P(join('dirC')) + it = p.iterdir(recursive=True) + paths = set(it) + expected = [('dirD', 'fileD'), ('fileC',)] + self.assertEqual(paths, { P(join('dirC'), *q) for q in expected }) + @support.skip_unless_symlink def test_iterdir_symlink(self): # __iter__ on a symlink to a directory. diff --git a/Misc/ACKS b/Misc/ACKS index d8e2630814a869..590d38b5e2ab7f 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1212,6 +1212,7 @@ Grant Olson Koray Oner Piet van Oostrum Tomas Oppelstrup +Laurie Opperman Jason Orendorff Bastien Orivel orlnub123 From 72bd10467ae023541ea522240d4229c69acbcb40 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" Date: Thu, 11 Apr 2019 11:15:25 +0000 Subject: [PATCH 02/13] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2019-04-11-11-15-24.bpo-36602.zxNvQK.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2019-04-11-11-15-24.bpo-36602.zxNvQK.rst diff --git a/Misc/NEWS.d/next/Library/2019-04-11-11-15-24.bpo-36602.zxNvQK.rst b/Misc/NEWS.d/next/Library/2019-04-11-11-15-24.bpo-36602.zxNvQK.rst new file mode 100644 index 00000000000000..fe97e9eab39900 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-04-11-11-15-24.bpo-36602.zxNvQK.rst @@ -0,0 +1 @@ +``pathlib.Path.iterdir`` can now list items in subdirectories with the optional argument ``recursive=True``. Note that directories will not be yielded in recursive listing \ No newline at end of file From 322028990d10e4d82b8a029c3b550f0a5255f20f Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Sat, 13 Apr 2019 12:37:51 +1000 Subject: [PATCH 03/13] bpo-36602: Check for symlink recurse (GH-12785) Symbolic links to directories are still considered directories by 'is_dir', allowing 'iterdir(recursive=True)' to get into a situation where it follows a symbolic link cycle and raises an 'OSError'. The implemented solution to this is to prevent listing contents of a symbolic link to a directory. --- Lib/pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 7c997ed6c6cc28..c1d80050c05bb8 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1111,7 +1111,7 @@ def iterdir(self, *, recursive=False): # Yielding a path object for these makes little sense continue path = self._make_child_relpath(name) - if recursive and path.is_dir(): + if recursive and path.is_dir() and not path.is_symlink(): yield from path.iterdir(recursive=recursive) else: yield path From 660e6168e26db5edfbb2b42ec99eec675ff83d84 Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Sat, 13 Apr 2019 12:56:19 +1000 Subject: [PATCH 04/13] bpo-36602: Test cyclic symlinks (GH-12785) Cyclic symlinks will arise in typical use-cases, and 'iterdir(recursive=True)' needs to handle these without failing and in an expected fashion. --- Lib/test/test_pathlib.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 8f68d9e64d76c9..ef7bfeb08ef3c7 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1488,6 +1488,44 @@ def test_iterdir_recursive(self): expected = [('dirD', 'fileD'), ('fileC',)] self.assertEqual(paths, { P(join('dirC'), *q) for q in expected }) + @support.skip_unless_symlink + def test_iterdir_recursive_symlink(self): + P = self.cls + p = P(join('dirB')) + it = p.iterdir(recursive=True) + paths = set(it) + expected = ['fileB', 'linkD'] + self.assertEqual(paths, { P(join('dirB'), q) for q in expected }) + + @support.skip_unless_symlink + def test_iterdir_recursive_symlink_cycle(self): + os.mkdir(join('dirF')) + os.mkdir(join('dirF', 'dirG')) + os.mkdir(join('dirF', 'dirH')) + with open(join('dirF', 'fileF'), 'wb') as f: + f.write(b"this is file F\n") + with open(join('dirF', 'dirG', 'fileG'), 'wb') as f: + f.write(b"this is file G\n") + with open(join('dirF', 'dirH', 'fileH'), 'wb') as f: + f.write(b"this is file H\n") + os.symlink(os.path.join('..', 'dirG'), join('dirF', 'dirH', 'linkG')) + os.symlink(os.path.join('..', 'dirH'), join('dirF', 'dirG', 'linkH')) + try: + P = self.cls + p = P(join('dirF')) + it = p.iterdir(recursive=True) + paths = set(it) + expected = [ + ('fileF',), + ('dirG', 'fileG'), + ('dirG', 'linkH'), + ('dirH', 'fileH'), + ('dirH', 'linkG')] + self.assertEqual(paths, { P(join('dirF'), *q) for q in expected }) + finally: + support.rmtree(join('dirF')) + assert not P(join('dirF')).exists() # TODO: remove + @support.skip_unless_symlink def test_iterdir_symlink(self): # __iter__ on a symlink to a directory. From 82b0978f29c984522b7685a53e0ec6fdbbdb5db7 Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Mon, 29 Apr 2019 17:51:47 +1000 Subject: [PATCH 05/13] bpo-36602: Implement cycle detection (GH-12785) Recursively listing subdirectories may run into cycles if symbolic links to directories are also listed. This implementation stores which directories have been listed to prevent cycles. Directories and symbolics links to directories are still omitted in recursive subdirectory listing. --- Lib/pathlib.py | 24 +++++++++++++++++++----- Lib/test/test_pathlib.py | 8 +++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c1d80050c05bb8..2e877790a031b1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1098,6 +1098,21 @@ def samefile(self, other_path): other_st = os.stat(other_path) return os.path.samestat(st, other_st) + def _iterdir_recursive(self): + """Recursive directory listing.""" + to_list = [self] + listed = set() + while len(to_list) > 0: + dirpath = to_list.pop(0) + for path in dirpath.iterdir(recursive=False): + if path.is_dir(): + resolved = path.resolve() + if resolved not in listed: + to_list.append(path) + listed.add(resolved) + else: + yield path + def iterdir(self, *, recursive=False): """Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. Yields from @@ -1106,15 +1121,14 @@ def iterdir(self, *, recursive=False): """ if self._closed: self._raise_closed() + if recursive: + yield from self._iterdir_recursive() + return for name in self._accessor.listdir(self): if name in {'.', '..'}: # Yielding a path object for these makes little sense continue - path = self._make_child_relpath(name) - if recursive and path.is_dir() and not path.is_symlink(): - yield from path.iterdir(recursive=recursive) - else: - yield path + yield self._make_child_relpath(name) if self._closed: self._raise_closed() diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ef7bfeb08ef3c7..66008ff4537b88 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1494,8 +1494,8 @@ def test_iterdir_recursive_symlink(self): p = P(join('dirB')) it = p.iterdir(recursive=True) paths = set(it) - expected = ['fileB', 'linkD'] - self.assertEqual(paths, { P(join('dirB'), q) for q in expected }) + expected = [('fileB',), ('linkD', 'fileB')] + self.assertEqual(paths, { P(join('dirB'), *q) for q in expected }) @support.skip_unless_symlink def test_iterdir_recursive_symlink_cycle(self): @@ -1518,9 +1518,7 @@ def test_iterdir_recursive_symlink_cycle(self): expected = [ ('fileF',), ('dirG', 'fileG'), - ('dirG', 'linkH'), - ('dirH', 'fileH'), - ('dirH', 'linkG')] + ('dirH', 'fileH')] self.assertEqual(paths, { P(join('dirF'), *q) for q in expected }) finally: support.rmtree(join('dirF')) From 516d41bb2367d2579b4e326f3c4a03ffb9272f8b Mon Sep 17 00:00:00 2001 From: Laurie O Date: Thu, 2 May 2019 08:48:46 +1000 Subject: [PATCH 06/13] bpo-36602: Indicate keyword-only (GH-12785) --- Doc/library/pathlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 6f0caa050145a2..974b961746f4e0 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -844,7 +844,7 @@ call fails (for example because the path doesn't exist). other errors (such as permission errors) are propagated. -.. method:: Path.iterdir(recursive=False) +.. method:: Path.iterdir(*, recursive=False) When the path points to a directory, yield path objects of the directory contents:: From 331ff3b41d5d40ee691c78755f19495896d313e2 Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Thu, 2 May 2019 09:16:24 +1000 Subject: [PATCH 07/13] bpo-36602: Add version-changed (GH-12785) --- Doc/library/pathlib.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 974b961746f4e0..3b1f0001f46ed4 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -870,6 +870,9 @@ call fails (for example because the path doesn't exist). PosixPath('foo/bar/c.txt') PosixPath('foo/bar/spam/d.rst') + .. versionchanged:: 3.8 + The *recursive* parameter was added. + .. method:: Path.lchmod(mode) Like :meth:`Path.chmod` but, if the path points to a symbolic link, the From 02ff5cfad812a9cc9b4d9eab4f765c8008cf115a Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Thu, 6 Jun 2019 09:36:52 +1000 Subject: [PATCH 08/13] Document recursive listing added in Python 3.9 --- Doc/library/pathlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 3b1f0001f46ed4..590fb9936f7f54 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -870,7 +870,7 @@ call fails (for example because the path doesn't exist). PosixPath('foo/bar/c.txt') PosixPath('foo/bar/spam/d.rst') - .. versionchanged:: 3.8 + .. versionchanged:: 3.9 The *recursive* parameter was added. .. method:: Path.lchmod(mode) From 79766038e7aab9ddeac258eab1b0fc9a5714078e Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Thu, 20 Jun 2019 02:51:43 +1000 Subject: [PATCH 09/13] Don't write text to new files in sumlink test --- Lib/test/test_pathlib.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 66008ff4537b88..ff90d1c18fc317 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1502,12 +1502,9 @@ def test_iterdir_recursive_symlink_cycle(self): os.mkdir(join('dirF')) os.mkdir(join('dirF', 'dirG')) os.mkdir(join('dirF', 'dirH')) - with open(join('dirF', 'fileF'), 'wb') as f: - f.write(b"this is file F\n") - with open(join('dirF', 'dirG', 'fileG'), 'wb') as f: - f.write(b"this is file G\n") - with open(join('dirF', 'dirH', 'fileH'), 'wb') as f: - f.write(b"this is file H\n") + open(join('dirF', 'fileF'), 'w').close() + open(join('dirF', 'dirG', 'fileG'), 'w').close() + open(join('dirF', 'dirH', 'fileH'), 'w').close() os.symlink(os.path.join('..', 'dirG'), join('dirF', 'dirH', 'linkG')) os.symlink(os.path.join('..', 'dirH'), join('dirF', 'dirG', 'linkH')) try: From 43c00c3de4184bbed47bc9b6776a332bb8a65839 Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Thu, 20 Jun 2019 02:58:39 +1000 Subject: [PATCH 10/13] Add comment on created directory structure --- Lib/test/test_pathlib.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ff90d1c18fc317..3b7da5dd0ce3bb 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1507,6 +1507,21 @@ def test_iterdir_recursive_symlink_cycle(self): open(join('dirF', 'dirH', 'fileH'), 'w').close() os.symlink(os.path.join('..', 'dirG'), join('dirF', 'dirH', 'linkG')) os.symlink(os.path.join('..', 'dirH'), join('dirF', 'dirG', 'linkH')) + # Now have structure + # (BASE) + # | + # ... + # | + # |-- dirF + # | |-- dirG + # | | |-- fileG + # | | `-- linkH -> ../dirH + # | |-- dirH + # | | |-- fileH + # | | `-- linkG -> ../dirG + # | `-- fileF + # | + # ... try: P = self.cls p = P(join('dirF')) From 738ef9020cda2429f31df2d96a1db0262330fc5e Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Thu, 20 Jun 2019 03:00:39 +1000 Subject: [PATCH 11/13] Remove unwanted assertion --- Lib/test/test_pathlib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 3b7da5dd0ce3bb..0001228e9fb9c5 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1534,7 +1534,6 @@ def test_iterdir_recursive_symlink_cycle(self): self.assertEqual(paths, { P(join('dirF'), *q) for q in expected }) finally: support.rmtree(join('dirF')) - assert not P(join('dirF')).exists() # TODO: remove @support.skip_unless_symlink def test_iterdir_symlink(self): From 4bb3b2959b890566bb55e220b8f182fdffd9ac94 Mon Sep 17 00:00:00 2001 From: Laurie Opperman Date: Fri, 5 Jul 2019 09:58:20 +1000 Subject: [PATCH 12/13] Use 'pathlib.Path.touch' to create files in test --- Lib/test/test_pathlib.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 0001228e9fb9c5..e89723eb44e396 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1499,12 +1499,13 @@ def test_iterdir_recursive_symlink(self): @support.skip_unless_symlink def test_iterdir_recursive_symlink_cycle(self): + P = self.cls os.mkdir(join('dirF')) os.mkdir(join('dirF', 'dirG')) os.mkdir(join('dirF', 'dirH')) - open(join('dirF', 'fileF'), 'w').close() - open(join('dirF', 'dirG', 'fileG'), 'w').close() - open(join('dirF', 'dirH', 'fileH'), 'w').close() + P(join('dirF', 'fileF')).touch() + P(join('dirF', 'dirG', 'fileG')).touch() + P(join('dirF', 'dirH', 'fileH')).touch() os.symlink(os.path.join('..', 'dirG'), join('dirF', 'dirH', 'linkG')) os.symlink(os.path.join('..', 'dirH'), join('dirF', 'dirG', 'linkH')) # Now have structure @@ -1523,7 +1524,6 @@ def test_iterdir_recursive_symlink_cycle(self): # | # ... try: - P = self.cls p = P(join('dirF')) it = p.iterdir(recursive=True) paths = set(it) From e2f09a4e852e8cba23ed7869e82ede7846266db7 Mon Sep 17 00:00:00 2001 From: Laurie O Date: Wed, 7 Apr 2021 14:20:12 +1000 Subject: [PATCH 13/13] Fix symlink-cycle test --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ff533c17a8b006..545ece497c9252 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1626,7 +1626,7 @@ def test_iterdir_recursive_symlink_cycle(self): ('dirH', 'fileH')] self.assertEqual(paths, { P(join('dirF'), *q) for q in expected }) finally: - support.rmtree(join('dirF')) + os_helper.rmtree(join('dirF')) @os_helper.skip_unless_symlink def test_iterdir_symlink(self):