@@ -63,6 +63,12 @@ def _compile_pattern(pat, sep, case_sensitive):
6363 return re .compile (regex , flags = flags ).match
6464
6565
def _select_special(paths, part):
    """Yield special literal children of the given paths.

    For every path in *paths*, lazily yield the result of
    ``path._make_child_relpath(part)``. glob() calls this for the
    components it treats as special: '', '.' and '..'. No filesystem
    access or existence check is performed here.
    """
    for path in paths:
        yield path._make_child_relpath(part)
70+
71+
6672def _select_children (parent_paths , dir_only , follow_symlinks , match ):
6773 """Yield direct children of given paths, filtering by name and type."""
6874 if follow_symlinks is None :
@@ -84,7 +90,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
8490 except OSError :
8591 continue
8692 if match (entry .name ):
87- yield parent_path ._make_child_entry (entry , dir_only )
93+ yield parent_path ._make_child_entry (entry )
8894
8995
9096def _select_recursive (parent_paths , dir_only , follow_symlinks ):
@@ -107,7 +113,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
107113 for entry in entries :
108114 try :
109115 if entry .is_dir (follow_symlinks = follow_symlinks ):
110- paths .append (path ._make_child_entry (entry , dir_only ))
116+ paths .append (path ._make_child_entry (entry ))
111117 continue
112118 except OSError :
113119 pass
@@ -427,6 +433,14 @@ def is_absolute(self):
427433 a drive)."""
428434 return self .pathmod .isabs (self ._raw_path )
429435
@property
def _pattern_stack(self):
    """Stack of path components, to be used with patterns in glob().

    Unpacks ``self._stack`` into ``(anchor, parts)`` and returns
    ``parts``. glob() consumes the result with ``pop()``, so the first
    pattern component is presumably stored last -- confirm against the
    definition of ``_stack``.

    Raises NotImplementedError if the anchor is non-empty, because only
    relative patterns are supported.
    """
    anchor, parts = self._stack
    if anchor:
        raise NotImplementedError("Non-relative patterns are unsupported")
    return parts
443+
430444 def match (self , path_pattern , * , case_sensitive = None ):
431445 """
432446 Return True if this path matches the given pattern.
@@ -436,11 +450,10 @@ def match(self, path_pattern, *, case_sensitive=None):
436450 if case_sensitive is None :
437451 case_sensitive = _is_case_sensitive (self .pathmod )
438452 sep = path_pattern .pathmod .sep
439- pattern_str = str (path_pattern )
440453 if path_pattern .anchor :
441- pass
454+ pattern_str = str ( path_pattern )
442455 elif path_pattern .parts :
443- pattern_str = f '**{ sep } { pattern_str } '
456+ pattern_str = str ( '**' / path_pattern )
444457 else :
445458 raise ValueError ("empty pattern" )
446459 match = _compile_pattern (pattern_str , sep , case_sensitive )
@@ -714,10 +727,8 @@ def _scandir(self):
714727 from contextlib import nullcontext
715728 return nullcontext (self .iterdir ())
716729
def _make_child_entry(self, entry):
    """Transform an entry yielded from _scandir() into a path object.

    This implementation returns *entry* unchanged.
    NOTE(review): presumably subclasses whose _scandir() yields something
    other than path objects override this -- confirm against callers.
    """
    return entry
722733
723734 def _make_child_relpath (self , name ):
@@ -727,57 +738,35 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
727738 """Iterate over this subtree and yield all existing files (of any
728739 kind, including directories) matching the given relative pattern.
729740 """
730- path_pattern = self .with_segments (pattern )
731- if path_pattern .anchor :
732- raise NotImplementedError ("Non-relative patterns are unsupported" )
733- elif not path_pattern .parts :
734- raise ValueError ("Unacceptable pattern: {!r}" .format (pattern ))
735-
736- pattern_parts = list (path_pattern .parts )
737- if not self .pathmod .split (pattern )[1 ]:
738- # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
739- pattern_parts .append ('' )
740-
741+ if not isinstance (pattern , PurePathBase ):
742+ pattern = self .with_segments (pattern )
741743 if case_sensitive is None :
742744 # TODO: evaluate case-sensitivity of each directory in _select_children().
743745 case_sensitive = _is_case_sensitive (self .pathmod )
744746
745- # If symlinks are handled consistently, and the pattern does not
746- # contain '..' components, then we can use a 'walk-and-match' strategy
747- # when expanding '**' wildcards. When a '**' wildcard is encountered,
748- # all following pattern parts are immediately consumed and used to
749- # build a `re.Pattern` object. This pattern is used to filter the
750- # recursive walk. As a result, pattern parts following a '**' wildcard
751- # do not perform any filesystem access, which can be much faster!
752- filter_paths = follow_symlinks is not None and '..' not in pattern_parts
747+ stack = pattern ._pattern_stack
748+ specials = ('' , '.' , '..' )
749+ filter_paths = False
753750 deduplicate_paths = False
754751 sep = self .pathmod .sep
755752 paths = iter ([self .joinpath ('' )] if self .is_dir () else [])
756- part_idx = 0
757- while part_idx < len (pattern_parts ):
758- part = pattern_parts [part_idx ]
759- part_idx += 1
760- if part == '' :
761- # Trailing slash.
762- pass
763- elif part == '..' :
764- paths = (path ._make_child_relpath ('..' ) for path in paths )
753+ while stack :
754+ part = stack .pop ()
755+ if part in specials :
756+ paths = _select_special (paths , part )
765757 elif part == '**' :
766758 # Consume adjacent '**' components.
767- while part_idx < len (pattern_parts ) and pattern_parts [part_idx ] == '**' :
768- part_idx += 1
769-
770- if filter_paths and part_idx < len (pattern_parts ) and pattern_parts [part_idx ] != '' :
771- dir_only = pattern_parts [- 1 ] == ''
772- paths = _select_recursive (paths , dir_only , follow_symlinks )
759+ while stack and stack [- 1 ] == '**' :
760+ stack .pop ()
773761
774- # Filter out paths that don't match pattern.
775- prefix_len = len (str (self ._make_child_relpath ('_' ))) - 1
776- match = _compile_pattern (str (path_pattern ), sep , case_sensitive )
777- paths = (path for path in paths if match (str (path ), prefix_len ))
778- return paths
762+ # Consume adjacent non-special components and enable post-walk
763+ # regex filtering, provided we're treating symlinks consistently.
764+ if follow_symlinks is not None :
765+ while stack and stack [- 1 ] not in specials :
766+ filter_paths = True
767+ stack .pop ()
779768
780- dir_only = part_idx < len ( pattern_parts )
769+ dir_only = bool ( stack )
781770 paths = _select_recursive (paths , dir_only , follow_symlinks )
782771 if deduplicate_paths :
783772 # De-duplicate if we've already seen a '**' component.
@@ -786,18 +775,25 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
786775 elif '**' in part :
787776 raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
788777 else :
789- dir_only = part_idx < len ( pattern_parts )
778+ dir_only = bool ( stack )
790779 match = _compile_pattern (part , sep , case_sensitive )
791780 paths = _select_children (paths , dir_only , follow_symlinks , match )
781+ if filter_paths :
782+ # Filter out paths that don't match pattern.
783+ prefix_len = len (str (self ._make_child_relpath ('_' ))) - 1
784+ match = _compile_pattern (str (pattern ), sep , case_sensitive )
785+ paths = (path for path in paths if match (str (path ), prefix_len ))
792786 return paths
793787
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
    """Recursively yield all existing files (of any kind, including
    directories) matching the given relative pattern, anywhere in
    this subtree.

    *pattern* may be a PurePathBase instance or anything accepted by
    ``self.with_segments()``. It is prefixed with a '**' component and
    handed to glob(); *case_sensitive* and *follow_symlinks* are passed
    through unchanged.
    """
    if not isinstance(pattern, PurePathBase):
        pattern = self.with_segments(pattern)
    # Prepend the recursive wildcard as a path component rather than by
    # string formatting. NOTE(review): relies on the pattern type
    # supporting ``str / path`` (reflected division) -- confirm.
    pattern = '**' / pattern
    return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
801797
802798 def walk (self , top_down = True , on_error = None , follow_symlinks = False ):
803799 """Walk the directory tree from this directory, similar to os.walk()."""
0 commit comments