@@ -36,6 +36,12 @@ def _ignore_error(exception):
36
36
getattr (exception , 'winerror' , None ) in _IGNORED_WINERRORS )
37
37
38
38
39
def _is_wildcard_pattern(pat):
    """Return True if *pat* contains a glob metacharacter.

    A pattern containing ``*``, ``?`` or ``[`` needs real fnmatch-style
    matching; any other pattern can be looked up directly as a file name.
    """
    return any(meta in pat for meta in "*?[")
43
+
44
+
39
45
@functools.cache
def _is_case_sensitive(pathmod):
    """Return True if *pathmod* treats file names as case-sensitive.

    The check is whether ``pathmod.normcase()`` leaves a mixed-case probe
    string unchanged.  Results are memoized per *pathmod* by
    ``functools.cache``.
    """
    probe = 'Aa'
    return pathmod.normcase(probe) == probe
@@ -60,12 +66,42 @@ def _compile_pattern(pat, sep, case_sensitive, recursive=True):
60
66
return re .compile (regex , flags = flags ).match
61
67
62
68
63
- def _select_special (paths , part ):
64
- """Yield special literal children of the given paths."""
69
def _select_literal(paths, part):
    """Lazily yield, for each path in *paths*, its child named *part*.

    The child is built with ``_make_child_relpath()``; this function does
    not itself check whether the resulting path exists.
    """
    yield from (parent._make_child_relpath(part) for parent in paths)
67
73
68
74
75
def _select_directories(paths):
    """Lazily yield only those entries of *paths* that are directories.

    An entry is kept when its ``is_dir()`` returns a true value.  Entries
    for which an OSError is raised are silently dropped.
    """
    for candidate in paths:
        try:
            if not candidate.is_dir():
                continue
            yield candidate
        except OSError:
            # Best effort: skip this entry and carry on with the next one.
            continue
83
+
84
+
85
def _deselect_missing(paths):
    """Lazily yield only those entries of *paths* that can be stat()'ed.

    Each entry is probed with ``stat(follow_symlinks=False)``.  Entries for
    which an OSError is raised (e.g. the file is missing) are silently
    dropped.
    """
    for candidate in paths:
        try:
            # The stat result itself is unused; only success matters.
            candidate.stat(follow_symlinks=False)
            yield candidate
        except OSError:
            continue
93
+
94
+
95
def _deselect_symlinks(paths):
    """Lazily yield only those entries of *paths* that are not symlinks.

    An entry is dropped when its ``is_symlink()`` returns a true value.
    Entries for which an OSError is raised are silently dropped as well.
    """
    for candidate in paths:
        try:
            if candidate.is_symlink():
                continue
            yield candidate
        except OSError:
            # Best effort: skip this entry and carry on with the next one.
            continue
103
+
104
+
69
105
def _select_children (parent_paths , dir_only , follow_symlinks , match ):
70
106
"""Yield direct children of given paths, filtering by name and type."""
71
107
if follow_symlinks is None :
@@ -799,16 +835,26 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
799
835
# TODO: evaluate case-sensitivity of each directory in _select_children().
800
836
case_sensitive = _is_case_sensitive (self .pathmod )
801
837
838
+ # User doesn't care about case sensitivity, so for non-wildcard
839
+ # patterns like "foo/bar" we can stat() once rather than scandir()
840
+ # twice. Returned paths may not match real filesystem case.
841
+ case_preserving = False
842
+ else :
843
+ # Explicit case sensitivity choice provided. We must use scandir()
844
+ # to retrieve and match filenames with real filesystem case.
845
+ case_preserving = True
846
+
802
847
stack = pattern ._pattern_stack
803
848
specials = ('' , '.' , '..' )
849
+ check_paths = False
804
850
deduplicate_paths = False
805
851
sep = self .pathmod .sep
806
852
paths = iter ([self ] if self .is_dir () else [])
807
853
while stack :
808
854
part = stack .pop ()
809
855
if part in specials :
810
856
# Join special component (e.g. '..') onto paths.
811
- paths = _select_special (paths , part )
857
+ paths = _select_literal (paths , part )
812
858
813
859
elif part == '**' :
814
860
# Consume following '**' components, which have no effect.
@@ -826,6 +872,11 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
826
872
# re.Pattern object based on those components.
827
873
match = _compile_pattern (part , sep , case_sensitive ) if part != '**' else None
828
874
875
+ # Ensure directories exist.
876
+ if check_paths :
877
+ paths = _select_directories (paths )
878
+ check_paths = False
879
+
829
880
# Recursively walk directories, filtering by type and regex.
830
881
paths = _select_recursive (paths , bool (stack ), follow_symlinks , match )
831
882
@@ -837,13 +888,32 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
837
888
elif '**' in part :
838
889
raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
839
890
840
- else :
891
+ elif case_preserving or _is_wildcard_pattern ( part ) :
841
892
# If the pattern component isn't '*', compile an re.Pattern
842
893
# object based on the component.
843
894
match = _compile_pattern (part , sep , case_sensitive ) if part != '*' else None
844
895
845
896
# Iterate over directories' children filtering by type and regex.
846
897
paths = _select_children (paths , bool (stack ), follow_symlinks , match )
898
+
899
+ # Paths are known to exist: they're directory children from _scandir()
900
+ check_paths = False
901
+
902
+ else :
903
+ # Join non-wildcard component onto paths.
904
+ paths = _select_literal (paths , part )
905
+
906
+ # Filter out non-symlinks if requested.
907
+ if follow_symlinks is False :
908
+ paths = _deselect_symlinks (paths )
909
+
910
+ # Paths might not exist; mark them to be checked.
911
+ check_paths = True
912
+
913
+ if check_paths :
914
+ # Filter out paths that don't exist.
915
+ paths = _deselect_missing (paths )
916
+
847
917
return paths
848
918
849
919
def rglob (self , pattern , * , case_sensitive = None , follow_symlinks = None ):
0 commit comments