Skip to content

Commit 579cf4a

Browse files
authored
Merge pull request #19424 from github/tausbn/python-extract-hidden-file-by-default
Python: Extract files in hidden dirs by default
2 parents cadcb20 + 9ee3e4c commit 579cf4a

File tree

15 files changed

+69
-37
lines changed

15 files changed

+69
-37
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name: Test Config
2+
paths-ignore:
3+
- "**/.*/**"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
| name |
2+
+-------------------------------+
3+
| .hidden_file.py |
4+
| another_non_hidden.py |
5+
| foo.py |
6+
| visible_file_in_hidden_dir.py |
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
| name |
2+
+-----------------+
3+
| .hidden_file.py |
4+
| foo.py |
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import python
2+
3+
select any(File f).getShortName() as name order by name

python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/internal_non_hidden/another_non_hidden.py

Whitespace-only changes.

python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_dir/visible_file_in_hidden_dir.py

Whitespace-only changes.

python/extractor/cli-integration-test/hidden-files/repo_dir/.hidden_file.py

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
print(42)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash
2+
3+
set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
4+
5+
set -x
6+
7+
CODEQL=${CODEQL:-codeql}
8+
9+
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
10+
cd "$SCRIPTDIR"
11+
12+
rm -rf db db-skipped
13+
14+
# Test 1: Default behavior should be to extract files in hidden directories
15+
$CODEQL database create db --language python --source-root repo_dir/
16+
$CODEQL query run --database db query.ql > query-default.actual
17+
diff query-default.expected query-default.actual
18+
19+
# Test 2: The default behavior can be overridden by setting `paths-ignore` in the config file
20+
$CODEQL database create db-skipped --language python --source-root repo_dir/ --codescanning-config=config.yml
21+
$CODEQL query run --database db-skipped query.ql > query-skipped.actual
22+
diff query-skipped.expected query-skipped.actual
23+
24+
rm -rf db db-skipped

python/extractor/semmle/path_filters.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,27 @@ def glob_part_to_regex(glob, add_sep):
4141

4242
def glob_to_regex(glob, prefix=""):
4343
'''Convert entire glob to a compiled regex'''
44+
# When the glob ends in `/`, we need to remember this so that we don't accidentally add an
45+
# extra separator to the final regex.
46+
end_sep = "" if glob.endswith("/") else SEP
4447
glob = glob.strip().strip("/")
4548
parts = glob.split("/")
4649
#Trailing '**' is redundant, so strip it off.
4750
if parts[-1] == "**":
4851
parts = parts[:-1]
4952
if not parts:
5053
return ".*"
54+
# The `glob.strip("/")` call above will have removed all trailing slashes, but if there was at
55+
# least one trailing slash, we want there to be an extra part, so we add it explicitly here in
56+
# that case, using the emptiness of `end_sep` as a proxy.
57+
if end_sep == "":
58+
parts += [""]
5159
parts = [ glob_part_to_regex(escape(p), True) for p in parts[:-1] ] + [ glob_part_to_regex(escape(parts[-1]), False) ]
5260
# we need to escape the prefix, specifically because on Windows the prefix will be
5361
# something like `C:\\folder\\subfolder\\` and without escaping the
5462
# backslash-path-separators will get interpreted as regex escapes (which might be
5563
# invalid sequences, causing the extractor to crash)
56-
full_pattern = escape(prefix) + ''.join(parts) + "(?:" + SEP + ".*|$)"
64+
full_pattern = escape(prefix) + ''.join(parts) + "(?:" + end_sep + ".*|$)"
5765
return re.compile(full_pattern)
5866

5967
def filter_from_pattern(pattern, prev_filter, prefix):

0 commit comments

Comments
 (0)