From 7df0f6f96ed86c68f3273cba752598c9ce88a796 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 26 Oct 2022 00:49:07 +0100 Subject: [PATCH 1/6] gh-98692: Enable treating shebang lines as executables in py.exe launcher --- ...2-10-26-17-43-09.gh-issue-98692.bOopfZ.rst | 2 + PC/launcher2.c | 71 ++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2022-10-26-17-43-09.gh-issue-98692.bOopfZ.rst diff --git a/Misc/NEWS.d/next/Windows/2022-10-26-17-43-09.gh-issue-98692.bOopfZ.rst b/Misc/NEWS.d/next/Windows/2022-10-26-17-43-09.gh-issue-98692.bOopfZ.rst new file mode 100644 index 00000000000000..3a5efd9a1cfa34 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2022-10-26-17-43-09.gh-issue-98692.bOopfZ.rst @@ -0,0 +1,2 @@ +Fix the :ref:`launcher` ignoring unrecognized shebang lines instead of +treating them as local paths diff --git a/PC/launcher2.c b/PC/launcher2.c index 1f6f97b82092b9..04a5a15db1a637 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -871,6 +871,69 @@ _findCommand(SearchInfo *search, const wchar_t *command, int commandLength) } +int +_useShebangAsExecutable(SearchInfo *search, const wchar_t *shebang, int shebangLength) +{ + wchar_t buffer[MAXLEN]; + wchar_t script[MAXLEN]; + wchar_t command[MAXLEN]; + + int commandLength = 0; + int quoteCount = 0; + int slashCount = 0; + + if (!shebang || !shebangLength) { + return 0; + } + + wchar_t *pC = command; + for (int i = 0; i < shebangLength; ++i) { + wchar_t c = shebang[i]; + if ((isspace(c) || c == L'\r' || c == L'\n') && !quoteCount) { + commandLength = i; + break; + } else if (c == L'"') { + if (slashCount % 2 == 0) { + quoteCount = quoteCount ? 0 : 1; + } + slashCount = 0; + } else if (c == L'/' || c == L'\\') { + slashCount += 1; + // also normalise the slash + *pC++ = L'\\'; + } else { + slashCount = 0; + *pC++ = c; + } + } + *pC = L'\0'; + + if (!GetCurrentDirectoryW(MAXLEN, buffer) || + wcsncpy_s(script, MAXLEN, search->scriptFile, search->scriptFileLength) || + FAILED(PathCchCombineEx(buffer, MAXLEN, buffer, script, + PATHCCH_ALLOW_LONG_PATHS)) || + FAILED(PathCchRemoveFileSpec(buffer, MAXLEN)) || + FAILED(PathCchCombineEx(buffer, MAXLEN, buffer, command, + PATHCCH_ALLOW_LONG_PATHS)) + ) { + return RC_NO_MEMORY; + } + + int n = (int)wcsnlen(buffer, MAXLEN); + wchar_t *path = allocSearchInfoBuffer(search, n + 1); + if (!path) { + return RC_NO_MEMORY; + } + wcscpy_s(path, n + 1, buffer); + search->executablePath = path; + if (commandLength) { + search->executableArgs = &shebang[commandLength]; + search->executableArgsLength = shebangLength - commandLength; + } + return 0; +} + + int checkShebang(SearchInfo *search) { @@ -963,7 +1026,6 @@ checkShebang(SearchInfo *search) L"/usr/bin/env ", L"/usr/bin/", L"/usr/local/bin/", - L"", NULL }; @@ -1012,11 +1074,14 @@ checkShebang(SearchInfo *search) debug(L"# Found shebang command but could not execute it: %.*s\n", commandLength, command); } - break; + // search is done by this point + return 0; } } - return 0; + // Unrecognised commands are joined to the script's directory and treated + // as the executable path + return _useShebangAsExecutable(search, shebang, shebangLength); } From 922a6ad0de621f48883ff3a44530018052a9bacc Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 26 Oct 2022 21:06:33 +0100 Subject: [PATCH 2/6] Add tests, fix escaping --- Lib/test/test_launcher.py | 32 ++++++++++++++++++++++++++++++++ PC/launcher2.c | 28 ++++++++++++++++++---------- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 432a44622b5f00..271c71fe59f73f 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -617,3 +617,35 @@ def test_install(self): self.assertIn("winget.exe", cmd) # Both command lines include the store ID self.assertIn("9PJPW5LDXLZ5", cmd) + + def test_literal_shebang_absolute(self): + with self.script(f"#! C:/some_random_app -witharg") as script: + data = self.run_py([script]) + self.assertEqual( + f"C:\\some_random_app -witharg {script}", + data["stdout"].strip(), + ) + + def test_literal_shebang_relative(self): + with self.script(f"#! ..\\some_random_app -witharg") as script: + data = self.run_py([script]) + self.assertEqual( + f"{script.parent.parent}\\some_random_app -witharg {script}", + data["stdout"].strip(), + ) + + def test_literal_shebang_quoted(self): + with self.script(f'#! "some random app" -witharg') as script: + data = self.run_py([script]) + self.assertEqual( + f'"{script.parent}\\some random app" -witharg {script}', + data["stdout"].strip(), + ) + + def test_literal_shebang_quoted_escape(self): + with self.script(f'#! "some random\\\\\\" app\\\\\\\\" -witharg') as script: + data = self.run_py([script]) + self.assertEqual( + f'"{script.parent}\\some random\\ app\\\\" -witharg {script}', + data["stdout"].strip(), + ) diff --git a/PC/launcher2.c b/PC/launcher2.c index 04a5a15db1a637..74181e82cf1533 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -879,8 +879,8 @@ _useShebangAsExecutable(SearchInfo *search, const wchar_t *shebang, int shebangL wchar_t command[MAXLEN]; int commandLength = 0; - int quoteCount = 0; - int slashCount = 0; + int inQuote = 0; + int afterSlash = 0; if (!shebang || !shebangLength) { return 0; @@ -889,20 +889,28 @@ _useShebangAsExecutable(SearchInfo *search, const wchar_t *shebang, int shebangL wchar_t *pC = command; for (int i = 0; i < shebangLength; ++i) { wchar_t c = shebang[i]; - if ((isspace(c) || c == L'\r' || c == L'\n') && !quoteCount) { + if ((isspace(c) || c == L'\r' || c == L'\n') && !inQuote) { commandLength = i; break; } else if (c == L'"') { - if (slashCount % 2 == 0) { - quoteCount = quoteCount ? 0 : 1; + if (!afterSlash) { + // non-escaped quote. either way, we don't add it to the path + inQuote = !inQuote; } - slashCount = 0; + afterSlash = 0; } else if (c == L'/' || c == L'\\') { - slashCount += 1; - // also normalise the slash - *pC++ = L'\\'; + if (afterSlash) { + // escaped slash + *pC++ = L'\\'; + afterSlash = 0; + } else { + afterSlash = 1; + } } else { - slashCount = 0; + if (afterSlash) { + *pC++ = L'\\'; + afterSlash = 0; + } *pC++ = c; } } From 35c57590fec131aea42cf847a85ada25eebfa50d Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 27 Oct 2022 17:33:16 +0100 Subject: [PATCH 3/6] Remove unnecessary line ending checks --- PC/launcher2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PC/launcher2.c b/PC/launcher2.c index 74181e82cf1533..2673297d93743b 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -889,7 +889,7 @@ _useShebangAsExecutable(SearchInfo *search, const wchar_t *shebang, int shebangL wchar_t *pC = command; for (int i = 0; i < shebangLength; ++i) { wchar_t c = shebang[i]; - if ((isspace(c) || c == L'\r' || c == L'\n') && !inQuote) { + if (isspace(c) && !inQuote) { commandLength = i; break; } else if (c == L'"') { From 2bc129dac2d9172e553dd0164dba3b4e803fd483 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 27 Oct 2022 20:28:35 +0100 Subject: [PATCH 4/6] Restore specific python shebang template --- Lib/test/test_launcher.py | 8 ++++++++ PC/launcher2.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 271c71fe59f73f..7641c390e0d7f3 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -516,6 +516,14 @@ def test_py_shebang(self): self.assertEqual("3.100", data["SearchInfo.tag"]) self.assertEqual(f"X.Y.exe -prearg {script} -postarg", data["stdout"].strip()) + def test_python_shebang(self): + with self.py_ini(TEST_PY_COMMANDS): + with self.script("#! python -prearg") as script: + data = self.run_py([script, "-postarg"]) + self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) + self.assertEqual("3.100", data["SearchInfo.tag"]) + self.assertEqual(f"X.Y.exe -prearg {script} -postarg", data["stdout"].strip()) + def test_py2_shebang(self): with self.py_ini(TEST_PY_COMMANDS): with self.script("#! /usr/bin/python2 -prearg") as script: diff --git a/PC/launcher2.c b/PC/launcher2.c index 2673297d93743b..3f63f09f25d6b7 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -1034,12 +1034,19 @@ checkShebang(SearchInfo *search) L"/usr/bin/env ", L"/usr/bin/", L"/usr/local/bin/", + L"python", NULL }; for (const wchar_t **tmpl = shebangTemplates; *tmpl; ++tmpl) { if (_shebangStartsWith(shebang, shebangLength, *tmpl, &command)) { commandLength = 0; + // Normally "python" is the start of the command, but we also need it + // as a shebang prefix for back-compat. We move the command marker back + // if we match on that one. + if (0 == wcscmp(*tmpl, L"python")) { + command -= 6; + } while (command[commandLength] && !isspace(command[commandLength])) { commandLength += 1; } From 7ef68bbb0991aceef43f948a6f62c44e8e23c80b Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 27 Oct 2022 20:57:08 +0100 Subject: [PATCH 5/6] Docs --- Doc/using/windows.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index b5c2c8ca712041..fdbe4c15a20036 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -866,7 +866,6 @@ minor version. I.e. ``/usr/bin/python3.7-32`` will request usage of the not provably i386/32-bit". To request a specific environment, use the new ``-V:`` argument with the complete tag. - The ``/usr/bin/env`` form of shebang line has one further special property. Before looking for installed Python interpreters, this form will search the executable :envvar:`PATH` for a Python executable. This corresponds to the @@ -876,6 +875,13 @@ be found, it will be handled as described below. Additionally, the environment variable :envvar:`PYLAUNCHER_NO_SEARCH_PATH` may be set (to any value) to skip this additional search. +Shebang lines that do not match any of these patterns are treated as **Windows** +paths that are absolute or relative to the directory containing the script file. +This is a convenience for Windows-only scripts, such as those generated by an +installer, since the behavior is not compatible with Unix-style shells. +These paths may be quoted, and may include multiple arguments, after which the +path to the script and any additional arguments will be appended. + Arguments in shebang lines -------------------------- From c79a2d554a17222d9c2763278d83efbca7986149 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 28 Oct 2022 15:41:53 +0100 Subject: [PATCH 6/6] Simplify argv[0] quote handling to match reality --- Lib/test/test_launcher.py | 11 +++++++++-- PC/launcher2.c | 19 ++----------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 7641c390e0d7f3..6ad85dc9c300e1 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -650,10 +650,17 @@ def test_literal_shebang_quoted(self): data["stdout"].strip(), ) + with self.script(f'#! some" random "app -witharg') as script: + data = self.run_py([script]) + self.assertEqual( + f'"{script.parent}\\some random app" -witharg {script}', + data["stdout"].strip(), + ) + def test_literal_shebang_quoted_escape(self): - with self.script(f'#! "some random\\\\\\" app\\\\\\\\" -witharg') as script: + with self.script(f'#! some\\" random "app -witharg') as script: data = self.run_py([script]) self.assertEqual( - f'"{script.parent}\\some random\\ app\\\\" -witharg {script}', + f'"{script.parent}\\some\\ random app" -witharg {script}', data["stdout"].strip(), ) diff --git a/PC/launcher2.c b/PC/launcher2.c index 3f63f09f25d6b7..d61af4aad13a42 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -880,7 +880,6 @@ _useShebangAsExecutable(SearchInfo *search, const wchar_t *shebang, int shebangL int commandLength = 0; int inQuote = 0; - int afterSlash = 0; if (!shebang || !shebangLength) { return 0; @@ -893,24 +892,10 @@ _useShebangAsExecutable(SearchInfo *search, const wchar_t *shebang, int shebangL commandLength = i; break; } else if (c == L'"') { - if (!afterSlash) { - // non-escaped quote. either way, we don't add it to the path - inQuote = !inQuote; - } - afterSlash = 0; + inQuote = !inQuote; } else if (c == L'/' || c == L'\\') { - if (afterSlash) { - // escaped slash - *pC++ = L'\\'; - afterSlash = 0; - } else { - afterSlash = 1; - } + *pC++ = L'\\'; } else { - if (afterSlash) { - *pC++ = L'\\'; - afterSlash = 0; - } *pC++ = c; } }