From 828d1efebb524f3bf4a9bf7f321fb33d6cfa5705 Mon Sep 17 00:00:00 2001 From: Finn Womack Date: Tue, 15 Aug 2023 08:33:00 -0700 Subject: [PATCH 1/3] gh-106242: Fix path truncation in os.path.normpath (GH-106816) --- Include/internal/pycore_fileutils.h | 3 +- Lib/test/test_genericpath.py | 4 +++ ...-08-14-23-11-11.gh-issue-106242.71HMym.rst | 1 + Modules/posixmodule.c | 4 ++- Python/fileutils.c | 29 ++++++++++++++----- 5 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index ef6642d00f1b54..7c2b6ec0bffef5 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -252,7 +252,8 @@ extern int _Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize); extern size_t _Py_find_basename(const wchar_t *filename); -PyAPI_FUNC(wchar_t *) _Py_normpath(wchar_t *path, Py_ssize_t size); +PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size); +extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length); // The Windows Games API family does not provide these functions // so provide our own implementations. Remove them in case they get added diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index 489044f8090d3b..4f311c2d498e9f 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -460,6 +460,10 @@ def test_normpath_issue5827(self): for path in ('', '.', '/', '\\', '///foo/.//bar//'): self.assertIsInstance(self.pathmodule.normpath(path), str) + def test_normpath_issue106242(self): + for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'): + self.assertEqual(self.pathmodule.normpath(path), path) + def test_abspath_issue3426(self): # Check that abspath returns unicode when the arg is unicode # with both ASCII and non-ASCII cwds. diff --git a/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst b/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst new file mode 100644 index 00000000000000..44237a9f15708c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-14-23-11-11.gh-issue-106242.71HMym.rst @@ -0,0 +1 @@ +Fixes :func:`os.path.normpath` to handle embedded null characters without truncating the path. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 342f393b1f0f9c..b9f45c0ce5543d 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5275,7 +5275,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path) if (!buffer) { return NULL; } - PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1); + Py_ssize_t norm_len; + wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len); + PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len); PyMem_Free(buffer); return result; } diff --git a/Python/fileutils.c b/Python/fileutils.c index f137ee936502c1..0d31845a0fb11a 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2377,12 +2377,14 @@ _Py_find_basename(const wchar_t *filename) path, which will be within the original buffer. Guaranteed to not make the path longer, and will not fail. 'size' is the length of the path, if known. If -1, the first null character will be assumed - to be the end of the path. */ + to be the end of the path. 'normsize' will be set to contain the + length of the resulting normalized path. */ wchar_t * -_Py_normpath(wchar_t *path, Py_ssize_t size) +_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) { assert(path != NULL); - if (!path[0] || size == 0) { + if (!path[0] && size < 0 || size == 0) { + *normsize = 0; return path; } wchar_t *pEnd = size >= 0 ? &path[size] : NULL; @@ -2431,11 +2433,7 @@ _Py_normpath(wchar_t *path, Py_ssize_t size) *p2++ = lastC = *p1; } } - if (sepCount) { - minP2 = p2; // Invalid path - } else { - minP2 = p2 - 1; // Absolute path has SEP at minP2 - } + minP2 = p2 - 1; } #else // Skip past two leading SEPs @@ -2495,13 +2493,28 @@ _Py_normpath(wchar_t *path, Py_ssize_t size) while (--p2 != minP2 && *p2 == SEP) { *p2 = L'\0'; } + } else { + --p2; } + *normsize = p2 - path + 1; #undef SEP_OR_END #undef IS_SEP #undef IS_END return path; } +/* In-place path normalisation. Returns the start of the normalized + path, which will be within the original buffer. Guaranteed to not + make the path longer, and will not fail. 'size' is the length of + the path, if known. If -1, the first null character will be assumed + to be the end of the path. */ +wchar_t * +_Py_normpath(wchar_t *path, Py_ssize_t size) +{ + Py_ssize_t norm_length; + return _Py_normpath_and_size(path, size, &norm_length); +} + /* Get the current directory. buflen is the buffer size in wide characters including the null character. Decode the path from the locale encoding. From 5d83c724229a030f32a63d182806a244ed04a869 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 11 Aug 2023 21:13:46 +0300 Subject: [PATCH 2/3] gh-106844: Fix issues in _winapi.LCMapStringEx (GH-107832) * Strings with length from 2**31-1 to 2**32-2 always caused MemoryError, it doesn't matter how much memory is available. * Strings with length exactly 2**32-1 caused OSError. * Strings longer than 2**32-1 characters were truncated due to integer overflow bug. * Strings containing the null character were truncated at the first null character. Now strings longer than 2**31-1 characters caused OverflowError and the null character is allowed. --- Lib/test/test_ntpath.py | 1 + ...-07-18-13-01-26.gh-issue-106844.mci4xO.rst | 1 + Modules/_winapi.c | 34 ++++++++++++++----- Modules/clinic/_winapi.c.h | 12 +++---- 4 files changed, 32 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2023-07-18-13-01-26.gh-issue-106844.mci4xO.rst diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 538d758624c9d6..78e1cb582512b0 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -1036,6 +1036,7 @@ def test_path_normcase(self): self._check_function(self.path.normcase) if sys.platform == 'win32': self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') + self.assertEqual(ntpath.normcase('abc\x00def'), 'abc\x00def') def test_path_isabs(self): self._check_function(self.path.isabs) diff --git a/Misc/NEWS.d/next/Windows/2023-07-18-13-01-26.gh-issue-106844.mci4xO.rst b/Misc/NEWS.d/next/Windows/2023-07-18-13-01-26.gh-issue-106844.mci4xO.rst new file mode 100644 index 00000000000000..1fdf162ef4ecdd --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-07-18-13-01-26.gh-issue-106844.mci4xO.rst @@ -0,0 +1 @@ +Fix integer overflow and truncating by the null character in :func:`!_winapi.LCMapStringEx` which affects :func:`ntpath.normcase`. diff --git a/Modules/_winapi.c b/Modules/_winapi.c index d6d2f4a6a9b103..77275408aed868 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1538,40 +1538,56 @@ _winapi.LCMapStringEx locale: LPCWSTR flags: DWORD - src: LPCWSTR + src: unicode [clinic start generated code]*/ static PyObject * _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, - LPCWSTR src) -/*[clinic end generated code: output=cf4713d80e2b47c9 input=9fe26f95d5ab0001]*/ + PyObject *src) +/*[clinic end generated code: output=b90e6b26e028ff0a input=3e3dcd9b8164012f]*/ { if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV | LCMAP_SORTKEY)) { return PyErr_Format(PyExc_ValueError, "unsupported flags"); } - int dest_size = LCMapStringEx(locale, flags, src, -1, NULL, 0, + Py_ssize_t src_size; + wchar_t *src_ = PyUnicode_AsWideCharString(src, &src_size); + if (!src_) { + return NULL; + } + if (src_size > INT_MAX) { + PyMem_Free(src_); + PyErr_SetString(PyExc_OverflowError, "input string is too long"); + return NULL; + } + + int dest_size = LCMapStringEx(locale, flags, src_, (int)src_size, NULL, 0, NULL, NULL, 0); - if (dest_size == 0) { - return PyErr_SetFromWindowsErr(0); + if (dest_size <= 0) { + DWORD error = GetLastError(); + PyMem_Free(src_); + return PyErr_SetFromWindowsErr(error); } wchar_t* dest = PyMem_NEW(wchar_t, dest_size); if (dest == NULL) { + PyMem_Free(src_); return PyErr_NoMemory(); } - int nmapped = LCMapStringEx(locale, flags, src, -1, dest, dest_size, + int nmapped = LCMapStringEx(locale, flags, src_, (int)src_size, dest, dest_size, NULL, NULL, 0); - if (nmapped == 0) { + if (nmapped <= 0) { DWORD error = GetLastError(); + PyMem_Free(src_); PyMem_DEL(dest); return PyErr_SetFromWindowsErr(error); } - PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1); + PyMem_Free(src_); + PyObject *ret = PyUnicode_FromWideChar(dest, nmapped); PyMem_DEL(dest); return ret; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 3767b19d76db05..1394a8fcffd9f6 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -885,7 +885,7 @@ PyDoc_STRVAR(_winapi_LCMapStringEx__doc__, static PyObject * _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, - LPCWSTR src); + PyObject *src); static PyObject * _winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -912,16 +912,16 @@ _winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, static const char * const _keywords[] = {"locale", "flags", "src", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .format = "O&kO&:LCMapStringEx", + .format = "O&kU:LCMapStringEx", .kwtuple = KWTUPLE, }; #undef KWTUPLE LPCWSTR locale = NULL; DWORD flags; - LPCWSTR src = NULL; + PyObject *src; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - _PyUnicode_WideCharString_Converter, &locale, &flags, _PyUnicode_WideCharString_Converter, &src)) { + _PyUnicode_WideCharString_Converter, &locale, &flags, &src)) { goto exit; } return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src); @@ -929,8 +929,6 @@ _winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, exit: /* Cleanup for locale */ PyMem_Free((void *)locale); - /* Cleanup for src */ - PyMem_Free((void *)src); return return_value; } @@ -1481,4 +1479,4 @@ _winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO return return_value; } -/*[clinic end generated code: output=be1343b3759e0c96 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9d43ae4bdbe1126a input=a9049054013a1b77]*/ From eadabd812da9942bec63107e6b6570f27bccb8a2 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 15 Aug 2023 17:24:51 +0100 Subject: [PATCH 3/3] gh-106242: Minor fixup to avoid compiler warnings --- Python/fileutils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/fileutils.c b/Python/fileutils.c index 0d31845a0fb11a..268ffa3d61a470 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2383,7 +2383,7 @@ wchar_t * _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) { assert(path != NULL); - if (!path[0] && size < 0 || size == 0) { + if ((size < 0 && !path[0]) || size == 0) { *normsize = 0; return path; }