Skip to content

Commit 0932272

Browse files
authored
gh-106242: Fix path truncation in os.path.normpath (GH-106816)
1 parent 607f18c commit 0932272

File tree

5 files changed

+30
-9
lines changed

5 files changed

+30
-9
lines changed

Include/internal/pycore_fileutils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ extern int _Py_add_relfile(wchar_t *dirname,
260260
size_t bufsize);
261261
extern size_t _Py_find_basename(const wchar_t *filename);
262262
PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size);
263+
// Like _Py_normpath(), but also stores the length of the normalized path
// in '*normsize'.
extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize);
263264

264265
// The Windows Games API family does not provide these functions
265266
// so provide our own implementations. Remove them in case they get added

Lib/test/test_genericpath.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,10 @@ def test_normpath_issue5827(self):
460460
for path in ('', '.', '/', '\\', '///foo/.//bar//'):
461461
self.assertIsInstance(self.pathmodule.normpath(path), str)
462462

463+
def test_normpath_issue106242(self):
464+
for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'):
465+
self.assertEqual(self.pathmodule.normpath(path), path)
466+
463467
def test_abspath_issue3426(self):
464468
# Check that abspath returns unicode when the arg is unicode
465469
# with both ASCII and non-ASCII cwds.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix :func:`os.path.normpath` to handle embedded null characters without truncating the path.

Modules/posixmodule.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5274,7 +5274,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path)
52745274
if (!buffer) {
52755275
return NULL;
52765276
}
5277-
PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1);
5277+
Py_ssize_t norm_len;
5278+
wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len);
5279+
PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len);
52785280
PyMem_Free(buffer);
52795281
return result;
52805282
}

Python/fileutils.c

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2377,12 +2377,14 @@ _Py_find_basename(const wchar_t *filename)
23772377
path, which will be within the original buffer. Guaranteed to not
23782378
make the path longer, and will not fail. 'size' is the length of
23792379
the path, if known. If -1, the first null character will be assumed
2380-
to be the end of the path. */
2380+
to be the end of the path. 'normsize' will be set to contain the
2381+
length of the resulting normalized path. */
23812382
wchar_t *
2382-
_Py_normpath(wchar_t *path, Py_ssize_t size)
2383+
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
23832384
{
23842385
assert(path != NULL);
2385-
if (!path[0] || size == 0) {
2386+
if (!path[0] && size < 0 || size == 0) {
2387+
*normsize = 0;
23862388
return path;
23872389
}
23882390
wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
@@ -2431,11 +2433,7 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
24312433
*p2++ = lastC = *p1;
24322434
}
24332435
}
2434-
if (sepCount) {
2435-
minP2 = p2; // Invalid path
2436-
} else {
2437-
minP2 = p2 - 1; // Absolute path has SEP at minP2
2438-
}
2436+
minP2 = p2 - 1;
24392437
}
24402438
#else
24412439
// Skip past two leading SEPs
@@ -2495,13 +2493,28 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
24952493
while (--p2 != minP2 && *p2 == SEP) {
24962494
*p2 = L'\0';
24972495
}
2496+
} else {
2497+
--p2;
24982498
}
2499+
*normsize = p2 - path + 1;
24992500
#undef SEP_OR_END
25002501
#undef IS_SEP
25012502
#undef IS_END
25022503
return path;
25032504
}
25042505

2506+
/* In-place path normalisation. Returns the start of the normalized
2507+
path, which will be within the original buffer. Guaranteed to not
2508+
make the path longer, and will not fail. 'size' is the length of
2509+
the path, if known. If -1, the first null character will be assumed
2510+
to be the end of the path. */
2511+
wchar_t *
2512+
_Py_normpath(wchar_t *path, Py_ssize_t size)
2513+
{
2514+
Py_ssize_t norm_length;
2515+
return _Py_normpath_and_size(path, size, &norm_length);
2516+
}
2517+
25052518

25062519
/* Get the current directory. buflen is the buffer size in wide characters
25072520
including the null character. Decode the path from the locale encoding.

0 commit comments

Comments
 (0)