Skip to content

Commit d488970

Browse files
authored
[3.11] gh-105235: Prevent reading outside buffer during mmap.find() (GH-105252) (#106710)
[3.11] gh-105235: Prevent reading outside buffer during mmap.find() (GH-105252) * Add a special case for s[-m:] == p in _PyBytes_Find * Add tests for _PyBytes_Find * Make sure that start <= end in mmap.find. (cherry picked from commit ab86426)
1 parent 2186212 commit d488970

File tree

5 files changed

+161
-3
lines changed

5 files changed

+161
-3
lines changed

Lib/test/test_mmap.py

+21
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,27 @@ def test_find_end(self):
299299
self.assertEqual(m.find(b'one', 1, -2), -1)
300300
self.assertEqual(m.find(bytearray(b'one')), 0)
301301

302+
for i in range(-n-1, n+1):
303+
for j in range(-n-1, n+1):
304+
for p in [b"o", b"on", b"two", b"ones", b"s"]:
305+
expected = data.find(p, i, j)
306+
self.assertEqual(m.find(p, i, j), expected, (p, i, j))
307+
308+
def test_find_does_not_access_beyond_buffer(self):
309+
try:
310+
flags = mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS
311+
PAGESIZE = mmap.PAGESIZE
312+
PROT_NONE = 0
313+
PROT_READ = mmap.PROT_READ
314+
except AttributeError as e:
315+
raise unittest.SkipTest("mmap flags unavailable") from e
316+
for i in range(0, 2049):
317+
with mmap.mmap(-1, PAGESIZE * (i + 1),
318+
flags=flags, prot=PROT_NONE) as guard:
319+
with mmap.mmap(-1, PAGESIZE * (i + 2048),
320+
flags=flags, prot=PROT_READ) as fm:
321+
fm.find(b"fo", -2)
322+
302323

303324
def test_rfind(self):
304325
# test the new 'end' parameter works as expected
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Prevent out-of-bounds memory access during ``mmap.find()`` calls.

Modules/_testinternalcapi.c

+114
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "Python.h"
1515
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
1616
#include "pycore_bitutils.h" // _Py_bswap32()
17+
#include "pycore_bytesobject.h" // _PyBytes_Find()
1718
#include "pycore_fileutils.h" // _Py_normpath
1819
#include "pycore_frame.h" // _PyInterpreterFrame
1920
#include "pycore_gc.h" // PyGC_Head
@@ -380,6 +381,118 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
380381
}
381382

382383

384+
static int
385+
check_bytes_find(const char *haystack0, const char *needle0,
386+
int offset, Py_ssize_t expected)
387+
{
388+
Py_ssize_t len_haystack = strlen(haystack0);
389+
Py_ssize_t len_needle = strlen(needle0);
390+
Py_ssize_t result_1 = _PyBytes_Find(haystack0, len_haystack,
391+
needle0, len_needle, offset);
392+
if (result_1 != expected) {
393+
PyErr_Format(PyExc_AssertionError,
394+
"Incorrect result_1: '%s' in '%s' (offset=%zd)",
395+
needle0, haystack0, offset);
396+
return -1;
397+
}
398+
// Allocate new buffer with no NULL terminator.
399+
char *haystack = PyMem_Malloc(len_haystack);
400+
if (haystack == NULL) {
401+
PyErr_NoMemory();
402+
return -1;
403+
}
404+
char *needle = PyMem_Malloc(len_needle);
405+
if (needle == NULL) {
406+
PyMem_Free(haystack);
407+
PyErr_NoMemory();
408+
return -1;
409+
}
410+
memcpy(haystack, haystack0, len_haystack);
411+
memcpy(needle, needle0, len_needle);
412+
Py_ssize_t result_2 = _PyBytes_Find(haystack, len_haystack,
413+
needle, len_needle, offset);
414+
PyMem_Free(haystack);
415+
PyMem_Free(needle);
416+
if (result_2 != expected) {
417+
PyErr_Format(PyExc_AssertionError,
418+
"Incorrect result_2: '%s' in '%s' (offset=%zd)",
419+
needle0, haystack0, offset);
420+
return -1;
421+
}
422+
return 0;
423+
}
424+
425+
static int
426+
check_bytes_find_large(Py_ssize_t len_haystack, Py_ssize_t len_needle,
427+
const char *needle)
428+
{
429+
char *zeros = PyMem_RawCalloc(len_haystack, 1);
430+
if (zeros == NULL) {
431+
PyErr_NoMemory();
432+
return -1;
433+
}
434+
Py_ssize_t res = _PyBytes_Find(zeros, len_haystack, needle, len_needle, 0);
435+
PyMem_RawFree(zeros);
436+
if (res != -1) {
437+
PyErr_Format(PyExc_AssertionError,
438+
"check_bytes_find_large(%zd, %zd) found %zd",
439+
len_haystack, len_needle, res);
440+
return -1;
441+
}
442+
return 0;
443+
}
444+
445+
/* Test entry point: run the _PyBytes_Find() checks.
 *
 * First a table of small haystack/needle/offset/expected cases is fed
 * to check_bytes_find(), then a table of larger zero-filled haystacks
 * is fed to check_bytes_find_large().  Cases run in table order and the
 * first failure stops the test.
 *
 * Returns None on success, or NULL with the helper's exception set.
 */
static PyObject *
test_bytes_find(PyObject *self, PyObject *Py_UNUSED(args))
{
    static const struct {
        const char *haystack;
        const char *needle;
        int offset;
        Py_ssize_t expected;
    } small_cases[] = {
        {"", "", 0, 0},
        {"Python", "", 0, 0},
        {"Python", "", 3, 3},
        {"Python", "", 6, 6},
        {"Python", "yth", 0, 1},
        {"ython", "yth", 1, 1},
        {"thon", "yth", 2, -1},
        {"Python", "thon", 0, 2},
        {"ython", "thon", 1, 2},
        {"thon", "thon", 2, 2},
        {"hon", "thon", 3, -1},
        {"Pytho", "zz", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ab", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "ba", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "bb", 0, -1},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "ab", 0, 30},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaba", "ba", 0, 30},
        {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb", "bb", 0, 30},
    };

    // Hunt for segfaults
    // n, m chosen here so that (n - m) % (m + 1) == 0
    // This would make default_find in fastsearch.h access haystack[n].
    static const struct {
        Py_ssize_t n;
        Py_ssize_t m;
        const char *needle;
    } large_cases[] = {
        {2048, 2, "ab"},
        {4096, 16, "0123456789abcdef"},
        {8192, 2, "ab"},
        {16384, 4, "abcd"},
        {32768, 2, "ab"},
    };

    const size_t n_small = sizeof(small_cases) / sizeof(small_cases[0]);
    for (size_t i = 0; i < n_small; i++) {
        if (check_bytes_find(small_cases[i].haystack,
                             small_cases[i].needle,
                             small_cases[i].offset,
                             small_cases[i].expected) < 0) {
            return NULL;
        }
    }

    const size_t n_large = sizeof(large_cases) / sizeof(large_cases[0]);
    for (size_t i = 0; i < n_large; i++) {
        if (check_bytes_find_large(large_cases[i].n,
                                   large_cases[i].m,
                                   large_cases[i].needle) < 0) {
            return NULL;
        }
    }

    Py_RETURN_NONE;
}
494+
495+
383496
static PyObject *
384497
normalize_path(PyObject *self, PyObject *filename)
385498
{
@@ -537,6 +650,7 @@ static PyMethodDef TestMethods[] = {
537650
{"reset_path_config", test_reset_path_config, METH_NOARGS},
538651
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
539652
{"test_edit_cost", test_edit_cost, METH_NOARGS},
653+
{"test_bytes_find", test_bytes_find, METH_NOARGS},
540654
{"normalize_path", normalize_path, METH_O, NULL},
541655
{"get_getpath_codeobject", get_getpath_codeobject, METH_NOARGS, NULL},
542656
{"EncodeLocaleEx", encode_locale_ex, METH_VARARGS},

Modules/mmapmodule.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -351,12 +351,17 @@ mmap_gfind(mmap_object *self,
351351

352352
Py_ssize_t res;
353353
CHECK_VALID_OR_RELEASE(NULL, view);
354-
if (reverse) {
354+
if (end < start) {
355+
res = -1;
356+
}
357+
else if (reverse) {
358+
assert(0 <= start && start <= end && end <= self->size);
355359
res = _PyBytes_ReverseFind(
356360
self->data + start, end - start,
357361
view.buf, view.len, start);
358362
}
359363
else {
364+
assert(0 <= start && start <= end && end <= self->size);
360365
res = _PyBytes_Find(
361366
self->data + start, end - start,
362367
view.buf, view.len, start);

Objects/bytesobject.c

+19-2
Original file line numberDiff line numberDiff line change
@@ -1283,8 +1283,25 @@ _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
12831283
const char *needle, Py_ssize_t len_needle,
12841284
Py_ssize_t offset)
12851285
{
1286-
return stringlib_find(haystack, len_haystack,
1287-
needle, len_needle, offset);
1286+
assert(len_haystack >= 0);
1287+
assert(len_needle >= 0);
1288+
// Extra checks because stringlib_find accesses haystack[len_haystack].
1289+
if (len_needle == 0) {
1290+
return offset;
1291+
}
1292+
if (len_needle > len_haystack) {
1293+
return -1;
1294+
}
1295+
assert(len_haystack >= 1);
1296+
Py_ssize_t res = stringlib_find(haystack, len_haystack - 1,
1297+
needle, len_needle, offset);
1298+
if (res == -1) {
1299+
Py_ssize_t last_align = len_haystack - len_needle;
1300+
if (memcmp(haystack + last_align, needle, len_needle) == 0) {
1301+
return offset + last_align;
1302+
}
1303+
}
1304+
return res;
12881305
}
12891306

12901307
Py_ssize_t

0 commit comments

Comments
 (0)