Skip to content

Commit 6ddb09f

Browse files
authored
bpo-46848: Use stringlib/fastsearch in mmap (GH-31625)
Speed up mmap.find(). Add _PyBytes_Find() and _PyBytes_ReverseFind().
1 parent 9833bb9 commit 6ddb09f

File tree

4 files changed

+53
-19
lines changed

4 files changed

+53
-19
lines changed

Include/cpython/bytesobject.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,22 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
116116
void *str,
117117
const void *bytes,
118118
Py_ssize_t size);
119+
120+
/* Substring Search.
121+
122+
Returns the index of the first occurrence of
123+
a substring ("needle") in a larger text ("haystack").
124+
If the needle is not found, return -1.
125+
If the needle is found, add offset to the index.
126+
*/
127+
128+
PyAPI_FUNC(Py_ssize_t)
129+
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
130+
const char *needle, Py_ssize_t len_needle,
131+
Py_ssize_t offset);
132+
133+
/* Same as above, but search right-to-left */
134+
PyAPI_FUNC(Py_ssize_t)
135+
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
136+
const char *needle, Py_ssize_t len_needle,
137+
Py_ssize_t offset);
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
For performance, use the optimized string-searching implementations
2+
from :meth:`~bytes.find` and :meth:`~bytes.rfind`
3+
for :meth:`~mmap.find` and :meth:`~mmap.rfind`.

Modules/mmapmodule.c

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -315,12 +315,8 @@ mmap_gfind(mmap_object *self,
315315
if (!PyArg_ParseTuple(args, reverse ? "y*|nn:rfind" : "y*|nn:find",
316316
&view, &start, &end)) {
317317
return NULL;
318-
} else {
319-
const char *p, *start_p, *end_p;
320-
int sign = reverse ? -1 : 1;
321-
const char *needle = view.buf;
322-
Py_ssize_t len = view.len;
323-
318+
}
319+
else {
324320
if (start < 0)
325321
start += self->size;
326322
if (start < 0)
@@ -335,21 +331,19 @@ mmap_gfind(mmap_object *self,
335331
else if (end > self->size)
336332
end = self->size;
337333

338-
start_p = self->data + start;
339-
end_p = self->data + end;
340-
341-
for (p = (reverse ? end_p - len : start_p);
342-
(p >= start_p) && (p + len <= end_p); p += sign) {
343-
Py_ssize_t i;
344-
for (i = 0; i < len && needle[i] == p[i]; ++i)
345-
/* nothing */;
346-
if (i == len) {
347-
PyBuffer_Release(&view);
348-
return PyLong_FromSsize_t(p - self->data);
349-
}
334+
Py_ssize_t res;
335+
if (reverse) {
336+
res = _PyBytes_ReverseFind(
337+
self->data + start, end - start,
338+
view.buf, view.len, start);
339+
}
340+
else {
341+
res = _PyBytes_Find(
342+
self->data + start, end - start,
343+
view.buf, view.len, start);
350344
}
351345
PyBuffer_Release(&view);
352-
return PyLong_FromLong(-1);
346+
return PyLong_FromSsize_t(res);
353347
}
354348
}
355349

Objects/bytesobject.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,24 @@ PyBytes_AsStringAndSize(PyObject *obj,
12471247

12481248
#undef STRINGLIB_GET_EMPTY
12491249

1250+
Py_ssize_t
1251+
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1252+
const char *needle, Py_ssize_t len_needle,
1253+
Py_ssize_t offset)
1254+
{
1255+
return stringlib_find(haystack, len_haystack,
1256+
needle, len_needle, offset);
1257+
}
1258+
1259+
Py_ssize_t
1260+
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1261+
const char *needle, Py_ssize_t len_needle,
1262+
Py_ssize_t offset)
1263+
{
1264+
return stringlib_rfind(haystack, len_haystack,
1265+
needle, len_needle, offset);
1266+
}
1267+
12501268
PyObject *
12511269
PyBytes_Repr(PyObject *obj, int smartquotes)
12521270
{

0 commit comments

Comments
 (0)