Skip to content

gh-97982: Reuse PyUnicode_Count in unicode_count #98025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 12, 2022
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add a private ``any_unicode_count`` helper in ``unicodeobject.c`` that is
shared by both ``PyUnicode_Count`` and ``unicode_count``, removing the
duplicated counting logic.
80 changes: 23 additions & 57 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -8964,21 +8964,19 @@ _PyUnicode_InsertThousandsGrouping(
return count;
}


Py_ssize_t
PyUnicode_Count(PyObject *str,
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end)
static Py_ssize_t
any_unicode_count(PyObject *str,
PyObject *substr,
Py_ssize_t start,
Py_ssize_t end)
{
// You must ensure that `str` and `substr` are both unicode objects
// before calling this function.
Py_ssize_t result;
int kind1, kind2;
const void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2;

if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
return -1;

kind1 = PyUnicode_KIND(str);
kind2 = PyUnicode_KIND(substr);
if (kind1 < kind2)
Expand Down Expand Up @@ -9039,6 +9037,18 @@ PyUnicode_Count(PyObject *str,
return -1;
}

/* Public entry point for counting `substr` within `str`.
 *
 * Both arguments are validated with ensure_unicode() first, `str` before
 * `substr`; if either check fails, -1 is returned without calling the
 * helper.  Otherwise `start` and `end` are forwarded unchanged to
 * any_unicode_count(), which requires that both objects have already been
 * verified, and its result is returned as-is.
 */
Py_ssize_t
PyUnicode_Count(PyObject *str,
                PyObject *substr,
                Py_ssize_t start,
                Py_ssize_t end)
{
    /* Validate the haystack first, matching the original evaluation order. */
    if (ensure_unicode(str) < 0) {
        return -1;
    }
    /* Only reached when `str` passed validation. */
    if (ensure_unicode(substr) < 0) {
        return -1;
    }

    return any_unicode_count(str, substr, start, end);
}

Py_ssize_t
PyUnicode_Find(PyObject *str,
PyObject *substr,
Expand Down Expand Up @@ -10858,60 +10868,16 @@ unicode_count(PyObject *self, PyObject *args)
Py_ssize_t start = 0;
Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
int kind1, kind2;
const void *buf1, *buf2;
Py_ssize_t len1, len2, iresult;
Py_ssize_t iresult;

if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
return NULL;

kind1 = PyUnicode_KIND(self);
kind2 = PyUnicode_KIND(substring);
if (kind1 < kind2)
return PyLong_FromLong(0);

len1 = PyUnicode_GET_LENGTH(self);
len2 = PyUnicode_GET_LENGTH(substring);
ADJUST_INDICES(start, end, len1);
if (end - start < len2)
return PyLong_FromLong(0);

buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
switch (kind1) {
case PyUnicode_1BYTE_KIND:
iresult = ucs1lib_count(
((const Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_2BYTE_KIND:
iresult = ucs2lib_count(
((const Py_UCS2*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_4BYTE_KIND:
iresult = ucs4lib_count(
((const Py_UCS4*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
default:
Py_UNREACHABLE();
}
iresult = any_unicode_count(self, substring, start, end);
if (iresult == -1)
return NULL;

result = PyLong_FromSsize_t(iresult);

assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
if (kind2 != kind1)
PyMem_Free((void *)buf2);

return result;
}

Expand Down