python · vstinner · Oct 12, 2022 · Oct 7, 2022 · Oct 7, 2022 · Oct 7, 2022
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-07-13-09-57.gh-issue-97982.6qR7Qm.rst
@@ -0,0 +1,3 @@
+Add a private ``any_unicode_count`` helper in ``unicodeobject.c`` that is
+shared by ``PyUnicode_Count`` and ``unicode_count``, replacing the
+duplicated counting code in those two functions.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -8964,21 +8964,19 @@ _PyUnicode_InsertThousandsGrouping(
     return count;
 }
 
-
-Py_ssize_t
-PyUnicode_Count(PyObject *str,
-                PyObject *substr,
-                Py_ssize_t start,
-                Py_ssize_t end)
+static Py_ssize_t
+any_unicode_count(PyObject *str,
+                  PyObject *substr,
+                  Py_ssize_t start,
+                  Py_ssize_t end)
 {
+    // You must ensure that `str` and `substr` are both unicode objects
+    // before calling this function.
     Py_ssize_t result;
     int kind1, kind2;
     const void *buf1 = NULL, *buf2 = NULL;
     Py_ssize_t len1, len2;
 
-    if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
-        return -1;
-
     kind1 = PyUnicode_KIND(str);
     kind2 = PyUnicode_KIND(substr);
     if (kind1 < kind2)
@@ -9039,6 +9037,18 @@ PyUnicode_Count(PyObject *str,
     return -1;
 }
 
+/* Public wrapper: run the ensure_unicode() checks, then delegate the count. */
+Py_ssize_t
+PyUnicode_Count(PyObject *str, PyObject *substr,
+                Py_ssize_t start, Py_ssize_t end)
+{
+    /* any_unicode_count() does no checking itself; a failed check yields -1. */
+    if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) {
+        return -1;
+    }
+    return any_unicode_count(str, substr, start, end);
+}
+
 Py_ssize_t
 PyUnicode_Find(PyObject *str,
                PyObject *substr,
@@ -10858,60 +10868,16 @@ unicode_count(PyObject *self, PyObject *args)
     Py_ssize_t start = 0;
     Py_ssize_t end = PY_SSIZE_T_MAX;
     PyObject *result;
-    int kind1, kind2;
-    const void *buf1, *buf2;
-    Py_ssize_t len1, len2, iresult;
+    Py_ssize_t iresult;
 
     if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
         return NULL;
 
-    kind1 = PyUnicode_KIND(self);
-    kind2 = PyUnicode_KIND(substring);
-    if (kind1 < kind2)
-        return PyLong_FromLong(0);
-
-    len1 = PyUnicode_GET_LENGTH(self);
-    len2 = PyUnicode_GET_LENGTH(substring);
-    ADJUST_INDICES(start, end, len1);
-    if (end - start < len2)
-        return PyLong_FromLong(0);
-
-    buf1 = PyUnicode_DATA(self);
-    buf2 = PyUnicode_DATA(substring);
-    if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1);
-        if (!buf2)
-            return NULL;
-    }
-    switch (kind1) {
-    case PyUnicode_1BYTE_KIND:
-        iresult = ucs1lib_count(
-            ((const Py_UCS1*)buf1) + start, end - start,
-            buf2, len2, PY_SSIZE_T_MAX
-            );
-        break;
-    case PyUnicode_2BYTE_KIND:
-        iresult = ucs2lib_count(
-            ((const Py_UCS2*)buf1) + start, end - start,
-            buf2, len2, PY_SSIZE_T_MAX
-            );
-        break;
-    case PyUnicode_4BYTE_KIND:
-        iresult = ucs4lib_count(
-            ((const Py_UCS4*)buf1) + start, end - start,
-            buf2, len2, PY_SSIZE_T_MAX
-            );
-        break;
-    default:
-        Py_UNREACHABLE();
-    }
+    iresult = any_unicode_count(self, substring, start, end);
+    if (iresult == -1)
+        return NULL;
 
     result = PyLong_FromSsize_t(iresult);
-
-    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
-    if (kind2 != kind1)
-        PyMem_Free((void *)buf2);
-
     return result;
 }