From f079ac02370ee71e3482fb3e8ed06e27b8b9072f Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 3 Sep 2022 12:41:44 -0400 Subject: [PATCH 1/5] Move some type-checking out of bisect loops --- Modules/_bisectmodule.c | 142 +++++++++++++++++++++++++++++++++++----- 1 file changed, 126 insertions(+), 16 deletions(-) diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c index 0caa92b2dc6e02..1e5f5d12f7e4b3 100644 --- a/Modules/_bisectmodule.c +++ b/Modules/_bisectmodule.c @@ -25,6 +25,26 @@ get_bisect_state(PyObject *module) return (bisect_state *)state; } +static ssizeargfunc +get_sq_item(PyObject *s) +{ + PyTypeObject *tp = Py_TYPE(s); + // Inline the important parts of PySequence_GetItem. + PySequenceMethods *m = tp->tp_as_sequence; + if (m && m->sq_item) { + return m->sq_item; + } + const char *msg; + if (tp->tp_as_mapping && tp->tp_as_mapping->mp_subscript) { + msg = "%.200s is not a sequence"; + } + else { + msg = "'%.200s' object does not support indexing"; + } + PyErr_Format(PyExc_TypeError, msg, tp->tp_name); + return NULL; +} + static inline Py_ssize_t internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi, PyObject* key) @@ -42,32 +62,77 @@ internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t if (hi < 0) return -1; } + ssizeargfunc sq_item = get_sq_item(list); + if (sq_item == NULL) { + return -1; + } + if (Py_EnterRecursiveCall("in _bisect.bisect_right") < 0) { + return -1; + } + PyTypeObject *tp = Py_TYPE(item); + richcmpfunc compare = tp->tp_richcompare; while (lo < hi) { /* The (size_t)cast ensures that the addition and subsequent division are performed as unsigned operations, avoiding difficulties from signed overflow. (See issue 13496.) */ mid = ((size_t)lo + hi) / 2; - litem = PySequence_GetItem(list, mid); - if (litem == NULL) - return -1; + assert(mid >= 0); + litem = sq_item(list, mid); + assert((PyErr_Occurred() == NULL) ^ (litem == NULL)); + if (litem == NULL) { + goto error; + } if (key != Py_None) { PyObject *newitem = PyObject_CallOneArg(key, litem); if (newitem == NULL) { - Py_DECREF(litem); - return -1; + goto error; } Py_SETREF(litem, newitem); } - res = PyObject_RichCompareBool(item, litem, Py_LT); + if (compare != NULL && Py_IS_TYPE(litem, tp)) { + PyObject *res_obj = compare(item, litem, Py_LT); + if (res_obj == Py_True) { + Py_DECREF(res_obj); + Py_DECREF(litem); + hi = mid; + continue; + } + if (res_obj == Py_False) { + Py_DECREF(res_obj); + Py_DECREF(litem); + lo = mid + 1; + continue; + } + if (res_obj == NULL) { + goto error; + } + if (res_obj == Py_NotImplemented) { + Py_DECREF(res_obj); + compare = NULL; + res = PyObject_RichCompareBool(item, litem, Py_LT); + } + else { + res = PyObject_IsTrue(res_obj); + } + } + else { + res = PyObject_RichCompareBool(item, litem, Py_LT); + } + if (res < 0) { + goto error; + } Py_DECREF(litem); - if (res < 0) - return -1; if (res) hi = mid; else lo = mid + 1; } + Py_LeaveRecursiveCall(); return lo; +error: + Py_LeaveRecursiveCall(); + Py_DECREF(litem); + return -1; } /*[clinic input] @@ -168,32 +233,77 @@ internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t h if (hi < 0) return -1; } + ssizeargfunc sq_item = get_sq_item(list); + if (sq_item == NULL) { + return -1; + } + if (Py_EnterRecursiveCall("in _bisect.bisect_left") < 0) { + return -1; + } + PyTypeObject *tp = Py_TYPE(item); + richcmpfunc compare = tp->tp_richcompare; while (lo < hi) { /* The (size_t)cast ensures that the addition and subsequent division are performed as unsigned operations, avoiding difficulties from signed overflow. (See issue 13496.) */ mid = ((size_t)lo + hi) / 2; - litem = PySequence_GetItem(list, mid); - if (litem == NULL) - return -1; + assert(mid >= 0); + litem = sq_item(list, mid); + assert((PyErr_Occurred() == NULL) ^ (litem == NULL)); + if (litem == NULL) { + goto error; + } if (key != Py_None) { PyObject *newitem = PyObject_CallOneArg(key, litem); if (newitem == NULL) { - Py_DECREF(litem); - return -1; + goto error; } Py_SETREF(litem, newitem); } - res = PyObject_RichCompareBool(litem, item, Py_LT); + if (compare != NULL && Py_IS_TYPE(litem, tp)) { + PyObject *res_obj = compare(litem, item, Py_LT); + if (res_obj == Py_True) { + Py_DECREF(res_obj); + Py_DECREF(litem); + lo = mid + 1; + continue; + } + if (res_obj == Py_False) { + Py_DECREF(res_obj); + Py_DECREF(litem); + hi = mid; + continue; + } + if (res_obj == NULL) { + goto error; + } + if (res_obj == Py_NotImplemented) { + Py_DECREF(res_obj); + compare = NULL; + res = PyObject_RichCompareBool(litem, item, Py_LT); + } + else { + res = PyObject_IsTrue(res_obj); + } + } + else { + res = PyObject_RichCompareBool(litem, item, Py_LT); + } + if (res < 0) { + goto error; + } Py_DECREF(litem); - if (res < 0) - return -1; if (res) lo = mid + 1; else hi = mid; } + Py_LeaveRecursiveCall(); return lo; +error: + Py_LeaveRecursiveCall(); + Py_DECREF(litem); + return -1; } From 2cfc70a77dd81f9502e026b7de790346aa78bf40 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 3 Sep 2022 18:39:08 +0000 Subject: [PATCH 2/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2022-09-03-18-39-05.gh-issue-96538.W156-D.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-09-03-18-39-05.gh-issue-96538.W156-D.rst diff --git a/Misc/NEWS.d/next/Library/2022-09-03-18-39-05.gh-issue-96538.W156-D.rst b/Misc/NEWS.d/next/Library/2022-09-03-18-39-05.gh-issue-96538.W156-D.rst new file mode 100644 index 00000000000000..22e5b4fc259964 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-09-03-18-39-05.gh-issue-96538.W156-D.rst @@ -0,0 +1 @@ +Speed up ``bisect.bisect()`` functions by taking advantage of type-stability. From b85a132fbb5da69bb168ccd65b2803a800ee32a0 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 3 Sep 2022 14:42:54 -0400 Subject: [PATCH 3/5] XDECREF in error label --- Modules/_bisectmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c index 1e5f5d12f7e4b3..4a166440ec4404 100644 --- a/Modules/_bisectmodule.c +++ b/Modules/_bisectmodule.c @@ -131,7 +131,7 @@ internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t return lo; error: Py_LeaveRecursiveCall(); - Py_DECREF(litem); + Py_XDECREF(litem); return -1; } @@ -302,7 +302,7 @@ internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t h return lo; error: Py_LeaveRecursiveCall(); - Py_DECREF(litem); + Py_XDECREF(litem); return -1; } From 775708fbd7ddd6efcf2868e13b0e6c233a84c35e Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sat, 3 Sep 2022 14:44:22 -0400 Subject: [PATCH 4/5] improve comment --- Modules/_bisectmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c index 4a166440ec4404..e8c08947dc0651 100644 --- a/Modules/_bisectmodule.c +++ b/Modules/_bisectmodule.c @@ -28,8 +28,8 @@ get_bisect_state(PyObject *module) static ssizeargfunc get_sq_item(PyObject *s) { + // The parts of PySequence_GetItem that we only need to do once PyTypeObject *tp = Py_TYPE(s); - // Inline the important parts of PySequence_GetItem. PySequenceMethods *m = tp->tp_as_sequence; if (m && m->sq_item) { return m->sq_item; From 083f0d750f5051c064fa834fd7dba5194bc195ba Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Sun, 4 Sep 2022 13:48:11 -0400 Subject: [PATCH 5/5] Add comments distinguishing fast path from defualt path --- Modules/_bisectmodule.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c index e8c08947dc0651..09f8e5a6f07d61 100644 --- a/Modules/_bisectmodule.c +++ b/Modules/_bisectmodule.c @@ -77,6 +77,7 @@ internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t signed overflow. (See issue 13496.) */ mid = ((size_t)lo + hi) / 2; assert(mid >= 0); + // PySequence_GetItem, but we already checked the types. litem = sq_item(list, mid); assert((PyErr_Occurred() == NULL) ^ (litem == NULL)); if (litem == NULL) { @@ -89,7 +90,13 @@ internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t } Py_SETREF(litem, newitem); } + /* if item < key(list[mid]): + * hi = mid + * else: + * lo = mid + 1 + */ if (compare != NULL && Py_IS_TYPE(litem, tp)) { + // A fast path for comparing objects of the same type PyObject *res_obj = compare(item, litem, Py_LT); if (res_obj == Py_True) { Py_DECREF(res_obj); @@ -116,6 +123,7 @@ internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t } } else { + // A default path for comparing arbitrary objects res = PyObject_RichCompareBool(item, litem, Py_LT); } if (res < 0) { @@ -248,6 +256,7 @@ internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t h signed overflow. (See issue 13496.) */ mid = ((size_t)lo + hi) / 2; assert(mid >= 0); + // PySequence_GetItem, but we already checked the types. litem = sq_item(list, mid); assert((PyErr_Occurred() == NULL) ^ (litem == NULL)); if (litem == NULL) { @@ -260,7 +269,13 @@ internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t h } Py_SETREF(litem, newitem); } + /* if key(list[mid]) < item: + * lo = mid + 1 + * else: + * hi = mid + */ if (compare != NULL && Py_IS_TYPE(litem, tp)) { + // A fast path for comparing objects of the same type PyObject *res_obj = compare(litem, item, Py_LT); if (res_obj == Py_True) { Py_DECREF(res_obj); @@ -287,6 +302,7 @@ internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t h } } else { + // A default path for comparing arbitrary objects res = PyObject_RichCompareBool(litem, item, Py_LT); } if (res < 0) {