From fbbf5f307a60a88be0a7b13d9f8ef6374b409566 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 20:58:36 +0200 Subject: [PATCH 1/9] PoC: str vectorcall --- Objects/unicodeobject.c | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5f15071d7d54ef..0211ab49bc56dc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14617,6 +14617,134 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, return unicode; } +static const char * +as_const_char(PyObject *obj) +{ + if (!PyUnicode_Check(obj)) { + _PyArg_BadArgument("str", "argument", "str", obj); + return NULL; + } + Py_ssize_t sz; + const char *str = PyUnicode_AsUTF8AndSize(obj, &sz); + if (str == NULL) { + return NULL; + } + if (strlen(str) != (size_t)sz) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + return str; +} + +static PyObject * +fallback_to_tp_call(PyObject *type, Py_ssize_t nargs, Py_ssize_t nkwargs, + PyObject *const *args, PyObject *kwnames) +{ + PyObject *tuple = PyTuple_New(nargs); + if (tuple == NULL) { + return NULL; + } + for (Py_ssize_t i = 0; i < nargs; i++) { + PyObject *value = Py_NewRef(args[i]); + PyTuple_SET_ITEM(tuple, i, value); + } + PyObject *dict = PyDict_New(); + if (dict == NULL) { + Py_DECREF(tuple); + return NULL; + } + for (Py_ssize_t j = 0; j < nkwargs; j++) { + PyObject *key = PyTuple_GET_ITEM(kwnames, j); + PyObject *value = args[nargs + j]; + if (PyDict_SetItem(dict, key, value) < 0) { + Py_DECREF(tuple); + Py_DECREF(dict); + return NULL; + } + } + return unicode_new(_PyType_CAST(type), tuple, dict); +} + +static PyObject * +unicode_vectorcall(PyObject *type, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; + if (nargs == 0 && nkwargs == 0) { + return unicode_get_empty(); + } + PyObject *object = args[0]; + if (nargs == 1 && nkwargs == 0) { + return unicode_new_impl(_PyType_CAST(type), object, NULL, NULL); + } + if (nargs + nkwargs == 2) { + const char *encoding; + const char *errors; + if (nkwargs == 1) { + PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); + if (_PyUnicode_EqualToASCIIString(kw0, "encoding")) { + encoding = as_const_char(args[1]); + if (encoding == NULL) { + return NULL; + } + errors = NULL; + } + else if (_PyUnicode_EqualToASCIIString(kw0, "errors")) { + errors = as_const_char(args[1]); + if (errors == NULL) { + return NULL; + } + encoding = NULL; + } + else { + PyErr_Format(PyExc_TypeError, + "'%S' is an invalid keyword argument for str()", kw0); + return NULL; + } + } + else if (nkwargs == 0) { + encoding = as_const_char(args[1]); + errors = NULL; + } + else { + return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); + } + PyObject *object = args[0]; + return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + } + if (nargs + nkwargs == 3) { + if (nkwargs == 1) { + PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); + if (!_PyUnicode_EqualToASCIIString(kw0, "errors")) { + PyErr_Format(PyExc_TypeError, + "'%S' is an invalid keyword argument for str()", kw0); + return NULL; + } + } + else if (nkwargs != 0) { + return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); + } + PyObject *object = args[0]; + const char *encoding = as_const_char(args[1]); + if (encoding == NULL) { + return NULL; + } + const char *errors = as_const_char(args[2]); + if (errors == NULL) { + return NULL; + } + return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + } + if (nargs > 3) { + PyErr_Format(PyExc_TypeError, + "str() takes at most 3 arguments (%d given)", nargs + nkwargs); + return NULL; + } + + return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); +} + static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *unicode) { @@ -14758,6 +14886,7 @@ PyTypeObject PyUnicode_Type = { 0, /* tp_alloc */ unicode_new, /* tp_new */ PyObject_Del, /* tp_free */ + .tp_vectorcall = unicode_vectorcall, }; /* Initialize the Unicode implementation */ From 2a94bfa7da458364a4dee2a6a6c1a695aa9cee26 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 22:15:01 +0200 Subject: [PATCH 2/9] Nicer error message --- Objects/unicodeobject.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0211ab49bc56dc..aac8bfed672db6 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14618,10 +14618,12 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, } static const char * -as_const_char(PyObject *obj) +as_const_char(PyObject *obj, const char *name) { if (!PyUnicode_Check(obj)) { - _PyArg_BadArgument("str", "argument", "str", obj); + PyErr_Format(PyExc_TypeError, + "str() argument '%s' must be str, not %T", + name, obj); return NULL; } Py_ssize_t sz; @@ -14684,14 +14686,14 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (nkwargs == 1) { PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); if (_PyUnicode_EqualToASCIIString(kw0, "encoding")) { - encoding = as_const_char(args[1]); + encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; } errors = NULL; } else if (_PyUnicode_EqualToASCIIString(kw0, "errors")) { - errors = as_const_char(args[1]); + errors = as_const_char(args[1], "errors"); if (errors == NULL) { return NULL; } @@ -14704,7 +14706,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } } else if (nkwargs == 0) { - encoding = as_const_char(args[1]); + encoding = as_const_char(args[1], "encoding"); errors = NULL; } else { @@ -14726,11 +14728,11 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } PyObject *object = args[0]; - const char *encoding = as_const_char(args[1]); + const char *encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; } - const char *errors = as_const_char(args[2]); + const char *errors = as_const_char(args[2], "errors"); if (errors == NULL) { return NULL; } From 1a29140ddb0ae26a8e79fe6bb700de7aaf243bb1 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 22:16:27 +0200 Subject: [PATCH 3/9] NEWS --- .../2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst new file mode 100644 index 00000000000000..1877ac5818cb69 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-10-22-16-18.gh-issue-117709.-_1YL0.rst @@ -0,0 +1,2 @@ +Speed up calls to :func:`str` by using the :pep:`590` ``vectorcall`` calling +convention. Patch by Erlend Aasland. From 311aa3cdca0199b620447425fda90e47feaf16dd Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 22:44:03 +0200 Subject: [PATCH 4/9] Don't call unicode_get_empty() directly; remove some unneeded assignments --- Objects/unicodeobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index aac8bfed672db6..01111c47b01abc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14674,7 +14674,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; if (nargs == 0 && nkwargs == 0) { - return unicode_get_empty(); + return unicode_new_impl(_PyType_CAST(type), NULL, NULL, NULL); } PyObject *object = args[0]; if (nargs == 1 && nkwargs == 0) { @@ -14712,7 +14712,6 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, else { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } - PyObject *object = args[0]; return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); } if (nargs + nkwargs == 3) { @@ -14727,7 +14726,6 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, else if (nkwargs != 0) { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } - PyObject *object = args[0]; const char *encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; From c59478c0e32c51b7ae0b6743a6aff13a57fa5e2a Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 10 Apr 2024 23:01:52 +0200 Subject: [PATCH 5/9] type is always PyUnicode_Type? --- Objects/unicodeobject.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 01111c47b01abc..f6d1c46ac799cd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14671,14 +14671,16 @@ static PyObject * unicode_vectorcall(PyObject *type, PyObject *const *args, size_t nargsf, PyObject *kwnames) { + assert(Py_Is(_PyType_CAST(type), &PyUnicode_Type)); + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); Py_ssize_t nkwargs = (kwnames) ? PyTuple_GET_SIZE(kwnames) : 0; if (nargs == 0 && nkwargs == 0) { - return unicode_new_impl(_PyType_CAST(type), NULL, NULL, NULL); + return unicode_get_empty(); } PyObject *object = args[0]; if (nargs == 1 && nkwargs == 0) { - return unicode_new_impl(_PyType_CAST(type), object, NULL, NULL); + return PyObject_Str(object); } if (nargs + nkwargs == 2) { const char *encoding; @@ -14712,7 +14714,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, else { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames); } - return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + return PyUnicode_FromEncodedObject(object, encoding, errors); } if (nargs + nkwargs == 3) { if (nkwargs == 1) { @@ -14734,7 +14736,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (errors == NULL) { return NULL; } - return unicode_new_impl(_PyType_CAST(type), object, encoding, errors); + return PyUnicode_FromEncodedObject(object, encoding, errors); } if (nargs > 3) { PyErr_Format(PyExc_TypeError, From 068de64f5343e222f73bfc8ef4c1b0e863cb8414 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:08:08 +0200 Subject: [PATCH 6/9] Address review: variable naming --- Objects/unicodeobject.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f6d1c46ac799cd..9d8838eb5f6d9f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14686,15 +14686,15 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, const char *encoding; const char *errors; if (nkwargs == 1) { - PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); - if (_PyUnicode_EqualToASCIIString(kw0, "encoding")) { + PyObject *key = PyTuple_GET_ITEM(kwnames, 0); + if (_PyUnicode_EqualToASCIIString(key, "encoding")) { encoding = as_const_char(args[1], "encoding"); if (encoding == NULL) { return NULL; } errors = NULL; } - else if (_PyUnicode_EqualToASCIIString(kw0, "errors")) { + else if (_PyUnicode_EqualToASCIIString(key, "errors")) { errors = as_const_char(args[1], "errors"); if (errors == NULL) { return NULL; @@ -14703,7 +14703,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } else { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", kw0); + "'%S' is an invalid keyword argument for str()", key); return NULL; } } @@ -14718,10 +14718,10 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } if (nargs + nkwargs == 3) { if (nkwargs == 1) { - PyObject *kw0 = PyTuple_GET_ITEM(kwnames, 0); - if (!_PyUnicode_EqualToASCIIString(kw0, "errors")) { + PyObject *key = PyTuple_GET_ITEM(kwnames, 0); + if (!_PyUnicode_EqualToASCIIString(key, "errors")) { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", kw0); + "'%S' is an invalid keyword argument for str()", key); return NULL; } } From 57c01a4f57e79407058dad119d7a2159cff3b507 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:09:27 +0200 Subject: [PATCH 7/9] Align exception messages with tp_call --- Objects/unicodeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9d8838eb5f6d9f..dc3934233f1542 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14703,7 +14703,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } else { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", key); + "str() got an unexpected keyword argument %R", key); return NULL; } } @@ -14721,7 +14721,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, PyObject *key = PyTuple_GET_ITEM(kwnames, 0); if (!_PyUnicode_EqualToASCIIString(key, "errors")) { PyErr_Format(PyExc_TypeError, - "'%S' is an invalid keyword argument for str()", key); + "str() got an unexpected keyword argument %R", key); return NULL; } } From 37b45ed8c5e69a97b911db20adaca46474646413 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:13:39 +0200 Subject: [PATCH 8/9] Address review: use better internal APIs --- Objects/unicodeobject.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index dc3934233f1542..5de4e0e8fa560e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14626,15 +14626,10 @@ as_const_char(PyObject *obj, const char *name) name, obj); return NULL; } - Py_ssize_t sz; - const char *str = PyUnicode_AsUTF8AndSize(obj, &sz); + const char *str = _PyUnicode_AsUTF8NoNUL(obj); if (str == NULL) { return NULL; } - if (strlen(str) != (size_t)sz) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return NULL; - } return str; } @@ -14642,28 +14637,15 @@ static PyObject * fallback_to_tp_call(PyObject *type, Py_ssize_t nargs, Py_ssize_t nkwargs, PyObject *const *args, PyObject *kwnames) { - PyObject *tuple = PyTuple_New(nargs); + PyObject *tuple = _PyTuple_FromArray(args, nargs); if (tuple == NULL) { return NULL; } - for (Py_ssize_t i = 0; i < nargs; i++) { - PyObject *value = Py_NewRef(args[i]); - PyTuple_SET_ITEM(tuple, i, value); - } - PyObject *dict = PyDict_New(); + PyObject *dict = _PyStack_AsDict(args + nargs, kwnames); if (dict == NULL) { Py_DECREF(tuple); return NULL; } - for (Py_ssize_t j = 0; j < nkwargs; j++) { - PyObject *key = PyTuple_GET_ITEM(kwnames, j); - PyObject *value = args[nargs + j]; - if (PyDict_SetItem(dict, key, value) < 0) { - Py_DECREF(tuple); - Py_DECREF(dict); - return NULL; - } - } return unicode_new(_PyType_CAST(type), tuple, dict); } From 7e141e792b5f733e4636958f5e5ef289c6a770e9 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 11 Apr 2024 10:14:47 +0200 Subject: [PATCH 9/9] Address review: explcitly initialise variables --- Objects/unicodeobject.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5de4e0e8fa560e..2a6f05d59baecd 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14665,8 +14665,8 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return PyObject_Str(object); } if (nargs + nkwargs == 2) { - const char *encoding; - const char *errors; + const char *encoding = NULL; + const char *errors = NULL; if (nkwargs == 1) { PyObject *key = PyTuple_GET_ITEM(kwnames, 0); if (_PyUnicode_EqualToASCIIString(key, "encoding")) { @@ -14674,14 +14674,12 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, if (encoding == NULL) { return NULL; } - errors = NULL; } else if (_PyUnicode_EqualToASCIIString(key, "errors")) { errors = as_const_char(args[1], "errors"); if (errors == NULL) { return NULL; } - encoding = NULL; } else { PyErr_Format(PyExc_TypeError, @@ -14691,7 +14689,6 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, } else if (nkwargs == 0) { encoding = as_const_char(args[1], "encoding"); - errors = NULL; } else { return fallback_to_tp_call(type, nargs, nkwargs, args, kwnames);