From 389dfd9113d1864eb8f819e5cc47c103b5af103a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 12 Apr 2024 18:36:39 +0300 Subject: [PATCH 1/9] gh-112068: C API: Add support of nullable arguments in PyArg_Parse --- Lib/test/test_capi/test_getargs.py | 53 +++++++ Modules/_ctypes/_ctypes.c | 11 +- Modules/_json.c | 13 +- Modules/_threadmodule.c | 14 +- Modules/mmapmodule.c | 3 +- Python/getargs.c | 216 ++++++++++++++++++----------- 6 files changed, 199 insertions(+), 111 deletions(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 232aa2a80025dc..cc356cc6132a91 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1352,6 +1352,59 @@ def test_nested_tuple(self): "argument 1 must be sequence of length 1, not 0"): parse(((),), {}, '(' + f + ')', ['a']) + def test_nullable(self): + parse = _testcapi.parse_tuple_and_keywords + + def check(format, arg, allows_none=False): + # Because some format units (such as y*) require cleanup, + # we force the parsing code to perform the cleanup by adding + # an argument that always fails. + # By checking for an exception, we ensure that the parsing + # of the first argument was successful. + self.assertRaises(OverflowError, parse, + (arg, 256), {}, format + '?b', ['a', 'b']) + self.assertRaises(OverflowError, parse, + (None, 256), {}, format + '?b', ['a', 'b']) + self.assertRaises(OverflowError, parse, + (arg, 256), {}, format + 'b', ['a', 'b']) + self.assertRaises(OverflowError if allows_none else TypeError, parse, + (None, 256), {}, format + 'b', ['a', 'b']) + + check('b', 42) + check('B', 42) + check('h', 42) + check('H', 42) + check('i', 42) + check('I', 42) + check('n', 42) + check('l', 42) + check('k', 42) + check('L', 42) + check('K', 42) + check('f', 2.5) + check('d', 2.5) + check('D', 2.5j) + check('c', b'a') + check('C', 'a') + check('p', True, allows_none=True) + check('y', b'buffer') + check('y*', b'buffer') + check('y#', b'buffer') + check('s', 'string') + check('s*', 'string') + check('s#', 'string') + check('z', 'string', allows_none=True) + check('z*', 'string', allows_none=True) + check('z#', 'string', allows_none=True) + check('w*', bytearray(b'buffer')) + check('U', 'string') + check('S', b'bytes') + check('Y', bytearray(b'bytearray')) + check('O', object, allows_none=True) + + # TODO: Not tested: es?, es#?, et?, et#?, O!, O& + # TODO: Not implemented: (...)? + @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): rc, out, err = script_helper.assert_python_ok("-c", """if True: diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 1ff108a39320cf..c21715fda7c601 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3620,8 +3620,7 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) || - !(name == Py_None || PyUnicode_Check(name))) + if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); @@ -3686,10 +3685,8 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) void *handle; PyObject *paramflags = NULL; - if (!PyArg_ParseTuple(args, "O|O", &ftuple, ¶mflags)) + if (!PyArg_ParseTuple(args, "O|O?", &ftuple, ¶mflags)) return NULL; - if (paramflags == Py_None) - paramflags = NULL; ftuple = PySequence_Tuple(ftuple); if (!ftuple) @@ -3804,10 +3801,8 @@ PyCFuncPtr_FromVtblIndex(PyTypeObject *type, PyObject *args, PyObject *kwds) GUID *iid = NULL; Py_ssize_t iid_len = 0; - if (!PyArg_ParseTuple(args, "is|Oz#", &index, &name, ¶mflags, &iid, &iid_len)) + if (!PyArg_ParseTuple(args, "is|O?z#", &index, &name, ¶mflags, &iid, &iid_len)) return NULL; - if (paramflags == Py_None) - paramflags = NULL; ctypes_state *st = get_module_state_by_def(Py_TYPE(type)); if (!_validate_paramflags(st, type, paramflags)) { diff --git a/Modules/_json.c b/Modules/_json.c index c7fe1561bb1018..4fb18d3882ddf1 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1208,23 +1208,16 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; PyEncoderObject *s; - PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; + PyObject *markers = Py_None, *defaultfn, *encoder, *indent, *key_separator; PyObject *item_separator; int sort_keys, skipkeys, allow_nan; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist, - &markers, &defaultfn, &encoder, &indent, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!?OOOUUppp:make_encoder", kwlist, + &PyDict_Type, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan)) return NULL; - if (markers != Py_None && !PyDict_Check(markers)) { - PyErr_Format(PyExc_TypeError, - "make_encoder() argument 1 must be dict or None, " - "not %.200s", Py_TYPE(markers)->tp_name); - return NULL; - } - s = (PyEncoderObject *)type->tp_alloc(type, 0); if (s == NULL) return NULL; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 39d309729d88b8..0834f46a847d8e 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -1878,10 +1878,10 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, PyObject *func = NULL; int daemon = 1; thread_module_state *state = get_thread_state(module); - PyObject *hobj = NULL; + PyObject *hobj = Py_None; if (!PyArg_ParseTupleAndKeywords(fargs, fkwargs, - "O|Op:start_joinable_thread", keywords, - &func, &hobj, &daemon)) { + "O|O!?p:start_joinable_thread", keywords, + &func, state->thread_handle_type, &hobj, &daemon)) { return NULL; } @@ -1891,14 +1891,6 @@ thread_PyThread_start_joinable_thread(PyObject *module, PyObject *fargs, return NULL; } - if (hobj == NULL) { - hobj = Py_None; - } - else if (hobj != Py_None && !Py_IS_TYPE(hobj, state->thread_handle_type)) { - PyErr_SetString(PyExc_TypeError, "'handle' must be a _ThreadHandle"); - return NULL; - } - if (PySys_Audit("_thread.start_joinable_thread", "OiO", func, daemon, hobj) < 0) { return NULL; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 99a85e9e49ad47..9e0df26bac1c7c 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -23,7 +23,6 @@ #endif #include -#include "pycore_abstract.h" // _Py_convert_optional_to_ssize_t() #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_fileutils.h" // _Py_stat_struct @@ -512,7 +511,7 @@ mmap_read_method(mmap_object *self, Py_ssize_t num_bytes = PY_SSIZE_T_MAX, remaining; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "|O&:read", _Py_convert_optional_to_ssize_t, &num_bytes)) + if (!PyArg_ParseTuple(args, "|n?:read", &num_bytes)) return NULL; CHECK_VALID(NULL); diff --git a/Python/getargs.c b/Python/getargs.c index b96ce3a22dae7c..b2aad4696b6c52 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -21,6 +21,7 @@ PyAPI_FUNC(int) _PyArg_VaParseTupleAndKeywords_SizeT(PyObject *, PyObject *, const char *, const char * const *, va_list); #define FLAG_COMPAT 1 +#define FLAG_NULLABLE 4 typedef int (*destr_t)(PyObject *, void *); @@ -487,8 +488,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (!PySequence_Check(arg) || PyBytes_Check(arg)) { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be %d-item sequence, not %.50s", + "must be %d-item sequence%s, not %.50s", n, + (flags & FLAG_NULLABLE) ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } @@ -502,6 +504,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return msgbuf; } + flags &= ~FLAG_NULLABLE; format = *p_format; for (i = 0; i < n; i++) { const char *msg; @@ -573,7 +576,7 @@ _PyArg_BadArgument(const char *fname, const char *displayname, } static const char * -converterr(const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) +converterr(int flags, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) { assert(expected != NULL); assert(arg != NULL); @@ -583,20 +586,25 @@ converterr(const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) } else { PyOS_snprintf(msgbuf, bufsize, - "must be %.50s, not %.50s", expected, + "must be %.50s%s, not %.50s", expected, + ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) + ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); } return msgbuf; } static const char * -convertcharerr(const char *expected, const char *what, Py_ssize_t size, +convertcharerr(int flags, const char *expected, const char *what, Py_ssize_t size, char *msgbuf, size_t bufsize) { assert(expected != NULL); PyOS_snprintf(msgbuf, bufsize, - "must be %.50s, not %.50s of length %zd", - expected, what, size); + "must be %.50s%s, not %.50s of length %zd", + expected, + ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) + ? " or None" : "", + what, size); return msgbuf; } @@ -616,6 +624,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char *msgbuf, size_t bufsize, freelist_t *freelist) { #define RETURN_ERR_OCCURRED return msgbuf +#define HANDLE_NULLABLE \ + if (*format == '?') { \ + format++; \ + if (arg == Py_None) { \ + break; \ + } \ + flags |= FLAG_NULLABLE; \ + } + const char *format = *p_format; char c = *format++; @@ -625,6 +642,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'b': { /* unsigned byte -- very short int */ unsigned char *p = va_arg(*p_va, unsigned char *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -638,7 +656,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, "unsigned byte integer is greater than maximum"); RETURN_ERR_OCCURRED; } - else *p = (unsigned char) ival; break; } @@ -646,6 +663,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'B': {/* byte sized bitfield - both signed and unsigned values allowed */ unsigned char *p = va_arg(*p_va, unsigned char *); + HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -656,6 +674,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'h': {/* signed short int */ short *p = va_arg(*p_va, short *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -677,6 +696,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'H': { /* short int sized bitfield, both signed and unsigned allowed */ unsigned short *p = va_arg(*p_va, unsigned short *); + HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -687,6 +707,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'i': {/* signed int */ int *p = va_arg(*p_va, int *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -708,6 +729,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'I': { /* int sized bitfield, both signed and unsigned allowed */ unsigned int *p = va_arg(*p_va, unsigned int *); + HANDLE_NULLABLE; unsigned long ival = PyLong_AsUnsignedLongMask(arg); if (ival == (unsigned long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -720,6 +742,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, { PyObject *iobj; Py_ssize_t *p = va_arg(*p_va, Py_ssize_t *); + HANDLE_NULLABLE; Py_ssize_t ival = -1; iobj = _PyNumber_Index(arg); if (iobj != NULL) { @@ -733,6 +756,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } case 'l': {/* long int */ long *p = va_arg(*p_va, long *); + HANDLE_NULLABLE; long ival = PyLong_AsLong(arg); if (ival == -1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -743,17 +767,19 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'k': { /* long sized bitfield */ unsigned long *p = va_arg(*p_va, unsigned long *); + HANDLE_NULLABLE; unsigned long ival; if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongMask(arg); else - return converterr("int", arg, msgbuf, bufsize); + return converterr(flags, "int", arg, msgbuf, bufsize); *p = ival; break; } case 'L': {/* long long */ long long *p = va_arg( *p_va, long long * ); + HANDLE_NULLABLE; long long ival = PyLong_AsLongLong(arg); if (ival == (long long)-1 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -764,17 +790,19 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'K': { /* long long sized bitfield */ unsigned long long *p = va_arg(*p_va, unsigned long long *); + HANDLE_NULLABLE; unsigned long long ival; if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongLongMask(arg); else - return converterr("int", arg, msgbuf, bufsize); + return converterr(flags, "int", arg, msgbuf, bufsize); *p = ival; break; } case 'f': {/* float */ float *p = va_arg(*p_va, float *); + HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -785,6 +813,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'd': {/* double */ double *p = va_arg(*p_va, double *); + HANDLE_NULLABLE; double dval = PyFloat_AsDouble(arg); if (dval == -1.0 && PyErr_Occurred()) RETURN_ERR_OCCURRED; @@ -795,6 +824,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'D': {/* complex double */ Py_complex *p = va_arg(*p_va, Py_complex *); + HANDLE_NULLABLE; Py_complex cval; cval = PyComplex_AsCComplex(arg); if (PyErr_Occurred()) @@ -806,9 +836,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'c': {/* char */ char *p = va_arg(*p_va, char *); + HANDLE_NULLABLE; if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - return convertcharerr("a byte string of length 1", + return convertcharerr(flags, "a byte string of length 1", "a bytes object", PyBytes_GET_SIZE(arg), msgbuf, bufsize); } @@ -816,27 +847,28 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - return convertcharerr("a byte string of length 1", + return convertcharerr(flags, "a byte string of length 1", "a bytearray object", PyByteArray_GET_SIZE(arg), msgbuf, bufsize); } *p = PyByteArray_AS_STRING(arg)[0]; } else - return converterr("a byte string of length 1", arg, msgbuf, bufsize); + return converterr(flags, "a byte string of length 1", arg, msgbuf, bufsize); break; } case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); + HANDLE_NULLABLE; int kind; const void *data; if (!PyUnicode_Check(arg)) - return converterr("a unicode character", arg, msgbuf, bufsize); + return converterr(flags, "a unicode character", arg, msgbuf, bufsize); if (PyUnicode_GET_LENGTH(arg) != 1) { - return convertcharerr("a unicode character", + return convertcharerr(flags, "a unicode character", "a string", PyUnicode_GET_LENGTH(arg), msgbuf, bufsize); } @@ -849,6 +881,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'p': {/* boolean *p*redicate */ int *p = va_arg(*p_va, int *); + HANDLE_NULLABLE; int val = PyObject_IsTrue(arg); if (val > 0) *p = 1; @@ -867,24 +900,31 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count; if (*format == '*') { - if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) - return converterr(buf, arg, msgbuf, bufsize); format++; + HANDLE_NULLABLE; + if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) + return converterr(flags, buf, arg, msgbuf, bufsize); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } break; } - count = convertbuffer(arg, (const void **)p, &buf); - if (count < 0) - return converterr(buf, arg, msgbuf, bufsize); - if (*format == '#') { + else if (*format == '#') { Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - *psize = count; format++; - } else { + HANDLE_NULLABLE; + count = convertbuffer(arg, (const void **)p, &buf); + if (count < 0) + return converterr(flags, buf, arg, msgbuf, bufsize); + *psize = count; + } + else { + HANDLE_NULLABLE; + count = convertbuffer(arg, (const void **)p, &buf); + if (count < 0) + return converterr(flags, buf, arg, msgbuf, bufsize); if (strlen(*p) != (size_t)count) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); RETURN_ERR_OCCURRED; @@ -900,32 +940,35 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, /* "s*" or "z*" */ Py_buffer *p = (Py_buffer *)va_arg(*p_va, Py_buffer *); + format++; + HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0); else if (PyUnicode_Check(arg)) { Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(CONV_UNICODE, + return converterr(flags, CONV_UNICODE, arg, msgbuf, bufsize); PyBuffer_FillInfo(p, arg, (void *)sarg, len, 1, 0); } else { /* any bytes-like object */ const char *buf; if (getbuffer(arg, p, &buf) < 0) - return converterr(buf, arg, msgbuf, bufsize); + return converterr(flags, buf, arg, msgbuf, bufsize); } if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } - format++; } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ const void **p = (const void **)va_arg(*p_va, const char **); Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); + format++; + HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) { *p = NULL; *psize = 0; @@ -934,7 +977,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(CONV_UNICODE, + return converterr(flags, CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; *psize = len; @@ -944,22 +987,22 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) - return converterr(buf, arg, msgbuf, bufsize); + return converterr(flags, buf, arg, msgbuf, bufsize); *psize = count; } - format++; } else { /* "s" or "z" */ const char **p = va_arg(*p_va, const char **); Py_ssize_t len; sarg = NULL; + HANDLE_NULLABLE; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(CONV_UNICODE, + return converterr(flags, CONV_UNICODE, arg, msgbuf, bufsize); if (strlen(sarg) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -968,7 +1011,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = sarg; } else - return converterr(c == 'z' ? "str or None" : "str", + return converterr(flags, c == 'z' ? "str or None" : "str", arg, msgbuf, bufsize); } break; @@ -997,13 +1040,46 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, recode_strings = 0; else return converterr( - "(unknown parser marker combination)", + flags, "(unknown parser marker combination)", arg, msgbuf, bufsize); buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) - return converterr("(buffer is NULL)", + return converterr(flags, "(buffer is NULL)", arg, msgbuf, bufsize); + Py_ssize_t *psize = NULL; + if (*format == '#') { + /* Using buffer length parameter '#': + + - if *buffer is NULL, a new buffer of the + needed size is allocated and the data + copied into it; *buffer is updated to point + to the new buffer; the caller is + responsible for PyMem_Free()ing it after + usage + + - if *buffer is not NULL, the data is + copied to *buffer; *buffer_len has to be + set to the size of the buffer on input; + buffer overflow is signalled with an error; + buffer has to provide enough room for the + encoded string plus the trailing 0-byte + + - in both cases, *buffer_len is updated to + the size of the buffer /excluding/ the + trailing 0-byte + + */ + psize = va_arg(*p_va, Py_ssize_t*); + + format++; + if (psize == NULL) { + return converterr( + flags, "(buffer_len is NULL)", + arg, msgbuf, bufsize); + } + } + HANDLE_NULLABLE; /* Encode object */ if (!recode_strings && @@ -1024,7 +1100,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, encoding, NULL); if (s == NULL) - return converterr("(encoding failed)", + return converterr(flags, "(encoding failed)", arg, msgbuf, bufsize); assert(PyBytes_Check(s)); size = PyBytes_GET_SIZE(s); @@ -1034,42 +1110,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - recode_strings ? "str" : "str, bytes or bytearray", + flags, recode_strings ? "str" : "str, bytes or bytearray", arg, msgbuf, bufsize); } /* Write output; output is guaranteed to be 0-terminated */ - if (*format == '#') { - /* Using buffer length parameter '#': - - - if *buffer is NULL, a new buffer of the - needed size is allocated and the data - copied into it; *buffer is updated to point - to the new buffer; the caller is - responsible for PyMem_Free()ing it after - usage - - - if *buffer is not NULL, the data is - copied to *buffer; *buffer_len has to be - set to the size of the buffer on input; - buffer overflow is signalled with an error; - buffer has to provide enough room for the - encoded string plus the trailing 0-byte - - - in both cases, *buffer_len is updated to - the size of the buffer /excluding/ the - trailing 0-byte - - */ - Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); - - format++; - if (psize == NULL) { - Py_DECREF(s); - return converterr( - "(buffer_len is NULL)", - arg, msgbuf, bufsize); - } + if (psize != NULL) { if (*buffer == NULL) { *buffer = PyMem_NEW(char, size + 1); if (*buffer == NULL) { @@ -1080,7 +1126,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } } else { @@ -1114,7 +1160,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - "encoded string without null bytes", + flags, "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1125,7 +1171,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); - return converterr("(cleanup problem)", + return converterr(flags, "(cleanup problem)", arg, msgbuf, bufsize); } memcpy(*buffer, ptr, size+1); @@ -1136,29 +1182,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'S': { /* PyBytes object */ PyObject **p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; if (PyBytes_Check(arg)) *p = arg; else - return converterr("bytes", arg, msgbuf, bufsize); + return converterr(flags, "bytes", arg, msgbuf, bufsize); break; } case 'Y': { /* PyByteArray object */ PyObject **p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; if (PyByteArray_Check(arg)) *p = arg; else - return converterr("bytearray", arg, msgbuf, bufsize); + return converterr(flags, "bytearray", arg, msgbuf, bufsize); break; } case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; if (PyUnicode_Check(arg)) { *p = arg; } else - return converterr("str", arg, msgbuf, bufsize); + return converterr(flags, "str", arg, msgbuf, bufsize); break; } @@ -1169,10 +1218,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, type = va_arg(*p_va, PyTypeObject*); p = va_arg(*p_va, PyObject **); format++; + HANDLE_NULLABLE; if (PyType_IsSubtype(Py_TYPE(arg), type)) *p = arg; else - return converterr(type->tp_name, arg, msgbuf, bufsize); + return converterr(flags, type->tp_name, arg, msgbuf, bufsize); } else if (*format == '&') { @@ -1181,16 +1231,18 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, void *addr = va_arg(*p_va, void *); int res; format++; + HANDLE_NULLABLE; if (! (res = (*convert)(arg, addr))) - return converterr("(unspecified)", + return converterr(flags, "(unspecified)", arg, msgbuf, bufsize); if (res == Py_CLEANUP_SUPPORTED && addcleanup(addr, freelist, convert) == -1) - return converterr("(cleanup problem)", + return converterr(flags, "(cleanup problem)", arg, msgbuf, bufsize); } else { p = va_arg(*p_va, PyObject **); + HANDLE_NULLABLE; *p = arg; } break; @@ -1202,29 +1254,30 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (*format != '*') return converterr( - "(invalid use of 'w' format character)", + flags, "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; + HANDLE_NULLABLE; /* Caller is interested in Py_buffer, and the object supports it directly. The request implicitly asks for PyBUF_SIMPLE, so the result is C-contiguous with format 'B'. */ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr("read-write bytes-like object", + return converterr(flags, "read-write bytes-like object", arg, msgbuf, bufsize); } assert(PyBuffer_IsContiguous((Py_buffer *)p, 'C')); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - "(cleanup problem)", + flags, "(cleanup problem)", arg, msgbuf, bufsize); } break; } default: - return converterr("(impossible)", arg, msgbuf, bufsize); + return converterr(flags, "(impossible)", arg, msgbuf, bufsize); } @@ -2754,6 +2807,9 @@ skipitem(const char **p_format, va_list *p_va, int flags) return "impossible"; } + if (*format == '?') { + format++; + } *p_format = format; return NULL; From 19be6b792a026b9c2fd4ca667cf1ec340ddbc4c0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 3 Jul 2024 10:11:25 +0300 Subject: [PATCH 2/9] Add more tests. --- Lib/test/test_capi/test_getargs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index cc356cc6132a91..525173c7073a4f 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1402,7 +1402,14 @@ def check(format, arg, allows_none=False): check('Y', bytearray(b'bytearray')) check('O', object, allows_none=True) - # TODO: Not tested: es?, es#?, et?, et#?, O!, O& + parse((None,), {}, 'es?', ['a']) + parse((None,), {}, 'es#?', ['a']) + parse((None,), {}, 'et?', ['a']) + parse((None,), {}, 'et#?', ['a']) + parse((None,), {}, 'O!?', ['a']) + parse((None,), {}, 'O&?', ['a']) + + # TODO: More tests for es?, es#?, et?, et#?, O!, O& # TODO: Not implemented: (...)? @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') From cd90539cbdae85877fb1516c03fbe5a13978df65 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 3 Jul 2024 18:38:50 +0300 Subject: [PATCH 3/9] Add more tests. --- Lib/test/test_capi/test_getargs.py | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 525173c7073a4f..9490c78d328e6c 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1352,6 +1352,59 @@ def test_nested_tuple(self): "argument 1 must be sequence of length 1, not 0"): parse(((),), {}, '(' + f + ')', ['a']) + def test_specific_type_errors(self): + parse = _testcapi.parse_tuple_and_keywords + + def check(format, arg, expected, got='list'): + errmsg = f'must be {expected}, not {got}' + with self.assertRaisesRegex(TypeError, errmsg): + parse((arg,), {}, format, ['a']) + + check('k', [], 'int') + check('k?', [], 'int or None') + check('K', [], 'int') + check('K?', [], 'int or None') + check('c', [], 'a byte string of length 1') + check('c?', [], 'a byte string of length 1 or None') + check('c', b'abc', 'a byte string of length 1', + 'a bytes object of length 3') + check('c?', b'abc', 'a byte string of length 1 or None', + 'a bytes object of length 3') + check('c', bytearray(b'abc'), 'a byte string of length 1', + 'a bytearray object of length 3') + check('c?', bytearray(b'abc'), 'a byte string of length 1 or None', + 'a bytearray object of length 3') + check('C', [], 'a unicode character') + check('C?', [], 'a unicode character or None') + check('C', 'abc', 'a unicode character', + 'a string of length 3') + check('C?', 'abc', 'a unicode character or None', + 'a string of length 3') + check('s', [], 'str') + check('s?', [], 'str or None') + check('z', [], 'str or None') + check('z?', [], 'str or None') + check('es', [], 'str') + check('es?', [], 'str or None') + check('es#', [], 'str') + check('es#?', [], 'str or None') + check('et', [], 'str, bytes or bytearray') + check('et?', [], 'str, bytes or bytearray or None') + check('et#', [], 'str, bytes or bytearray') + check('et#?', [], 'str, bytes or bytearray or None') + check('w*', [], 'read-write bytes-like object') + check('w*?', [], 'read-write bytes-like object or None') + check('S', [], 'bytes') + check('S?', [], 'bytes or None') + check('U', [], 'str') + check('U?', [], 'str or None') + check('Y', [], 'bytearray') + check('Y?', [], 'bytearray or None') + check('(OO)', 42, '2-item sequence', 'int') + check('(OO)', (1, 2, 3), 'sequence of length 2', '3') + + # TODO: Not implemented: (...)? + def test_nullable(self): parse = _testcapi.parse_tuple_and_keywords From 75e8c5e4e0309fb8ba0f31931caffaf9f5bc402e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 09:40:00 +0300 Subject: [PATCH 4/9] Implement (...)?. --- Lib/test/test_capi/test_getargs.py | 10 +++++++--- Python/getargs.c | 29 ++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 9490c78d328e6c..b16e2455af61e8 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1401,10 +1401,9 @@ def check(format, arg, expected, got='list'): check('Y', [], 'bytearray') check('Y?', [], 'bytearray or None') check('(OO)', 42, '2-item sequence', 'int') + check('(OO)?', 42, '2-item sequence or None', 'int') check('(OO)', (1, 2, 3), 'sequence of length 2', '3') - # TODO: Not implemented: (...)? - def test_nullable(self): parse = _testcapi.parse_tuple_and_keywords @@ -1455,6 +1454,12 @@ def check(format, arg, allows_none=False): check('Y', bytearray(b'bytearray')) check('O', object, allows_none=True) + check('(OO)', (1, 2)) + self.assertEqual(parse((((1, 2), 3),), {}, '((OO)?O)', ['a']), (1, 2, 3)) + self.assertEqual(parse(((None, 3),), {}, '((OO)?O)', ['a']), (NULL, NULL, 3)) + self.assertEqual(parse((((1, 2), 3),), {}, '((OO)O)', ['a']), (1, 2, 3)) + self.assertRaises(TypeError, parse, ((None, 3),), {}, '((OO)O)', ['a']) + parse((None,), {}, 'es?', ['a']) parse((None,), {}, 'es#?', ['a']) parse((None,), {}, 'et?', ['a']) @@ -1463,7 +1468,6 @@ def check(format, arg, allows_none=False): parse((None,), {}, 'O&?', ['a']) # TODO: More tests for es?, es#?, et?, et#?, O!, O& - # TODO: Not implemented: (...)? @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): diff --git a/Python/getargs.c b/Python/getargs.c index b2aad4696b6c52..096be328cf1036 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -54,6 +54,7 @@ static const char *converttuple(PyObject *, const char **, va_list *, int, int *, char *, size_t, freelist_t *); static const char *convertsimple(PyObject *, const char **, va_list *, int, char *, size_t, freelist_t *); +static const char * converterr(int, const char *, PyObject *, char *, size_t); static Py_ssize_t convertbuffer(PyObject *, const void **p, const char **); static int getbuffer(PyObject *, Py_buffer *, const char**); @@ -467,6 +468,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, int i; Py_ssize_t len; + assert(*format == '('); + format++; for (;;) { int c = *format++; if (c == '(') { @@ -475,8 +478,12 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, level++; } else if (c == ')') { - if (level == 0) + if (level == 0) { + if (*format == '?') { + flags |= FLAG_NULLABLE; + } break; + } level--; } else if (c == ':' || c == ';' || c == '\0') @@ -485,6 +492,17 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, n++; } + if (arg == Py_None && (flags & FLAG_NULLABLE)) { + format = *p_format; + const char *msg = skipitem(&format, p_va, flags); + if (msg == NULL) { + *p_format = format; + } + else { + levels[0] = 0; + } + return msg; + } if (!PySequence_Check(arg) || PyBytes_Check(arg)) { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, @@ -505,7 +523,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } flags &= ~FLAG_NULLABLE; - format = *p_format; + format = *p_format + 1; for (i = 0; i < n; i++) { const char *msg; PyObject *item; @@ -527,6 +545,10 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } + format++; + if (*format == '?') { + format++; + } *p_format = format; return NULL; } @@ -542,11 +564,8 @@ convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; if (*format == '(' /* ')' */) { - format++; msg = converttuple(arg, &format, p_va, flags, levels, msgbuf, bufsize, freelist); - if (msg == NULL) - format++; } else { msg = convertsimple(arg, &format, p_va, flags, From 02e05f61ca949babf2890bfcb5051e62f363c495 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 09:46:44 +0300 Subject: [PATCH 5/9] Improve error message for et? and et#?. --- Lib/test/test_capi/test_getargs.py | 4 ++-- Python/getargs.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index b16e2455af61e8..75d49d071ef81b 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1389,9 +1389,9 @@ def check(format, arg, expected, got='list'): check('es#', [], 'str') check('es#?', [], 'str or None') check('et', [], 'str, bytes or bytearray') - check('et?', [], 'str, bytes or bytearray or None') + check('et?', [], 'str, bytes, bytearray or None') check('et#', [], 'str, bytes or bytearray') - check('et#?', [], 'str, bytes or bytearray or None') + check('et#?', [], 'str, bytes, bytearray or None') check('w*', [], 'read-write bytes-like object') check('w*?', [], 'read-write bytes-like object or None') check('S', [], 'bytes') diff --git a/Python/getargs.c b/Python/getargs.c index 096be328cf1036..4fcd5e88098f80 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1129,7 +1129,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - flags, recode_strings ? "str" : "str, bytes or bytearray", + flags, + recode_strings ? "str" : (flags & FLAG_NULLABLE) ? + "str, bytes, bytearray" : "str, bytes or bytearray", arg, msgbuf, bufsize); } From 9261aee2f1d86cb084528314a3b3ec8f922b25a5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 4 Jul 2024 11:35:28 +0300 Subject: [PATCH 6/9] Refactoring. --- Python/getargs.c | 110 +++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 57 deletions(-) diff --git a/Python/getargs.c b/Python/getargs.c index 4fcd5e88098f80..7905fc710824b7 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1,6 +1,8 @@ /* New getargs implementation */ +#include + #define PY_CXX_CONST const #include "Python.h" #include "pycore_abstract.h" // _PyNumber_Index() @@ -21,7 +23,6 @@ PyAPI_FUNC(int) _PyArg_VaParseTupleAndKeywords_SizeT(PyObject *, PyObject *, const char *, const char * const *, va_list); #define FLAG_COMPAT 1 -#define FLAG_NULLABLE 4 typedef int (*destr_t)(PyObject *, void *); @@ -54,7 +55,6 @@ static const char *converttuple(PyObject *, const char **, va_list *, int, int *, char *, size_t, freelist_t *); static const char *convertsimple(PyObject *, const char **, va_list *, int, char *, size_t, freelist_t *); -static const char * converterr(int, const char *, PyObject *, char *, size_t); static Py_ssize_t convertbuffer(PyObject *, const void **p, const char **); static int getbuffer(PyObject *, Py_buffer *, const char**); @@ -467,6 +467,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; + bool nullable = false; assert(*format == '('); format++; @@ -480,7 +481,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, else if (c == ')') { if (level == 0) { if (*format == '?') { - flags |= FLAG_NULLABLE; + nullable = true; } break; } @@ -492,13 +493,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, n++; } - if (arg == Py_None && (flags & FLAG_NULLABLE)) { - format = *p_format; - const char *msg = skipitem(&format, p_va, flags); - if (msg == NULL) { - *p_format = format; - } - else { + if (arg == Py_None && nullable) { + const char *msg = skipitem(p_format, p_va, flags); + if (msg != NULL) { levels[0] = 0; } return msg; @@ -508,7 +505,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, PyOS_snprintf(msgbuf, bufsize, "must be %d-item sequence%s, not %.50s", n, - (flags & FLAG_NULLABLE) ? " or None" : "", + nullable ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } @@ -522,7 +519,6 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return msgbuf; } - flags &= ~FLAG_NULLABLE; format = *p_format + 1; for (i = 0; i < n; i++) { const char *msg; @@ -595,7 +591,7 @@ _PyArg_BadArgument(const char *fname, const char *displayname, } static const char * -converterr(int flags, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) +converterr(bool nullable, const char *expected, PyObject *arg, char *msgbuf, size_t bufsize) { assert(expected != NULL); assert(arg != NULL); @@ -606,23 +602,21 @@ converterr(int flags, const char *expected, PyObject *arg, char *msgbuf, size_t else { PyOS_snprintf(msgbuf, bufsize, "must be %.50s%s, not %.50s", expected, - ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) - ? " or None" : "", + nullable ? " or None" : "", arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); } return msgbuf; } static const char * -convertcharerr(int flags, const char *expected, const char *what, Py_ssize_t size, +convertcharerr(bool nullable, const char *expected, const char *what, Py_ssize_t size, char *msgbuf, size_t bufsize) { assert(expected != NULL); PyOS_snprintf(msgbuf, bufsize, "must be %.50s%s, not %.50s of length %zd", expected, - ((flags & FLAG_NULLABLE) && !strstr(expected, " or None")) - ? " or None" : "", + nullable ? " or None" : "", what, size); return msgbuf; } @@ -649,13 +643,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (arg == Py_None) { \ break; \ } \ - flags |= FLAG_NULLABLE; \ + nullable = true; \ } const char *format = *p_format; char c = *format++; const char *sarg; + bool nullable = false; switch (c) { @@ -791,7 +786,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongMask(arg); else - return converterr(flags, "int", arg, msgbuf, bufsize); + return converterr(nullable, "int", arg, msgbuf, bufsize); *p = ival; break; } @@ -814,7 +809,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongLongMask(arg); else - return converterr(flags, "int", arg, msgbuf, bufsize); + return converterr(nullable, "int", arg, msgbuf, bufsize); *p = ival; break; } @@ -858,7 +853,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, HANDLE_NULLABLE; if (PyBytes_Check(arg)) { if (PyBytes_GET_SIZE(arg) != 1) { - return convertcharerr(flags, "a byte string of length 1", + return convertcharerr(nullable, "a byte string of length 1", "a bytes object", PyBytes_GET_SIZE(arg), msgbuf, bufsize); } @@ -866,14 +861,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (PyByteArray_Check(arg)) { if (PyByteArray_GET_SIZE(arg) != 1) { - return convertcharerr(flags, "a byte string of length 1", + return convertcharerr(nullable, "a byte string of length 1", "a bytearray object", PyByteArray_GET_SIZE(arg), msgbuf, bufsize); } *p = PyByteArray_AS_STRING(arg)[0]; } else - return converterr(flags, "a byte string of length 1", arg, msgbuf, bufsize); + return converterr(nullable, "a byte string of length 1", arg, msgbuf, bufsize); break; } @@ -884,10 +879,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const void *data; if (!PyUnicode_Check(arg)) - return converterr(flags, "a unicode character", arg, msgbuf, bufsize); + return converterr(nullable, "a unicode character", arg, msgbuf, bufsize); if (PyUnicode_GET_LENGTH(arg) != 1) { - return convertcharerr(flags, "a unicode character", + return convertcharerr(nullable, "a unicode character", "a string", PyUnicode_GET_LENGTH(arg), msgbuf, bufsize); } @@ -922,10 +917,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; HANDLE_NULLABLE; if (getbuffer(arg, (Py_buffer*)p, &buf) < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } break; @@ -936,14 +931,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, HANDLE_NULLABLE; count = convertbuffer(arg, (const void **)p, &buf); if (count < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); *psize = count; } else { HANDLE_NULLABLE; count = convertbuffer(arg, (const void **)p, &buf); if (count < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); if (strlen(*p) != (size_t)count) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); RETURN_ERR_OCCURRED; @@ -967,18 +962,18 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(flags, CONV_UNICODE, + return converterr(nullable, CONV_UNICODE, arg, msgbuf, bufsize); PyBuffer_FillInfo(p, arg, (void *)sarg, len, 1, 0); } else { /* any bytes-like object */ const char *buf; if (getbuffer(arg, p, &buf) < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); } if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } } else if (*format == '#') { /* a string or read-only bytes-like object */ @@ -996,7 +991,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t len; sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(flags, CONV_UNICODE, + return converterr(nullable, CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; *psize = len; @@ -1006,7 +1001,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) - return converterr(flags, buf, arg, msgbuf, bufsize); + return converterr(nullable, buf, arg, msgbuf, bufsize); *psize = count; } } else { @@ -1021,7 +1016,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, else if (PyUnicode_Check(arg)) { sarg = PyUnicode_AsUTF8AndSize(arg, &len); if (sarg == NULL) - return converterr(flags, CONV_UNICODE, + return converterr(nullable, CONV_UNICODE, arg, msgbuf, bufsize); if (strlen(sarg) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); @@ -1030,7 +1025,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = sarg; } else - return converterr(flags, c == 'z' ? "str or None" : "str", + return converterr(c == 'z' || nullable, "str", arg, msgbuf, bufsize); } break; @@ -1059,12 +1054,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, recode_strings = 0; else return converterr( - flags, "(unknown parser marker combination)", + nullable, "(unknown parser marker combination)", arg, msgbuf, bufsize); buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) - return converterr(flags, "(buffer is NULL)", + return converterr(nullable, "(buffer is NULL)", arg, msgbuf, bufsize); Py_ssize_t *psize = NULL; if (*format == '#') { @@ -1094,7 +1089,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; if (psize == NULL) { return converterr( - flags, "(buffer_len is NULL)", + nullable, "(buffer_len is NULL)", arg, msgbuf, bufsize); } } @@ -1119,7 +1114,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, encoding, NULL); if (s == NULL) - return converterr(flags, "(encoding failed)", + return converterr(nullable, "(encoding failed)", arg, msgbuf, bufsize); assert(PyBytes_Check(s)); size = PyBytes_GET_SIZE(s); @@ -1129,9 +1124,10 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { return converterr( - flags, - recode_strings ? "str" : (flags & FLAG_NULLABLE) ? - "str, bytes, bytearray" : "str, bytes or bytearray", + nullable, + recode_strings ? "str" + : nullable ? "str, bytes, bytearray" + : "str, bytes or bytearray", arg, msgbuf, bufsize); } @@ -1147,7 +1143,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } } else { @@ -1181,7 +1177,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - flags, "encoded string without null bytes", + nullable, "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1192,7 +1188,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } if (addcleanup(buffer, freelist, cleanup_ptr)) { Py_DECREF(s); - return converterr(flags, "(cleanup problem)", + return converterr(nullable, "(cleanup problem)", arg, msgbuf, bufsize); } memcpy(*buffer, ptr, size+1); @@ -1207,7 +1203,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyBytes_Check(arg)) *p = arg; else - return converterr(flags, "bytes", arg, msgbuf, bufsize); + return converterr(nullable, "bytes", arg, msgbuf, bufsize); break; } @@ -1217,7 +1213,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyByteArray_Check(arg)) *p = arg; else - return converterr(flags, "bytearray", arg, msgbuf, bufsize); + return converterr(nullable, "bytearray", arg, msgbuf, bufsize); break; } @@ -1228,7 +1224,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = arg; } else - return converterr(flags, "str", arg, msgbuf, bufsize); + return converterr(nullable, "str", arg, msgbuf, bufsize); break; } @@ -1243,7 +1239,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (PyType_IsSubtype(Py_TYPE(arg), type)) *p = arg; else - return converterr(flags, type->tp_name, arg, msgbuf, bufsize); + return converterr(nullable, type->tp_name, arg, msgbuf, bufsize); } else if (*format == '&') { @@ -1254,11 +1250,11 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, format++; HANDLE_NULLABLE; if (! (res = (*convert)(arg, addr))) - return converterr(flags, "(unspecified)", + return converterr(nullable, "(unspecified)", arg, msgbuf, bufsize); if (res == Py_CLEANUP_SUPPORTED && addcleanup(addr, freelist, convert) == -1) - return converterr(flags, "(cleanup problem)", + return converterr(nullable, "(cleanup problem)", arg, msgbuf, bufsize); } else { @@ -1275,7 +1271,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (*format != '*') return converterr( - flags, "(invalid use of 'w' format character)", + nullable, "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; HANDLE_NULLABLE; @@ -1285,20 +1281,20 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, result is C-contiguous with format 'B'. */ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr(flags, "read-write bytes-like object", + return converterr(nullable, "read-write bytes-like object", arg, msgbuf, bufsize); } assert(PyBuffer_IsContiguous((Py_buffer *)p, 'C')); if (addcleanup(p, freelist, cleanup_buffer)) { return converterr( - flags, "(cleanup problem)", + nullable, "(cleanup problem)", arg, msgbuf, bufsize); } break; } default: - return converterr(flags, "(impossible)", arg, msgbuf, bufsize); + return converterr(nullable, "(impossible)", arg, msgbuf, bufsize); } From 8df697e3a86fafdf15cf442c8ba391a4e4d65c29 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 9 Jan 2025 11:07:17 +0200 Subject: [PATCH 7/9] Add more examples. --- Modules/_interpretersmodule.c | 9 +++------ Modules/_threadmodule.c | 4 ++-- Modules/mmapmodule.c | 12 +++--------- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index a36823c4bb982b..03c29e9ae2350d 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1245,14 +1245,11 @@ interp_get_config(PyObject *self, PyObject *args, PyObject *kwds) PyObject *idobj = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O|$p:get_config", kwlist, + "O?|$p:get_config", kwlist, &idobj, &restricted)) { return NULL; } - if (idobj == Py_None) { - idobj = NULL; - } int reqready = 0; PyInterpreterState *interp = \ @@ -1369,14 +1366,14 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"exc", NULL}; PyObject *exc_arg = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "|O:capture_exception", kwlist, + "|O?:capture_exception", kwlist, &exc_arg)) { return NULL; } PyObject *exc = exc_arg; - if (exc == NULL || exc == Py_None) { + if (exc == NULL) { exc = PyErr_GetRaisedException(); if (exc == NULL) { Py_RETURN_NONE; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index fd0ff279d9c5a6..56a568f6dbd920 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -641,12 +641,12 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args) PyThreadHandleObject *self = (PyThreadHandleObject*)op; PyObject *timeout_obj = NULL; - if (!PyArg_ParseTuple(args, "|O:join", &timeout_obj)) { + if (!PyArg_ParseTuple(args, "|O?:join", &timeout_obj)) { return NULL; } PyTime_t timeout_ns = -1; - if (timeout_obj != NULL && timeout_obj != Py_None) { + if (timeout_obj != NULL) { if (_PyTime_FromSecondsObject(&timeout_ns, timeout_obj, _PyTime_ROUND_TIMEOUT) < 0) { return NULL; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index c7a730bcea237e..ad45ffbac76424 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -1689,7 +1689,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) DWORD off_lo; /* lower 32 bits of offset */ DWORD size_hi; /* upper 32 bits of size */ DWORD size_lo; /* lower 32 bits of size */ - PyObject *tagname = Py_None; + PyObject *tagname = NULL; DWORD dwErr = 0; int fileno; HANDLE fh = 0; @@ -1699,7 +1699,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "tagname", "access", "offset", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|OiL", keywords, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|U?iL", keywords, &fileno, &map_size, &tagname, &access, &offset)) { return NULL; @@ -1832,13 +1832,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) m_obj->weakreflist = NULL; m_obj->exports = 0; /* set the tag name */ - if (!Py_IsNone(tagname)) { - if (!PyUnicode_Check(tagname)) { - Py_DECREF(m_obj); - return PyErr_Format(PyExc_TypeError, "expected str or None for " - "'tagname', not %.200s", - Py_TYPE(tagname)->tp_name); - } + if (tagname != NULL) { m_obj->tagname = PyUnicode_AsWideCharString(tagname, NULL); if (m_obj->tagname == NULL) { Py_DECREF(m_obj); From 7c1c3354456227602871058869de04fd8c959dd5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Apr 2025 15:03:46 +0300 Subject: [PATCH 8/9] Fix test for changed error message. --- Lib/test/test_mmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index b2a299ed172967..fd4197b7086976 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -732,7 +732,7 @@ def test_tagname(self): m2.close() m1.close() - with self.assertRaisesRegex(TypeError, 'tagname'): + with self.assertRaisesRegex(TypeError, 'must be str or None'): mmap.mmap(-1, 8, tagname=1) @cpython_only From d30e8094015d6003671c1f1760786d73b333eb9a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Apr 2025 15:33:03 +0300 Subject: [PATCH 9/9] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/c-api/arg.rst | 2 +- .../C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst | 2 +- Modules/_ctypes/_ctypes.c | 3 +-- Python/getargs.c | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 81dc7bd3e3fd70..0375bc57b6cf1e 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -375,7 +375,7 @@ Other objects If the argument is not ``None``, it is parsed according to the specified format unit. - .. versionadded:: 3.14 + .. versionadded:: next A few other characters have a meaning in a format string. These may not occur inside nested parentheses. They are: diff --git a/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst b/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst index 6022eb4306581f..d49b1735825e8a 100644 --- a/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst +++ b/Misc/NEWS.d/next/C_API/2025-01-08-18-55-57.gh-issue-112068.ofI5Fl.rst @@ -1,3 +1,3 @@ Add support of nullable arguments in :c:func:`PyArg_Parse` and similar -functions. Adding ``?`` after any format unit makea ``None`` be accepted as +functions. Adding ``?`` after any format unit makes ``None`` be accepted as a value. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index facc500ee10994..55e5eee0eb081a 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3848,8 +3848,7 @@ _validate_paramflags(ctypes_state *st, PyTypeObject *type, PyObject *paramflags) PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) - { + if (!PyArg_ParseTuple(item, "i|U?O", &flag, &name, &defval)) { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; diff --git a/Python/getargs.c b/Python/getargs.c index 6dfe64f7fc0826..53cc910a26d5c9 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -505,9 +505,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, "must be %d-item sequence%s, not %.50s", - n, - nullable ? " or None" : "", - arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); + n, + nullable ? " or None" : "", + arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; }