diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 4ea20bde38c1db..503a5ea014d4de 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -341,6 +341,72 @@ APIs: .. versionadded:: 3.3 +.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, Py_ssize_t *nbytes, uint32_t *format) + + Export the contents of the *unicode* string in one of the formats requested + by *requested_formats*. + + * On success, set *\*nbytes* and *\*format*, and return the contents. + * On error, set an exception and return ``NULL``. + + The contents are valid as long as *unicode* is valid. + + The export must be released by :c:func:`PyUnicode_ReleaseExport`. + The contents of the buffer are valid until the export is released. + + The buffer must not be modified. + + *unicode*, *nbytes* and *format* must not be ``NULL``. + + Available formats: + + .. c:namespace:: NULL + + =================================== ======== =========================== + Constant Identifier Value Description + =================================== ======== =========================== + .. c:macro:: PyUnicode_FORMAT_ASCII ``0x01`` ASCII string (``Py_UCS1*``) + .. c:macro:: PyUnicode_FORMAT_UCS1 ``0x02`` UCS-1 string (``Py_UCS1*``) + .. c:macro:: PyUnicode_FORMAT_UCS2 ``0x04`` UCS-2 string (``Py_UCS2*``) + .. c:macro:: PyUnicode_FORMAT_UCS4 ``0x08`` UCS-4 string (``Py_UCS4*``) + .. c:macro:: PyUnicode_FORMAT_UTF8 ``0x10`` UTF-8 string (``char*``) + =================================== ======== =========================== + + *requested_formats* can be a single format or a bitwise combination of the + formats in the table above. + On success, *\*format* will be set to a single one of the requested flags. + + Note that future versions of Python may introduce additional formats. + + .. versionadded:: 3.14 + + +.. c:function:: void PyUnicode_ReleaseExport(PyObject *unicode, const void* data, uint32_t format) + + Release an export created by :c:func:`PyUnicode_Export`.
+ + Each argument must match the corresponding argument or result of + a single earlier call to :c:func:`PyUnicode_Export`. + In particular, this means that you must hold a reference to *unicode* + while an export is valid. + + .. versionadded:: 3.14 + + +.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format) + + Create a string object from a buffer in an “export format”. + + * Return a reference to a new string object on success. + * Set an exception and return ``NULL`` on error. + + *data* must not be NULL. *nbytes* must be positive or zero. + + See :c:func:`PyUnicode_Export` for the available formats. + + .. versionadded:: 3.14 + + .. c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \ Py_ssize_t size) diff --git a/Doc/conf.py b/Doc/conf.py index 8a14646801ebac..f23e4a93fd3311 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -140,6 +140,7 @@ ('c:type', 'size_t'), ('c:type', 'ssize_t'), ('c:type', 'time_t'), + ('c:type', 'uint32_t'), ('c:type', 'uint64_t'), ('c:type', 'uintmax_t'), ('c:type', 'uintptr_t'), diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index c18c813104cf65..80222096f3a0b6 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -776,6 +776,7 @@ function,PyUnicode_EncodeFSDefault,3.2,, function,PyUnicode_EncodeLocale,3.7,, function,PyUnicode_EqualToUTF8,3.13,, function,PyUnicode_EqualToUTF8AndSize,3.13,, +function,PyUnicode_Export,3.14,, function,PyUnicode_FSConverter,3.2,, function,PyUnicode_FSDecoder,3.2,, function,PyUnicode_Find,3.2,, @@ -791,6 +792,7 @@ function,PyUnicode_FromStringAndSize,3.2,, function,PyUnicode_FromWideChar,3.2,, function,PyUnicode_GetDefaultEncoding,3.2,, function,PyUnicode_GetLength,3.7,, +function,PyUnicode_Import,3.14,, function,PyUnicode_InternFromString,3.2,, function,PyUnicode_InternInPlace,3.2,, function,PyUnicode_IsIdentifier,3.2,, @@ -799,6 +801,7 @@ function,PyUnicode_Partition,3.2,, 
function,PyUnicode_RPartition,3.2,, function,PyUnicode_RSplit,3.2,, function,PyUnicode_ReadChar,3.7,, +function,PyUnicode_ReleaseExport,3.14,, function,PyUnicode_Replace,3.2,, function,PyUnicode_Resize,3.2,, function,PyUnicode_RichCompare,3.2,, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 2eefa232cdcd02..9d256f5a3e0494 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -300,6 +300,11 @@ New Features (Contributed by Victor Stinner in :gh:`119182`.) +* Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to + export and import strings. + (Contributed by Victor Stinner in :gh:`119609`.) + + Porting to Python 3.14 ---------------------- diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index dee00715b3c51d..8263b6b64a04f4 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -248,6 +248,37 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( const char *u /* UTF-8 encoded string */ ); +#define PyUnicode_FORMAT_ASCII 0x01 +#define PyUnicode_FORMAT_UCS1 0x02 +#define PyUnicode_FORMAT_UCS2 0x04 +#define PyUnicode_FORMAT_UCS4 0x08 +#define PyUnicode_FORMAT_UTF8 0x10 + +// Get the content of a string in the requested format: +// - Return the content, set '*nbytes' and '*format' on success. +// - Set an exception and return NULL on error. +// +// The export must be released by PyUnicode_ReleaseExport(). +PyAPI_FUNC(const void*) PyUnicode_Export( + PyObject *unicode, + uint32_t requested_formats, + Py_ssize_t *nbytes, + uint32_t *format); + +// Release an export created by PyUnicode_Export(). +PyAPI_FUNC(void) PyUnicode_ReleaseExport( + PyObject *unicode, + const void* data, + uint32_t format); + +// Create a string object from a string in the format 'format'. +// - Return a reference to a new string object on success. +// - Set an exception and return NULL on error. 
+PyAPI_FUNC(PyObject*) PyUnicode_Import( + const void *data, + Py_ssize_t nbytes, + uint32_t format); + /* --- wchar_t support for platforms which support it --------------------- */ #ifdef HAVE_WCHAR_H diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 36106b0730dd26..03483ecf116bb8 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -28,6 +28,14 @@ class Str(str): pass +PyUnicode_FORMAT_ASCII = 0x01 +PyUnicode_FORMAT_UCS1 = 0x02 +PyUnicode_FORMAT_UCS2 = 0x04 +PyUnicode_FORMAT_UCS4 = 0x08 +PyUnicode_FORMAT_UTF8 = 0x10 +# Invalid native format +PyUnicode_FORMAT_INVALID = 0x20 + class CAPITest(unittest.TestCase): @support.cpython_only @@ -1679,6 +1687,162 @@ def test_pep393_utf8_caching_bug(self): # Check that the second call returns the same result self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) + def test_unicode_export(self): + # Test PyUnicode_Export() and PyUnicode_ReleaseExport() + unicode_export = _testlimitedcapi.unicode_export + if sys.byteorder == 'little': + ucs2_enc = 'utf-16le' + ucs4_enc = 'utf-32le' + else: + ucs2_enc = 'utf-16be' + ucs4_enc = 'utf-32be' + + # export to the native format + formats = (PyUnicode_FORMAT_ASCII + | PyUnicode_FORMAT_UCS1 + | PyUnicode_FORMAT_UCS2 + | PyUnicode_FORMAT_UCS4) + self.assertEqual(unicode_export("abc", formats), + (b'abc', PyUnicode_FORMAT_ASCII)) + self.assertEqual(unicode_export("latin1:\xe9", formats), + (b'latin1:\xe9', PyUnicode_FORMAT_UCS1)) + self.assertEqual(unicode_export('ucs2:\u20ac', formats), + ('ucs2:\u20ac'.encode(ucs2_enc), + PyUnicode_FORMAT_UCS2)) + self.assertEqual(unicode_export('ucs4:\U0010ffff', formats), + ('ucs4:\U0010ffff'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4)) + + # export ASCII as UCS1 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1), + (b'abc', PyUnicode_FORMAT_UCS1)) + + # export ASCII and UCS1 to UCS2 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS2), +
('abc'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2)) + self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS2), + ('latin1:\xe9'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2)) + + # always export to UCS4 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4), + ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4)) + self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS4), + ('latin1:\xe9'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4)) + self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UCS4), + ('ucs2:\u20ac'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4)) + self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4), + ('ucs4:\U0010ffff'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4)) + + # always export to UTF8 + self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UTF8), + ('abc'.encode('utf8'), PyUnicode_FORMAT_UTF8)) + self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UTF8), + ('latin1:\xe9'.encode('utf8'), PyUnicode_FORMAT_UTF8)) + self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UTF8), + ('ucs2:\u20ac'.encode('utf8'), + PyUnicode_FORMAT_UTF8)) + self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UTF8), + ('ucs4:\U0010ffff'.encode('utf8'), + PyUnicode_FORMAT_UTF8)) + + # No supported format or invalid format + with self.assertRaisesRegex(ValueError, + "unable to find a matching export format"): + unicode_export('abc', 0) + with self.assertRaisesRegex(ValueError, + "unable to find a matching export format"): + unicode_export('abc', PyUnicode_FORMAT_INVALID) + + def test_unicode_import(self): + # Test PyUnicode_Import() + unicode_import = _testlimitedcapi.unicode_import + if sys.byteorder == 'little': + ucs2_enc = 'utf-16le' + ucs4_enc = 'utf-32le' + else: + ucs2_enc = 'utf-16be' + ucs4_enc = 'utf-32be' + + self.assertEqual(unicode_import(b'abc', PyUnicode_FORMAT_ASCII), + "abc") + self.assertEqual(unicode_import(b'latin1:\xe9', PyUnicode_FORMAT_UCS1), + "latin1:\xe9") + + 
self.assertEqual(unicode_import('ucs2:\u20ac'.encode(ucs2_enc), + PyUnicode_FORMAT_UCS2), + 'ucs2:\u20ac') + + self.assertEqual(unicode_import('ucs4:\U0010ffff'.encode(ucs4_enc), + PyUnicode_FORMAT_UCS4), + 'ucs4:\U0010ffff') + + text = "abc\xe9\U0010ffff" + self.assertEqual(unicode_import(text.encode('utf8'), + PyUnicode_FORMAT_UTF8), + text) + + # Empty string + for native_format in ( + PyUnicode_FORMAT_ASCII, + PyUnicode_FORMAT_UCS1, + PyUnicode_FORMAT_UCS2, + PyUnicode_FORMAT_UCS4, + PyUnicode_FORMAT_UTF8, + ): + with self.subTest(native_format=native_format): + self.assertEqual(unicode_import(b'', native_format), + '') + + # Invalid format + with self.assertRaises(ValueError): + unicode_import(b'', PyUnicode_FORMAT_INVALID) + + # Invalid size + ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc) + with self.assertRaises(ValueError): + unicode_import(ucs2[:-1], PyUnicode_FORMAT_UCS2) + ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc) + with self.assertRaises(ValueError): + unicode_import(ucs4[:-1], PyUnicode_FORMAT_UCS4) + with self.assertRaises(ValueError): + unicode_import(ucs4[:-2], PyUnicode_FORMAT_UCS4) + with self.assertRaises(ValueError): + unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4) + + def test_unicode_export_import_roundtrip(self): + unicode_export = _testlimitedcapi.unicode_export + unicode_import = _testlimitedcapi.unicode_import + + ASCII = PyUnicode_FORMAT_ASCII + UCS1 = PyUnicode_FORMAT_UCS1 + UCS2 = PyUnicode_FORMAT_UCS2 + UCS4 = PyUnicode_FORMAT_UCS4 + UTF8 = PyUnicode_FORMAT_UTF8 + ALL = (ASCII | UCS1 | UCS2 | UCS4 | UTF8) + + for string, allowed_formats in ( + ('', {ASCII, UCS1, UCS2, UCS4, UTF8}), + ('ascii', {ASCII, UCS1, UCS2, UCS4, UTF8}), + ('latin1:\xe9', {UCS1, UCS2, UCS4, UTF8}), + ('ucs2:\u20ac', {UCS2, UCS4, UTF8}), + ('ucs4:\U0001f638', {UCS4, UTF8}), + ): + for format in ASCII, UCS1, UCS2, UCS4, UTF8: + with self.subTest(string=string, format=format): + if format not in allowed_formats: + with self.assertRaises(ValueError): + 
unicode_export(string, format) + else: + buf, buf_fmt = unicode_export(string, format) + restored = unicode_import(buf, buf_fmt) + self.assertEqual(restored, string) + + buf, buf_fmt = unicode_export(string, ALL) + restored = unicode_import(buf, buf_fmt) + self.assertEqual(restored, string) + class PyUnicodeWriterTest(unittest.TestCase): def create_writer(self, size): diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 47dff5c28f6ff8..b4e977f4e972e2 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -798,6 +798,7 @@ def test_windows_feature_macros(self): "PyUnicode_EncodeLocale", "PyUnicode_EqualToUTF8", "PyUnicode_EqualToUTF8AndSize", + "PyUnicode_Export", "PyUnicode_FSConverter", "PyUnicode_FSDecoder", "PyUnicode_Find", @@ -814,6 +815,7 @@ def test_windows_feature_macros(self): "PyUnicode_GetDefaultEncoding", "PyUnicode_GetLength", "PyUnicode_GetSize", + "PyUnicode_Import", "PyUnicode_InternFromString", "PyUnicode_InternImmortal", "PyUnicode_InternInPlace", @@ -823,6 +825,7 @@ def test_windows_feature_macros(self): "PyUnicode_RPartition", "PyUnicode_RSplit", "PyUnicode_ReadChar", + "PyUnicode_ReleaseExport", "PyUnicode_Replace", "PyUnicode_Resize", "PyUnicode_RichCompare", diff --git a/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst new file mode 100644 index 00000000000000..3eae4543f087d0 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to +export and import strings. Patch by Victor Stinner. 
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 305978f9f0c5c4..e3c89af3799480 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2510,3 +2510,19 @@ [function.Py_TYPE] added = '3.14' +[function.PyUnicode_Export] + added = '3.14' +[function.PyUnicode_ReleaseExport] + added = '3.14' +[function.PyUnicode_Import] + added = '3.14' +[const.PyUnicode_FORMAT_ASCII] + added = '3.14' +[const.PyUnicode_FORMAT_UCS1] + added = '3.14' +[const.PyUnicode_FORMAT_UCS2] + added = '3.14' +[const.PyUnicode_FORMAT_UCS4] + added = '3.14' +[const.PyUnicode_FORMAT_UTF8] + added = '3.14' diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c index 2b70d09108a333..e059d349a18aa3 100644 --- a/Modules/_testlimitedcapi/unicode.c +++ b/Modules/_testlimitedcapi/unicode.c @@ -1837,6 +1837,66 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored)) #undef CHECK_FORMAT_0 } + +// Test PyUnicode_Export() +static PyObject* +unicode_export(PyObject *self, PyObject *args) +{ + PyObject *obj; + unsigned int requested_formats; + if (!PyArg_ParseTuple(args, "OI", &obj, &requested_formats)) { + return NULL; + } + + Py_ssize_t nbytes; + uint32_t format; + const char *data = PyUnicode_Export(obj, requested_formats, &nbytes, &format); + if (data == NULL) { + return NULL; + } + + // Make sure that the exported string ends with a NUL character + switch (format) + { + case PyUnicode_FORMAT_ASCII: + case PyUnicode_FORMAT_UCS1: + assert(data[nbytes] == 0); + break; + case PyUnicode_FORMAT_UCS2: + assert(data[nbytes] == 0); + assert(data[nbytes+1] == 0); + break; + case PyUnicode_FORMAT_UCS4: + assert(data[nbytes] == 0); + assert(data[nbytes+1] == 0); + assert(data[nbytes+2] == 0); + assert(data[nbytes+3] == 0); + break; + case PyUnicode_FORMAT_UTF8: + assert(data[nbytes] == 0); + break; + } + + PyObject *res = Py_BuildValue("y#I", data, nbytes, (unsigned int)format); + PyUnicode_ReleaseExport(obj, data, format); + return res; +} + + +// Test 
PyUnicode_Import() +static PyObject* +unicode_import(PyObject *self, PyObject *args) +{ + const void *data; + Py_ssize_t nbytes; + unsigned int format; + if (!PyArg_ParseTuple(args, "y#I", &data, &nbytes, &format)) { + return NULL; + } + return PyUnicode_Import(data, nbytes, format); +} + + static PyMethodDef TestMethods[] = { {"codec_incrementalencoder", codec_incrementalencoder, METH_VARARGS}, {"codec_incrementaldecoder", codec_incrementaldecoder, METH_VARARGS}, @@ -1924,6 +1984,8 @@ static PyMethodDef TestMethods[] = { {"unicode_format", unicode_format, METH_VARARGS}, {"unicode_contains", unicode_contains, METH_VARARGS}, {"unicode_isidentifier", unicode_isidentifier, METH_O}, + {"unicode_export", unicode_export, METH_VARARGS}, + {"unicode_import", unicode_import, METH_VARARGS}, {NULL}, }; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4c174cbc751091..480e00fbc9a15e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2289,6 +2289,171 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size) return res; } +const void* +PyUnicode_Export(PyObject *unicode, uint32_t requested_formats, + Py_ssize_t *nbytes, uint32_t *format) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode); + goto error; + } + Py_ssize_t len = PyUnicode_GET_LENGTH(unicode); + + // Native ASCII + if (PyUnicode_IS_ASCII(unicode) + && (requested_formats & PyUnicode_FORMAT_ASCII)) + { + *format = PyUnicode_FORMAT_ASCII; + *nbytes = len; + return PyUnicode_1BYTE_DATA(unicode); + } + + // Native UCS1 + int kind = PyUnicode_KIND(unicode); + if (kind == PyUnicode_1BYTE_KIND + && (requested_formats & PyUnicode_FORMAT_UCS1)) + { + *format = PyUnicode_FORMAT_UCS1; + *nbytes = len; + return PyUnicode_1BYTE_DATA(unicode); + } + + // Native UCS2 + if (kind == PyUnicode_2BYTE_KIND + && (requested_formats & PyUnicode_FORMAT_UCS2)) + { + *format = PyUnicode_FORMAT_UCS2; + *nbytes = len * 2; + return 
PyUnicode_2BYTE_DATA(unicode); + } + + // Convert ASCII or UCS1 to UCS2 + if (kind == PyUnicode_1BYTE_KIND + && requested_formats & PyUnicode_FORMAT_UCS2) + { + Py_UCS2 *ucs2 = PyMem_Malloc((len + 1) * sizeof(Py_UCS2)); + if (!ucs2) { + PyErr_NoMemory(); + goto error; + } + + _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2, + PyUnicode_1BYTE_DATA(unicode), + PyUnicode_1BYTE_DATA(unicode) + len, + ucs2); + ucs2[len] = 0; + + *format = PyUnicode_FORMAT_UCS2; + *nbytes = len * 2; + return ucs2; + } + + // Native UCS4 + if (kind == PyUnicode_4BYTE_KIND + && (requested_formats & PyUnicode_FORMAT_UCS4)) + { + *format = PyUnicode_FORMAT_UCS4; + *nbytes = len * 4; + return PyUnicode_4BYTE_DATA(unicode); + } + + // Convert ASCII, UCS1 or UCS2 to UCS4 + if (requested_formats & PyUnicode_FORMAT_UCS4) { + Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode); + if (ucs4 == NULL) { + goto error; + } + *format = PyUnicode_FORMAT_UCS4; + *nbytes = len * 4; + return ucs4; + } + + // Convert to UTF-8 + if (requested_formats & PyUnicode_FORMAT_UTF8) { + // Encode UCS1, UCS2 or UCS4 to UTF-8 + const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, nbytes); + if (utf8 == NULL) { + goto error; + } + *format = PyUnicode_FORMAT_UTF8; + return utf8; + } + + PyErr_Format(PyExc_ValueError, "unable to find a matching export format"); + +error: + *nbytes = 0; + *format = 0; + return NULL; +} + +void +PyUnicode_ReleaseExport(PyObject *unicode, const void* data, + uint32_t format) +{ + switch (format) + { + case PyUnicode_FORMAT_ASCII: + break; + case PyUnicode_FORMAT_UCS1: + break; + case PyUnicode_FORMAT_UCS2: + break; + case PyUnicode_FORMAT_UCS4: + if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) { + PyMem_Free((void*)data); + } + break; + case PyUnicode_FORMAT_UTF8: + break; + default: + // ignore silently an unknown format + break; + } +} + +PyObject* +PyUnicode_Import(const void *data, Py_ssize_t nbytes, + uint32_t format) +{ + if (nbytes < 0) { + PyErr_SetString(PyExc_ValueError, "Negative nbytes"); 
+ return NULL; + } + + switch (format) + { + case PyUnicode_FORMAT_ASCII: + return PyUnicode_DecodeASCII((const char*)data, nbytes, NULL); + + case PyUnicode_FORMAT_UCS1: + return _PyUnicode_FromUCS1(data, nbytes); + + case PyUnicode_FORMAT_UCS2: + if (nbytes % 2) { + PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 2: %zd", + nbytes); + return NULL; + } + return _PyUnicode_FromUCS2(data, nbytes / 2); + + case PyUnicode_FORMAT_UCS4: + if (nbytes % 4) { + PyErr_Format(PyExc_ValueError, "nbytes must be a multiple of 4: %zd", + nbytes); + return NULL; + } + return _PyUnicode_FromUCS4(data, nbytes / 4); + + case PyUnicode_FORMAT_UTF8: + return PyUnicode_DecodeUTF8((const char*)data, nbytes, NULL); + + default: + PyErr_Format(PyExc_ValueError, "unknown format: %i", format); + return NULL; + } +} + PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) { diff --git a/PC/python3dll.c b/PC/python3dll.c index 0bcf1cc507e1e8..3086a08c0b70f5 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -708,6 +708,7 @@ EXPORT_FUNC(PyUnicode_EncodeFSDefault) EXPORT_FUNC(PyUnicode_EncodeLocale) EXPORT_FUNC(PyUnicode_EqualToUTF8) EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize) +EXPORT_FUNC(PyUnicode_Export) EXPORT_FUNC(PyUnicode_Find) EXPORT_FUNC(PyUnicode_FindChar) EXPORT_FUNC(PyUnicode_Format) @@ -724,6 +725,7 @@ EXPORT_FUNC(PyUnicode_FSDecoder) EXPORT_FUNC(PyUnicode_GetDefaultEncoding) EXPORT_FUNC(PyUnicode_GetLength) EXPORT_FUNC(PyUnicode_GetSize) +EXPORT_FUNC(PyUnicode_Import) EXPORT_FUNC(PyUnicode_InternFromString) EXPORT_FUNC(PyUnicode_InternImmortal) EXPORT_FUNC(PyUnicode_InternInPlace) @@ -731,6 +733,7 @@ EXPORT_FUNC(PyUnicode_IsIdentifier) EXPORT_FUNC(PyUnicode_Join) EXPORT_FUNC(PyUnicode_Partition) EXPORT_FUNC(PyUnicode_ReadChar) +EXPORT_FUNC(PyUnicode_ReleaseExport) EXPORT_FUNC(PyUnicode_Replace) EXPORT_FUNC(PyUnicode_Resize) EXPORT_FUNC(PyUnicode_RichCompare)