Skip to content

Commit d6d0203

Browse files
committed
gh-119182: Add PyUnicodeWriter_WriteUCS4() function
1 parent 4628320 commit d6d0203

File tree

7 files changed

+110
-1
lines changed

7 files changed

+110
-1
lines changed

Doc/c-api/unicode.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,6 +1563,16 @@ object.
15631563
On success, return ``0``.
15641564
On error, set an exception, leave the writer unchanged, and return ``-1``.
15651565
1566+
.. c:function:: int PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *writer, Py_UCS4 *str, Py_ssize_t size)
1567+
1568+
Write the UCS4 string *str* into *writer*.
1569+
1570+
*size* is the number of UCS4 characters to write. If *size* is equal to ``-1``,
1571+
the string length is computed by searching for the terminating NUL character.
1572+
1573+
On success, return ``0``.
1574+
On error, set an exception, leave the writer unchanged, and return ``-1``.
1575+
15661576
.. c:function:: int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
15671577
15681578
Call :c:func:`PyObject_Str` on *obj* and write the output into *writer*.

Doc/whatsnew/3.14.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ New Features
291291
* :c:func:`PyUnicodeWriter_Finish`.
292292
* :c:func:`PyUnicodeWriter_WriteChar`.
293293
* :c:func:`PyUnicodeWriter_WriteUTF8`.
294+
* :c:func:`PyUnicodeWriter_WriteUCS4`.
294295
* :c:func:`PyUnicodeWriter_WriteWideChar`.
295296
* :c:func:`PyUnicodeWriter_WriteStr`.
296297
* :c:func:`PyUnicodeWriter_WriteRepr`.

Include/cpython/unicodeobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
463463
PyUnicodeWriter *writer,
464464
const wchar_t *str,
465465
Py_ssize_t size);
466+
PyAPI_FUNC(int) PyUnicodeWriter_WriteUCS4(
467+
PyUnicodeWriter *writer,
468+
Py_UCS4 *str,
469+
Py_ssize_t size);
466470

467471
PyAPI_FUNC(int) PyUnicodeWriter_WriteStr(
468472
PyUnicodeWriter *writer,

Lib/test/test_capi/test_unicode.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1784,8 +1784,24 @@ def test_widechar(self):
17841784
writer.write_widechar("latin1=\xE9")
17851785
writer.write_widechar("-")
17861786
writer.write_widechar("euro=\u20AC")
1787+
writer.write_char("-")
1788+
writer.write_ucs4("max=\U0010ffff", -1)
17871789
writer.write_char('.')
1788-
self.assertEqual(writer.finish(), "latin1=\xE9-euro=\u20AC.")
1790+
self.assertEqual(writer.finish(),
1791+
"latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
1792+
1793+
def test_ucs4(self):
1794+
writer = self.create_writer(0)
1795+
writer.write_ucs4("ascii", -1)
1796+
writer.write_char("-")
1797+
writer.write_ucs4("latin1=\xe9", -1)
1798+
writer.write_char("-")
1799+
writer.write_ucs4("euro=\u20ac", -1)
1800+
writer.write_char("-")
1801+
writer.write_ucs4("max=\U0010ffff", -1)
1802+
writer.write_char(".")
1803+
self.assertEqual(writer.finish(),
1804+
"ascii-latin1=\xE9-euro=\u20AC-max=\U0010ffff.")
17891805

17901806

17911807
@unittest.skipIf(ctypes is None, 'need ctypes')

Misc/NEWS.d/next/C API/2024-06-07-22-12-30.gh-issue-119182.yt8Ar7.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ Add a new :c:type:`PyUnicodeWriter` API to create a Python :class:`str` object:
55
* :c:func:`PyUnicodeWriter_Finish`.
66
* :c:func:`PyUnicodeWriter_WriteChar`.
77
* :c:func:`PyUnicodeWriter_WriteUTF8`.
8+
* :c:func:`PyUnicodeWriter_WriteUCS4`.
9+
* :c:func:`PyUnicodeWriter_WriteWideChar`.
810
* :c:func:`PyUnicodeWriter_WriteStr`.
911
* :c:func:`PyUnicodeWriter_WriteRepr`.
1012
* :c:func:`PyUnicodeWriter_WriteSubstring`.
1113
* :c:func:`PyUnicodeWriter_Format`.
14+
* :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
1215

1316
Patch by Victor Stinner.

Modules/_testcapi/unicode.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,36 @@ writer_write_widechar(PyObject *self_raw, PyObject *args)
360360
}
361361

362362

363+
/* Test helper behind the "write_ucs4" method: write_ucs4(str, size).
 *
 * Copies the Python string argument into a temporary Py_UCS4 buffer and
 * passes it to PyUnicodeWriter_WriteUCS4().  A size larger than the string
 * length is clamped to that length; smaller (including negative) sizes are
 * forwarded unchanged.
 *
 * Returns None on success.  Returns NULL if writer_check(), argument
 * parsing, the UCS4 copy, or the write itself fails.
 */
static PyObject*
writer_write_ucs4(PyObject *self_raw, PyObject *args)
{
    WriterObject *self = (WriterObject *)self_raw;
    if (writer_check(self) < 0) {
        return NULL;
    }

    PyObject *text;
    Py_ssize_t size;
    if (!PyArg_ParseTuple(args, "Un", &text, &size)) {
        return NULL;
    }

    /* Cap the requested size at the number of characters in the string. */
    Py_ssize_t text_len = PyUnicode_GET_LENGTH(text);
    if (size > text_len) {
        size = text_len;
    }

    Py_UCS4 *buffer = PyUnicode_AsUCS4Copy(text);
    if (buffer == NULL) {
        return NULL;
    }

    int status = PyUnicodeWriter_WriteUCS4(self->writer, buffer, size);
    /* The temporary copy is released whether or not the write succeeded. */
    PyMem_Free(buffer);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
391+
392+
363393
static PyObject*
364394
writer_write_str(PyObject *self_raw, PyObject *args)
365395
{
@@ -484,6 +514,7 @@ static PyMethodDef writer_methods[] = {
484514
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
485515
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
486516
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
517+
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
487518
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},
488519
{"write_repr", _PyCFunction_CAST(writer_write_repr), METH_VARARGS},
489520
{"write_substring", _PyCFunction_CAST(writer_write_substring), METH_VARARGS},

Objects/unicodeobject.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,50 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
22892289
return res;
22902290
}
22912291

2292+
2293+
int
2294+
PyUnicodeWriter_WriteUCS4(PyUnicodeWriter *pub_writer,
2295+
Py_UCS4 *str,
2296+
Py_ssize_t size)
2297+
{
2298+
_PyUnicodeWriter *writer = (_PyUnicodeWriter*)pub_writer;
2299+
2300+
if (size < 0) {
2301+
size = 0;
2302+
for (; str[size] != '\0'; size++);
2303+
}
2304+
2305+
if (size == 0) {
2306+
return 0;
2307+
}
2308+
2309+
Py_UCS4 max_char = ucs4lib_find_max_char(str, str + size);
2310+
2311+
if (_PyUnicodeWriter_Prepare(writer, size, max_char) < 0) {
2312+
return -1;
2313+
}
2314+
2315+
int kind = writer->kind;
2316+
void *data = (Py_UCS1*)writer->data + writer->pos * kind;
2317+
if (kind == PyUnicode_1BYTE_KIND) {
2318+
_PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS1,
2319+
str, str + size,
2320+
data);
2321+
}
2322+
else if (kind == PyUnicode_2BYTE_KIND) {
2323+
_PyUnicode_CONVERT_BYTES(Py_UCS4, Py_UCS2,
2324+
str, str + size,
2325+
data);
2326+
}
2327+
else {
2328+
memcpy(data, str, size * sizeof(Py_UCS4));
2329+
}
2330+
writer->pos += size;
2331+
2332+
return 0;
2333+
}
2334+
2335+
22922336
PyObject*
22932337
PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
22942338
{

0 commit comments

Comments
 (0)