Skip to content

Commit f49a07b

Browse files
vstinner, ZeroIntensity, picnixz
authored
gh-133968: Add PyUnicodeWriter_WriteASCII() function (#133973)
Replace most PyUnicodeWriter_WriteUTF8() calls with PyUnicodeWriter_WriteASCII(). Unrelated change to please the linter: remove an unused import in test_ctypes. Co-authored-by: Peter Bierma <[email protected]> Co-authored-by: Bénédikt Tran <[email protected]>
1 parent 4109a9c commit f49a07b

File tree

17 files changed

+103
-31
lines changed

17 files changed

+103
-31
lines changed

Doc/c-api/unicode.rst

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1802,9 +1802,24 @@ object.
18021802
18031803
See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`.
18041804
1805+
.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size)
1806+
1807+
Write the ASCII string *str* into *writer*.
1808+
1809+
*size* is the string length in bytes. If *size* is equal to ``-1``, call
1810+
``strlen(str)`` to get the string length.
1811+
1812+
*str* must only contain ASCII characters. The behavior is undefined if
1813+
*str* contains non-ASCII characters.
1814+
1815+
On success, return ``0``.
1816+
On error, set an exception, leave the writer unchanged, and return ``-1``.
1817+
1818+
.. versionadded:: next
1819+
18051820
.. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size)
18061821
1807-
Writer the wide string *str* into *writer*.
1822+
Write the wide string *str* into *writer*.
18081823
18091824
*size* is a number of wide characters. If *size* is equal to ``-1``, call
18101825
``wcslen(str)`` to get the string length.

Doc/whatsnew/3.15.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,13 @@ New features
226226
functions as replacements for :c:func:`PySys_GetObject`.
227227
(Contributed by Serhiy Storchaka in :gh:`108512`.)
228228

229+
* Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
230+
into a :c:type:`PyUnicodeWriter`. The function is faster than
231+
:c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
232+
input string contains non-ASCII characters.
233+
(Contributed by Victor Stinner in :gh:`133968`.)
234+
235+
229236
Porting to Python 3.15
230237
----------------------
231238

Include/cpython/unicodeobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8(
478478
PyUnicodeWriter *writer,
479479
const char *str,
480480
Py_ssize_t size);
481+
PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII(
482+
PyUnicodeWriter *writer,
483+
const char *str,
484+
Py_ssize_t size);
481485
PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar(
482486
PyUnicodeWriter *writer,
483487
const wchar_t *str,

Lib/test/test_capi/test_unicode.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,13 @@ def test_utf8(self):
17761776
self.assertEqual(writer.finish(),
17771777
"ascii-latin1=\xE9-euro=\u20AC.")
17781778

1779+
def test_ascii(self):
1780+
writer = self.create_writer(0)
1781+
writer.write_ascii(b"Hello ", -1)
1782+
writer.write_ascii(b"", 0)
1783+
writer.write_ascii(b"Python! <truncated>", 6)
1784+
self.assertEqual(writer.finish(), "Hello Python")
1785+
17791786
def test_invalid_utf8(self):
17801787
writer = self.create_writer(0)
17811788
with self.assertRaises(UnicodeDecodeError):

Lib/test/test_ctypes/test_incomplete.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import ctypes
22
import unittest
3-
import warnings
43
from ctypes import Structure, POINTER, pointer, c_char_p
54

65
# String-based "incomplete pointers" were implemented in ctypes 0.6.3 (2003, when
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Add :c:func:`PyUnicodeWriter_WriteASCII` function to write an ASCII string
2+
into a :c:type:`PyUnicodeWriter`. The function is faster than
3+
:c:func:`PyUnicodeWriter_WriteUTF8`, but has undefined behavior if the
4+
input string contains non-ASCII characters. Patch by Victor Stinner.

Modules/_json.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14761476
int rv;
14771477

14781478
if (obj == Py_None) {
1479-
return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
1479+
return PyUnicodeWriter_WriteASCII(writer, "null", 4);
14801480
}
14811481
else if (obj == Py_True) {
1482-
return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
1482+
return PyUnicodeWriter_WriteASCII(writer, "true", 4);
14831483
}
14841484
else if (obj == Py_False) {
1485-
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
1485+
return PyUnicodeWriter_WriteASCII(writer, "false", 5);
14861486
}
14871487
else if (PyUnicode_Check(obj)) {
14881488
PyObject *encoded = encoder_encode_string(s, obj);
@@ -1649,7 +1649,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16491649

16501650
if (PyDict_GET_SIZE(dct) == 0) {
16511651
/* Fast path */
1652-
return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
1652+
return PyUnicodeWriter_WriteASCII(writer, "{}", 2);
16531653
}
16541654

16551655
if (s->markers != Py_None) {
@@ -1753,7 +1753,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17531753
return -1;
17541754
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
17551755
Py_DECREF(s_fast);
1756-
return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
1756+
return PyUnicodeWriter_WriteASCII(writer, "[]", 2);
17571757
}
17581758

17591759
if (s->markers != Py_None) {

Modules/_ssl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state,
563563
goto fail;
564564
}
565565
}
566-
if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) {
566+
if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) {
567567
goto fail;
568568
}
569569
}

Modules/_testcapi/unicode.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args)
332332
}
333333

334334

335+
static PyObject*
336+
writer_write_ascii(PyObject *self_raw, PyObject *args)
337+
{
338+
WriterObject *self = (WriterObject *)self_raw;
339+
if (writer_check(self) < 0) {
340+
return NULL;
341+
}
342+
343+
char *str;
344+
Py_ssize_t size;
345+
if (!PyArg_ParseTuple(args, "yn", &str, &size)) {
346+
return NULL;
347+
}
348+
349+
if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) {
350+
return NULL;
351+
}
352+
Py_RETURN_NONE;
353+
}
354+
355+
335356
static PyObject*
336357
writer_write_widechar(PyObject *self_raw, PyObject *args)
337358
{
@@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args))
513534
static PyMethodDef writer_methods[] = {
514535
{"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS},
515536
{"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS},
537+
{"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS},
516538
{"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS},
517539
{"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS},
518540
{"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS},

Objects/genericaliasobject.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p)
6565

6666
for (Py_ssize_t i = 0; i < len; i++) {
6767
if (i > 0) {
68-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
68+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
6969
return -1;
7070
}
7171
}
@@ -109,7 +109,7 @@ ga_repr(PyObject *self)
109109
}
110110
for (Py_ssize_t i = 0; i < len; i++) {
111111
if (i > 0) {
112-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
112+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
113113
goto error;
114114
}
115115
}
@@ -126,7 +126,7 @@ ga_repr(PyObject *self)
126126
}
127127
if (len == 0) {
128128
// for something like tuple[()] we should print a "()"
129-
if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) {
129+
if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) {
130130
goto error;
131131
}
132132
}

Objects/typevarobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs)
192192
for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) {
193193
PyObject *item = PyTuple_GET_ITEM(value, i);
194194
if (i > 0) {
195-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
195+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
196196
PyUnicodeWriter_Discard(writer);
197197
return NULL;
198198
}
@@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p)
273273
}
274274

275275
if (p == (PyObject *)&_PyNone_Type) {
276-
return PyUnicodeWriter_WriteUTF8(writer, "None", 4);
276+
return PyUnicodeWriter_WriteASCII(writer, "None", 4);
277277
}
278278

279279
if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 &&

Objects/unicodeobject.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14083,6 +14083,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
1408314083
return 0;
1408414084
}
1408514085

14086+
14087+
int
14088+
PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
14089+
const char *str,
14090+
Py_ssize_t size)
14091+
{
14092+
assert(writer != NULL);
14093+
_Py_AssertHoldsTstate();
14094+
14095+
_PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
14096+
return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
14097+
}
14098+
14099+
1408614100
int
1408714101
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
1408814102
const char *str,

Objects/unionobject.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ union_repr(PyObject *self)
290290
}
291291

292292
for (Py_ssize_t i = 0; i < len; i++) {
293-
if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) {
293+
if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) {
294294
goto error;
295295
}
296296
PyObject *p = PyTuple_GET_ITEM(alias->args, i);
@@ -300,12 +300,12 @@ union_repr(PyObject *self)
300300
}
301301

302302
#if 0
303-
PyUnicodeWriter_WriteUTF8(writer, "|args=", 6);
303+
PyUnicodeWriter_WriteASCII(writer, "|args=", 6);
304304
PyUnicodeWriter_WriteRepr(writer, alias->args);
305-
PyUnicodeWriter_WriteUTF8(writer, "|h=", 3);
305+
PyUnicodeWriter_WriteASCII(writer, "|h=", 3);
306306
PyUnicodeWriter_WriteRepr(writer, alias->hashable_args);
307307
if (alias->unhashable_args) {
308-
PyUnicodeWriter_WriteUTF8(writer, "|u=", 3);
308+
PyUnicodeWriter_WriteASCII(writer, "|u=", 3);
309309
PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args);
310310
}
311311
#endif

Parser/asdl_c.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,7 +1512,7 @@ def visitModule(self, mod):
15121512
15131513
for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) {
15141514
if (i > 0) {
1515-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
1515+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
15161516
goto error;
15171517
}
15181518
}
@@ -1536,7 +1536,7 @@ def visitModule(self, mod):
15361536
}
15371537
15381538
if (i == 0 && length > 2) {
1539-
if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) {
1539+
if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) {
15401540
goto error;
15411541
}
15421542
}
@@ -1640,7 +1640,7 @@ def visitModule(self, mod):
16401640
}
16411641
16421642
if (i > 0) {
1643-
if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) {
1643+
if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) {
16441644
Py_DECREF(name);
16451645
Py_DECREF(value_repr);
16461646
goto error;

Python/Python-ast.c

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/context.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -979,15 +979,15 @@ contextvar_tp_repr(PyObject *op)
979979
return NULL;
980980
}
981981

982-
if (PyUnicodeWriter_WriteUTF8(writer, "<ContextVar name=", 17) < 0) {
982+
if (PyUnicodeWriter_WriteASCII(writer, "<ContextVar name=", 17) < 0) {
983983
goto error;
984984
}
985985
if (PyUnicodeWriter_WriteRepr(writer, self->var_name) < 0) {
986986
goto error;
987987
}
988988

989989
if (self->var_default != NULL) {
990-
if (PyUnicodeWriter_WriteUTF8(writer, " default=", 9) < 0) {
990+
if (PyUnicodeWriter_WriteASCII(writer, " default=", 9) < 0) {
991991
goto error;
992992
}
993993
if (PyUnicodeWriter_WriteRepr(writer, self->var_default) < 0) {
@@ -1182,15 +1182,15 @@ token_tp_repr(PyObject *op)
11821182
if (writer == NULL) {
11831183
return NULL;
11841184
}
1185-
if (PyUnicodeWriter_WriteUTF8(writer, "<Token", 6) < 0) {
1185+
if (PyUnicodeWriter_WriteASCII(writer, "<Token", 6) < 0) {
11861186
goto error;
11871187
}
11881188
if (self->tok_used) {
1189-
if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) {
1189+
if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) {
11901190
goto error;
11911191
}
11921192
}
1193-
if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) {
1193+
if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) {
11941194
goto error;
11951195
}
11961196
if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) {

Python/hamt.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,7 +1176,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
11761176
}
11771177

11781178
if (key_or_null == NULL) {
1179-
if (PyUnicodeWriter_WriteUTF8(writer, "NULL:\n", -1) < 0) {
1179+
if (PyUnicodeWriter_WriteASCII(writer, "NULL:\n", 6) < 0) {
11801180
goto error;
11811181
}
11821182

@@ -1194,7 +1194,7 @@ hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
11941194
}
11951195
}
11961196

1197-
if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
1197+
if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
11981198
goto error;
11991199
}
12001200
}
@@ -1915,7 +1915,7 @@ hamt_node_array_dump(PyHamtNode_Array *node,
19151915
goto error;
19161916
}
19171917

1918-
if (PyUnicodeWriter_WriteUTF8(writer, "\n", 1) < 0) {
1918+
if (PyUnicodeWriter_WriteASCII(writer, "\n", 1) < 0) {
19191919
goto error;
19201920
}
19211921
}

0 commit comments

Comments
 (0)