Skip to content

gh-133968: Use private unicode writer for json #133832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 31, 2025
89 changes: 40 additions & 49 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@ encoder_dealloc(PyObject *self);
static int
encoder_clear(PyObject *self);
static int
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache);
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache);
static int
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache);
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache);
static int
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache);
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache);
static PyObject *
_encoded_const(PyObject *obj);
static void
Expand Down Expand Up @@ -360,13 +360,6 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
return tpl;
}

static inline int
_PyUnicodeWriter_IsEmpty(PyUnicodeWriter *writer_pub)
{
_PyUnicodeWriter *writer = (_PyUnicodeWriter*)writer_pub;
return (writer->pos == 0);
}

static PyObject *
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
{
Expand All @@ -385,10 +378,9 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
const void *buf;
int kind;

PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
if (writer == NULL) {
goto bail;
}
_PyUnicodeWriter writer;
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;

len = PyUnicode_GET_LENGTH(pystr);
buf = PyUnicode_DATA(pystr);
Expand Down Expand Up @@ -419,12 +411,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next

if (c == '"') {
// Fast path for simple case.
if (_PyUnicodeWriter_IsEmpty(writer)) {
if (writer.buffer == NULL) {
PyObject *ret = PyUnicode_Substring(pystr, end, next);
if (ret == NULL) {
goto bail;
}
PyUnicodeWriter_Discard(writer);
*next_end_ptr = next + 1;;
return ret;
}
Expand All @@ -436,7 +427,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next

/* Pick up this chunk if it's not zero length */
if (next != end) {
if (PyUnicodeWriter_WriteSubstring(writer, pystr, end, next) < 0) {
if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
goto bail;
}
}
Expand Down Expand Up @@ -527,18 +518,18 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
end -= 6;
}
}
if (PyUnicodeWriter_WriteChar(writer, c) < 0) {
if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
goto bail;
}
}

rval = PyUnicodeWriter_Finish(writer);
rval = _PyUnicodeWriter_Finish(&writer);
*next_end_ptr = end;
return rval;

bail:
*next_end_ptr = -1;
PyUnicodeWriter_Discard(writer);
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}

Expand Down Expand Up @@ -1341,11 +1332,11 @@ get_item_separator(PyEncoderObject *s,
}

static int
write_newline_indent(PyUnicodeWriter *writer,
write_newline_indent(_PyUnicodeWriter *writer,
Py_ssize_t indent_level, PyObject *indent_cache)
{
PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2);
return PyUnicodeWriter_WriteStr(writer, newline_indent);
return _PyUnicodeWriter_WriteStr(writer, newline_indent);
}


Expand All @@ -1356,33 +1347,33 @@ encoder_call(PyObject *op, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
PyObject *obj;
Py_ssize_t indent_level;
_PyUnicodeWriter writer;
PyEncoderObject *self = PyEncoderObject_CAST(op);

if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
&obj, &indent_level))
return NULL;

PyUnicodeWriter *writer = PyUnicodeWriter_Create(0);
if (writer == NULL) {
return NULL;
}
_PyUnicodeWriter_Init(&writer);
writer.overallocate = 1;


PyObject *indent_cache = NULL;
if (self->indent != Py_None) {
indent_cache = create_indent_cache(self, indent_level);
if (indent_cache == NULL) {
PyUnicodeWriter_Discard(writer);
_PyUnicodeWriter_Dealloc(&writer);
return NULL;
}
}
if (encoder_listencode_obj(self, writer, obj, indent_level, indent_cache)) {
PyUnicodeWriter_Discard(writer);
if (encoder_listencode_obj(self, &writer, obj, indent_level, indent_cache)) {
_PyUnicodeWriter_Dealloc(&writer);
Py_XDECREF(indent_cache);
return NULL;
}
Py_XDECREF(indent_cache);

PyObject *str = PyUnicodeWriter_Finish(writer);
PyObject *str = _PyUnicodeWriter_Finish(&writer);
if (str == NULL) {
return NULL;
}
Expand Down Expand Up @@ -1458,16 +1449,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
}

static int
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
{
/* Append stolen and then decrement its reference count */
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
Py_DECREF(stolen);
return rval;
}

static int
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *obj,
Py_ssize_t indent_level, PyObject *indent_cache)
{
Expand All @@ -1476,13 +1467,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
int rv;

if (obj == Py_None) {
return PyUnicodeWriter_WriteUTF8(writer, "null", 4);
return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
}
else if (obj == Py_True) {
return PyUnicodeWriter_WriteUTF8(writer, "true", 4);
return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
}
else if (obj == Py_False) {
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
}
else if (PyUnicode_Check(obj)) {
PyObject *encoded = encoder_encode_string(s, obj);
Expand All @@ -1493,7 +1484,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
else if (PyLong_Check(obj)) {
if (PyLong_CheckExact(obj)) {
// Fast-path for exact integers
return PyUnicodeWriter_WriteRepr(writer, obj);
return PyUnicodeWriter_WriteRepr((PyUnicodeWriter*)writer, obj);
}
PyObject *encoded = PyLong_Type.tp_repr(obj);
if (encoded == NULL)
Expand Down Expand Up @@ -1571,7 +1562,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
}

static int
encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first,
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
PyObject *dct, PyObject *key, PyObject *value,
Py_ssize_t indent_level, PyObject *indent_cache,
PyObject *item_separator)
Expand Down Expand Up @@ -1611,7 +1602,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
*first = false;
}
else {
if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) {
Py_DECREF(keystr);
return -1;
}
Expand All @@ -1626,7 +1617,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
if (_steal_accumulate(writer, encoded) < 0) {
return -1;
}
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
return -1;
}
if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) {
Expand All @@ -1637,7 +1628,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
}

static int
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *dct,
Py_ssize_t indent_level, PyObject *indent_cache)
{
Expand All @@ -1649,7 +1640,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,

if (PyDict_GET_SIZE(dct) == 0) {
/* Fast path */
return PyUnicodeWriter_WriteUTF8(writer, "{}", 2);
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
}

if (s->markers != Py_None) {
Expand All @@ -1668,7 +1659,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
}
}

if (PyUnicodeWriter_WriteChar(writer, '{')) {
if (_PyUnicodeWriter_WriteChar(writer, '{')) {
goto bail;
}

Expand Down Expand Up @@ -1727,7 +1718,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
}
}

if (PyUnicodeWriter_WriteChar(writer, '}')) {
if (_PyUnicodeWriter_WriteChar(writer, '}')) {
goto bail;
}
return 0;
Expand All @@ -1739,7 +1730,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
}

static int
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
PyObject *seq,
Py_ssize_t indent_level, PyObject *indent_cache)
{
Expand All @@ -1753,7 +1744,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
return -1;
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
Py_DECREF(s_fast);
return PyUnicodeWriter_WriteUTF8(writer, "[]", 2);
return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
}

if (s->markers != Py_None) {
Expand All @@ -1772,7 +1763,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
}
}

if (PyUnicodeWriter_WriteChar(writer, '[')) {
if (_PyUnicodeWriter_WriteChar(writer, '[')) {
goto bail;
}

Expand All @@ -1789,7 +1780,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
if (i) {
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
goto bail;
}
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
Expand All @@ -1810,7 +1801,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
}
}

if (PyUnicodeWriter_WriteChar(writer, ']')) {
if (_PyUnicodeWriter_WriteChar(writer, ']')) {
goto bail;
}
Py_DECREF(s_fast);
Expand Down
Loading