-
-
Notifications
You must be signed in to change notification settings - Fork 33.6k
gh-95382: Improve performance of json encoder with indent #118105
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
c1cfcf5
30f2e72
1da39f3
0d85551
cc02a13
a444701
8d4d48a
ed989b8
5df567b
5fdc279
1ec550f
5e47a41
eefc508
a7f4bc6
35601c7
311b7df
ed2c806
2faf554
a407b84
ac86ee4
3b55d64
bb4ff43
9ef9332
ed029a6
36e3313
b69d08e
e78ff6a
5c40126
a43f2f2
f2b0c06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -85,11 +85,11 @@ encoder_dealloc(PyObject *self); | |
| static int | ||
| encoder_clear(PyEncoderObject *self); | ||
| static int | ||
| encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level); | ||
| encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject* current_newline_indent); | ||
| static int | ||
| encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level); | ||
| encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *current_newline_indent); | ||
| static int | ||
| encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level); | ||
| encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject* current_newline_indent); | ||
| static PyObject * | ||
| _encoded_const(PyObject *obj); | ||
| static void | ||
|
|
@@ -1251,6 +1251,25 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |
| return (PyObject *)s; | ||
| } | ||
|
|
||
/* Build the newline-plus-indentation string for a given nesting depth.
 *
 * Repeats `indent` `indent_level` times with PySequence_Repeat() and
 * prefixes the result with a single '\n' using PyUnicode_Concat().
 *
 * Returns the object produced by PyUnicode_Concat(), or NULL if any of the
 * three calls fails. The two temporaries (the repeated indent and the '\n'
 * string) are released before returning, on success and on failure alike.
 */
| static PyObject * | ||
| _create_newline_indent(PyObject* indent, Py_ssize_t indent_level) | ||
eendebakpt marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| { | ||
| PyObject* current_indent = PySequence_Repeat(indent, indent_level); | ||
| if (current_indent == NULL) { | ||
| return NULL; | ||
| } | ||
| PyObject* start = PyUnicode_FromOrdinal('\n'); | ||
| if (start == NULL) { | ||
| Py_DECREF(current_indent); | ||
| return NULL; | ||
| } | ||
|
|
||
| PyObject* newline_indent = PyUnicode_Concat(start, current_indent); | ||
| Py_DECREF(current_indent); | ||
| Py_DECREF(start); | ||
| return newline_indent; | ||
| } | ||
|
|
||
| static PyObject * | ||
| encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) | ||
| { | ||
|
|
@@ -1267,10 +1286,21 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) | |
| _PyUnicodeWriter_Init(&writer); | ||
| writer.overallocate = 1; | ||
|
|
||
| if (encoder_listencode_obj(self, &writer, obj, indent_level)) { | ||
| PyObject * current_newline_indent = NULL; | ||
| if (self->indent != Py_None) { | ||
| current_newline_indent = _create_newline_indent(self->indent, | ||
| indent_level); | ||
| if (current_newline_indent == NULL) { | ||
| _PyUnicodeWriter_Dealloc(&writer); | ||
| return NULL; | ||
| } | ||
| } | ||
| if (encoder_listencode_obj(self, &writer, obj, indent_level, | ||
|
||
| current_newline_indent)) { | ||
eendebakpt marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| _PyUnicodeWriter_Dealloc(&writer); | ||
serhiy-storchaka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return NULL; | ||
| } | ||
| Py_XDECREF(current_newline_indent); | ||
|
|
||
| result = PyTuple_New(1); | ||
| if (result == NULL || | ||
|
|
@@ -1358,7 +1388,8 @@ _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) | |
|
|
||
| static int | ||
| encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||
| PyObject *obj, Py_ssize_t indent_level) | ||
| PyObject *obj, Py_ssize_t indent_level, | ||
| PyObject *current_newline_indent) | ||
| { | ||
| /* Encode Python object obj to a JSON term */ | ||
| PyObject *newobj; | ||
|
|
@@ -1394,14 +1425,16 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
| else if (PyList_Check(obj) || PyTuple_Check(obj)) { | ||
| if (_Py_EnterRecursiveCall(" while encoding a JSON object")) | ||
| return -1; | ||
| rv = encoder_listencode_list(s, writer, obj, indent_level); | ||
| rv = encoder_listencode_list(s, writer, obj, indent_level, | ||
| current_newline_indent); | ||
| _Py_LeaveRecursiveCall(); | ||
| return rv; | ||
| } | ||
| else if (PyDict_Check(obj)) { | ||
| if (_Py_EnterRecursiveCall(" while encoding a JSON object")) | ||
| return -1; | ||
| rv = encoder_listencode_dict(s, writer, obj, indent_level); | ||
| rv = encoder_listencode_dict(s, writer, obj, indent_level, | ||
| current_newline_indent); | ||
| _Py_LeaveRecursiveCall(); | ||
| return rv; | ||
| } | ||
|
|
@@ -1435,7 +1468,8 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
| Py_XDECREF(ident); | ||
| return -1; | ||
| } | ||
| rv = encoder_listencode_obj(s, writer, newobj, indent_level); | ||
| rv = encoder_listencode_obj(s, writer, newobj, indent_level, | ||
| current_newline_indent); | ||
| _Py_LeaveRecursiveCall(); | ||
|
|
||
| Py_DECREF(newobj); | ||
|
|
@@ -1456,7 +1490,9 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
|
|
||
| static int | ||
| encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, | ||
| PyObject *key, PyObject *value, Py_ssize_t indent_level) | ||
| PyObject *key, PyObject *value, Py_ssize_t indent_level, | ||
| PyObject *current_newline_indent, | ||
| PyObject *current_item_separator) | ||
|
||
| { | ||
| PyObject *keystr = NULL; | ||
| PyObject *encoded; | ||
|
|
@@ -1493,7 +1529,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir | |
| *first = false; | ||
| } | ||
| else { | ||
| if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) { | ||
| if (_PyUnicodeWriter_WriteStr(writer, current_item_separator) < 0) { | ||
| Py_DECREF(keystr); | ||
| return -1; | ||
| } | ||
|
|
@@ -1511,21 +1547,25 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir | |
| if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { | ||
| return -1; | ||
| } | ||
| if (encoder_listencode_obj(s, writer, value, indent_level) < 0) { | ||
| if (encoder_listencode_obj(s, writer, value, indent_level, | ||
| current_newline_indent) < 0) { | ||
| return -1; | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| static int | ||
| encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||
| PyObject *dct, Py_ssize_t indent_level) | ||
| PyObject *dct, Py_ssize_t indent_level, | ||
| PyObject *current_newline_indent) | ||
| { | ||
| /* Encode Python dict dct to a JSON term */ | ||
| PyObject *ident = NULL; | ||
| PyObject *items = NULL; | ||
| PyObject *key, *value; | ||
| bool first = true; | ||
| PyObject *newline_indent = NULL; | ||
| PyObject *separator_indent = NULL; | ||
|
|
||
| if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ | ||
| return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); | ||
|
|
@@ -1549,14 +1589,22 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
| if (_PyUnicodeWriter_WriteChar(writer, '{')) | ||
| goto bail; | ||
|
|
||
| PyObject *current_item_separator = s->item_separator; // borrowed reference | ||
| if (s->indent != Py_None) { | ||
| /* TODO: DOES NOT RUN */ | ||
| indent_level += 1; | ||
| /* | ||
| newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | ||
| separator = _item_separator + newline_indent | ||
| buf += newline_indent | ||
| */ | ||
| newline_indent = PyUnicode_Concat(current_newline_indent, s->indent); | ||
| if (newline_indent == NULL) { | ||
| goto bail; | ||
| } | ||
| separator_indent = PyUnicode_Concat(current_item_separator, newline_indent); | ||
| if (separator_indent == NULL) { | ||
| goto bail; | ||
| } | ||
| // update item separator with a borrowed reference | ||
| current_item_separator = separator_indent; | ||
| if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { | ||
| goto bail; | ||
| } | ||
AlexWaygood marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| if (s->sort_keys || !PyDict_CheckExact(dct)) { | ||
|
|
@@ -1574,15 +1622,19 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
|
|
||
| key = PyTuple_GET_ITEM(item, 0); | ||
| value = PyTuple_GET_ITEM(item, 1); | ||
| if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0) | ||
| if (encoder_encode_key_value(s, writer, &first, key, value, | ||
| indent_level, newline_indent, | ||
| current_item_separator) < 0) | ||
| goto bail; | ||
| } | ||
| Py_CLEAR(items); | ||
|
|
||
| } else { | ||
| Py_ssize_t pos = 0; | ||
| while (PyDict_Next(dct, &pos, &key, &value)) { | ||
| if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0) | ||
| if (encoder_encode_key_value(s, writer, &first, key, value, | ||
| indent_level, newline_indent, | ||
| current_item_separator) < 0) | ||
| goto bail; | ||
| } | ||
| } | ||
|
|
@@ -1592,29 +1644,38 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
| goto bail; | ||
| Py_CLEAR(ident); | ||
| } | ||
| /* TODO DOES NOT RUN; dead code | ||
| if (s->indent != Py_None) { | ||
| indent_level -= 1; | ||
| Py_CLEAR(newline_indent); | ||
| Py_CLEAR(separator_indent); | ||
| indent_level--; | ||
|
|
||
| if (_PyUnicodeWriter_WriteStr(writer, current_newline_indent) < 0) { | ||
| goto bail; | ||
| } | ||
| } | ||
|
|
||
| yield '\n' + (' ' * (_indent * _current_indent_level)) | ||
| }*/ | ||
| if (_PyUnicodeWriter_WriteChar(writer, '}')) | ||
| goto bail; | ||
| return 0; | ||
|
|
||
| bail: | ||
| Py_XDECREF(items); | ||
| Py_XDECREF(ident); | ||
| Py_XDECREF(separator_indent); | ||
| Py_XDECREF(newline_indent); | ||
| return -1; | ||
| } | ||
|
|
||
| static int | ||
| encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||
| PyObject *seq, Py_ssize_t indent_level) | ||
| PyObject *seq, Py_ssize_t indent_level, | ||
| PyObject *current_newline_indent) | ||
| { | ||
| PyObject *ident = NULL; | ||
| PyObject *s_fast = NULL; | ||
| Py_ssize_t i; | ||
| PyObject *newline_indent = NULL; | ||
| PyObject *separator_indent = NULL; | ||
|
|
||
| ident = NULL; | ||
| s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); | ||
|
|
@@ -1643,22 +1704,32 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
|
|
||
| if (_PyUnicodeWriter_WriteChar(writer, '[')) | ||
| goto bail; | ||
|
|
||
| PyObject *separator = s->item_separator; // borrowed reference | ||
| if (s->indent != Py_None) { | ||
| /* TODO: DOES NOT RUN */ | ||
| indent_level += 1; | ||
| /* | ||
| newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | ||
| separator = _item_separator + newline_indent | ||
| buf += newline_indent | ||
| */ | ||
| indent_level++; | ||
| newline_indent = PyUnicode_Concat(current_newline_indent, s->indent); | ||
| if (newline_indent == NULL) { | ||
| goto bail; | ||
| } | ||
|
|
||
| if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { | ||
| goto bail; | ||
| } | ||
|
|
||
| separator_indent = PyUnicode_Concat(separator, newline_indent); | ||
| if (separator_indent == NULL) { | ||
| goto bail; | ||
| } | ||
| separator = separator_indent; // assign separator with borrowed reference | ||
| } | ||
| for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { | ||
| PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); | ||
| if (i) { | ||
| if (_PyUnicodeWriter_WriteStr(writer, s->item_separator)) | ||
| if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) | ||
| goto bail; | ||
| } | ||
| if (encoder_listencode_obj(s, writer, obj, indent_level)) | ||
| if (encoder_listencode_obj(s, writer, obj, indent_level, newline_indent)) | ||
| goto bail; | ||
| } | ||
| if (ident != NULL) { | ||
|
|
@@ -1667,12 +1738,15 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
| Py_CLEAR(ident); | ||
| } | ||
|
|
||
| /* TODO: DOES NOT RUN | ||
| if (s->indent != Py_None) { | ||
| indent_level -= 1; | ||
| indent_level--; | ||
| Py_CLEAR(newline_indent); | ||
| Py_CLEAR(separator_indent); | ||
| if (_PyUnicodeWriter_WriteStr(writer, current_newline_indent) < 0) { | ||
| goto bail; | ||
| } | ||
| } | ||
|
|
||
| yield '\n' + (' ' * (_indent * _current_indent_level)) | ||
| }*/ | ||
| if (_PyUnicodeWriter_WriteChar(writer, ']')) | ||
| goto bail; | ||
| Py_DECREF(s_fast); | ||
|
|
@@ -1681,6 +1755,8 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | |
| bail: | ||
| Py_XDECREF(ident); | ||
| Py_DECREF(s_fast); | ||
| Py_XDECREF(separator_indent); | ||
| Py_XDECREF(newline_indent); | ||
| return -1; | ||
| } | ||
|
|
||
|
|
@@ -1721,7 +1797,7 @@ encoder_clear(PyEncoderObject *self) | |
| return 0; | ||
| } | ||
|
|
||
| PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); | ||
| PyDoc_STRVAR(encoder_doc, "Encoder(markers, default, encoder, indent, key_separator, item_separator, sort_keys, skipkeys, allow_nan)"); | ||
|
|
||
| static PyType_Slot PyEncoderType_slots[] = { | ||
| {Py_tp_doc, (void *)encoder_doc}, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.