diff --git a/Lib/test/test_free_threading/test_json.py b/Lib/test/test_free_threading/test_json.py new file mode 100644 index 00000000000000..4e5d727a8a0857 --- /dev/null +++ b/Lib/test/test_free_threading/test_json.py @@ -0,0 +1,86 @@ +import unittest +from threading import Barrier, Thread +from test.test_json import CTest +from test.support import threading_helper + + +def encode_json_helper(json, worker, data, number_of_threads, number_of_json_encodings=100): + worker_threads = [] + barrier = Barrier(number_of_threads) + for index in range(number_of_threads): + worker_threads.append(Thread(target=worker, args=[barrier, data, index])) + for t in worker_threads: + t.start() + for ii in range(number_of_json_encodings): + json.dumps(data) + data.clear() + for t in worker_threads: + t.join() + + +class MyMapping(dict): + def __init__(self): + self.mapping = [] + + def items(self): + return self.mapping + + +@threading_helper.reap_threads +@threading_helper.requires_working_threading() +class TestJsonEncoding(CTest): + # Test encoding json with concurrent threads modifying the data cannot + # corrupt the interpreter + + def test_json_mutating_list(self): + + def worker(barrier, data, index): + barrier.wait() + while data: + for d in data: + if len(d) > 5: + d.clear() + else: + d.append(index) + d.append(index) + d.append(index) + encode_json_helper(self.json, worker, [[], []], number_of_threads=16) + + def test_json_mutating_dict(self): + + def worker(barrier, data, index): + barrier.wait() + while data: + for d in data: + if len(d) > 5: + try: + d.pop(list(d)[0]) + except (KeyError, IndexError): + pass + else: + d[index] = index + + encode_json_helper(self.json, worker, [{}, {}], number_of_threads=16) + + def test_json_mutating_mapping(self): + + def worker(barrier, data, index): + barrier.wait() + while data: + for d in data: + if len(d.mapping) > 3: + d.mapping.clear() + else: + d.mapping.append((index, index)) + + data = [MyMapping(), MyMapping()] + encode_json_helper(self.json, worker, data, number_of_threads=16) + + +if __name__ == "__main__": + import time + + t0 = time.time() + unittest.main() + dt = time.time()-t0 + print(f'Done: {dt:.2f}') diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst new file mode 100644 index 00000000000000..fe6a7229223de8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst @@ -0,0 +1 @@ +Make the module :mod:`json` safe to use under the free-threading build. diff --git a/Modules/_json.c b/Modules/_json.c index 6b5f6ea42df4d1..9d6ce0932b0bb5 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -10,6 +10,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() #include "pycore_global_strings.h" // _Py_ID() #include "pycore_pyerrors.h" // _PyErr_FormatNote #include "pycore_runtime.h" // _PyRuntime @@ -1636,15 +1637,58 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs return 0; } +static inline int +_encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, + bool *first, PyObject *dct, PyObject *items, + Py_ssize_t indent_level, PyObject *indent_cache, + PyObject *separator) +{ + PyObject *key, *value; + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { + PyObject *item = PyList_GET_ITEM(items, i); + + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + return -1; + } + + key = PyTuple_GET_ITEM(item, 0); + value = PyTuple_GET_ITEM(item, 1); + if (encoder_encode_key_value(s, writer, first, dct, key, value, + indent_level, indent_cache, + separator) < 0) { + return -1; + } + } + + return 0; +} + +static inline int +_encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, + bool *first, PyObject *dct, Py_ssize_t indent_level, + PyObject *indent_cache, PyObject *separator) +{ + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(dct, &pos, &key, &value)) { + if (encoder_encode_key_value(s, writer, first, dct, key, value, + indent_level, indent_cache, + separator) < 0) { + return -1; + } + } + return 0; +} + static int encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, - PyObject *dct, + PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache) { /* Encode Python dict dct a JSON term */ PyObject *ident = NULL; PyObject *items = NULL; - PyObject *key, *value; bool first = true; if (PyDict_GET_SIZE(dct) == 0) { @@ -1682,33 +1726,29 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (s->sort_keys || !PyDict_CheckExact(dct)) { items = PyMapping_Items(dct); - if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) + if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) { goto bail; - - for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { - PyObject *item = PyList_GET_ITEM(items, i); - - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); - goto bail; - } - - key = PyTuple_GET_ITEM(item, 0); - value = PyTuple_GET_ITEM(item, 1); - if (encoder_encode_key_value(s, writer, &first, dct, key, value, - indent_level, indent_cache, - separator) < 0) - goto bail; } + + int result; + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items); + result = _encoder_iterate_mapping_lock_held(s, writer, &first, dct, + items, indent_level, indent_cache, separator); + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); Py_CLEAR(items); + if (result < 0) { + Py_XDECREF(items); + goto bail; + } } else { - Py_ssize_t pos = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { - if (encoder_encode_key_value(s, writer, &first, dct, key, value, - indent_level, indent_cache, - separator) < 0) - goto bail; + int result; + Py_BEGIN_CRITICAL_SECTION(dct); + result = _encoder_iterate_dict_lock_held(s, writer, &first, dct, + indent_level, indent_cache, separator); + Py_END_CRITICAL_SECTION(); + if (result < 0) { + goto bail; } } @@ -1735,6 +1775,26 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, return -1; } +static inline int +_encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, + PyObject *seq, PyObject *s_fast, + Py_ssize_t indent_level, PyObject *indent_cache, PyObject *separator) +{ + for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { + PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + if (i) { + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { + return -1; + } + } + if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { + _PyErr_FormatNote("when serializing %T item %zd", seq, i); + return -1; + } + } + return 0; +} + static int encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, @@ -1742,10 +1802,8 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, { PyObject *ident = NULL; PyObject *s_fast = NULL; - Py_ssize_t i; - ident = NULL; - s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); + s_fast = PySequence_Fast(seq, "encoder_listencode_list needs a sequence"); if (s_fast == NULL) return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { @@ -1783,16 +1841,13 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, goto bail; } } - for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { - PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); - if (i) { - if (PyUnicodeWriter_WriteStr(writer, separator) < 0) - goto bail; - } - if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { - _PyErr_FormatNote("when serializing %T item %zd", seq, i); - goto bail; - } + int result; + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq); + result = _encoder_iterate_fast_seq_lock_held(s, writer, seq, s_fast, + indent_level, indent_cache, separator); + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); + if (result < 0) { + goto bail; } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident))