-
-
Notifications
You must be signed in to change notification settings - Fork 32.7k
gh-116738: Make _json module safe in the free-threading build #119438
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
da0e917
3797dfa
366654c
5b72cdf
c4c24c3
370191b
93c4466
eafd3c1
daeec46
d54baf2
e5fa305
d4ddf5d
67d942f
4ffc1b2
384ca59
64e20aa
e6ce9c9
34885a0
2fe760b
eebccac
db8947c
c19ad14
8b12e0f
78d3595
6e8615f
39ebc00
7c5b185
adf78c7
acd0ad1
41e3dee
75884cb
0424c58
9c964f9
5acc999
5173373
7d00562
00f4d7f
b95c07f
cedc2c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import unittest | ||
from threading import Barrier, Thread | ||
from test.test_json import CTest | ||
from test.support import threading_helper | ||
|
||
|
||
def encode_json_helper(
    json, worker, data, number_of_threads=12, number_of_json_encodings=100
):
    """Repeatedly JSON-encode *data* while worker threads mutate it.

    Starts *number_of_threads* threads running ``worker(barrier, data,
    index)``, performs *number_of_json_encodings* calls to
    ``json.dumps(data)`` on the main thread, then clears *data* (the
    workers' ``while data:`` loops use that as a stop signal) and joins
    all workers.
    """
    barrier = Barrier(number_of_threads)
    workers = [
        Thread(target=worker, args=[barrier, data, index])
        for index in range(number_of_threads)
    ]
    for thread in workers:
        thread.start()
    for _ in range(number_of_json_encodings):
        json.dumps(data)
    # Emptying the shared data tells the workers to exit their loops.
    data.clear()
    for thread in workers:
        thread.join()
|
||
|
||
class MyMapping(dict):
    """A dict subclass whose ``items()`` is backed by a plain list.

    The underlying storage (``self.mapping``) can be mutated freely from
    other threads, while the C encoder still treats the object as a
    mapping through ``items()``.  The inherited dict itself stays empty.
    """

    def __init__(self):
        self.mapping = []

    def items(self):
        return self.mapping
|
||
|
||
@threading_helper.reap_threads
@threading_helper.requires_working_threading()
class TestJsonEncoding(CTest):
    """Encoding JSON while other threads mutate the data must not crash.

    These tests do not check the encoded output (it is unspecified when
    the input is mutated concurrently); they only check that the
    interpreter does not crash or corrupt memory, which is the concern
    for the free-threading build (gh-116738).
    """

    def test_json_mutating_list(self):
        def worker(barrier, data, index):
            barrier.wait()
            while data:
                for d in data:
                    if len(d) > 5:
                        d.clear()
                    else:
                        d.append(index)

        data = [[], []]
        encode_json_helper(self.json, worker, data)

    def test_json_mutating_exact_dict(self):
        def worker(barrier, data, index):
            barrier.wait()
            while data:
                for d in data:
                    if len(d) > 5:
                        try:
                            key = list(d)[0]
                            # dict.pop() requires an argument; pop the key
                            # we just looked up (a bare d.pop() raises
                            # TypeError, which would kill the worker).
                            d.pop(key)
                        except (KeyError, IndexError):
                            # Another worker may have removed it already.
                            pass
                    else:
                        d[index] = index

        data = [{}, {}]
        encode_json_helper(self.json, worker, data)

    def test_json_mutating_mapping(self):
        def worker(barrier, data, index):
            barrier.wait()
            while data:
                for d in data:
                    if len(d.mapping) > 3:
                        d.mapping.clear()
                    else:
                        d.mapping.append((index, index))

        data = [MyMapping(), MyMapping()]
        encode_json_helper(self.json, worker, data)
|
||
|
||
if __name__ == "__main__":
    import time

    # unittest.main() raises SystemExit when it finishes, so in the
    # original code the timing report after it was unreachable.  Use
    # try/finally to keep the exit behavior while still printing the
    # elapsed time; perf_counter is the right clock for durations.
    t0 = time.perf_counter()
    try:
        unittest.main()
    finally:
        dt = time.perf_counter() - t0
        print(f"Done: {dt:.2f}")
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Make the module :mod:`json` safe to use under the free-threading build. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
|
||
#include "Python.h" | ||
#include "pycore_ceval.h" // _Py_EnterRecursiveCall() | ||
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() | ||
#include "pycore_global_strings.h" // _Py_ID() | ||
#include "pycore_pyerrors.h" // _PyErr_FormatNote | ||
#include "pycore_runtime.h" // _PyRuntime | ||
|
@@ -1456,7 +1457,7 @@ write_newline_indent(PyUnicodeWriter *writer, | |
static PyObject * | ||
encoder_call(PyObject *op, PyObject *args, PyObject *kwds) | ||
{ | ||
/* Python callable interface to encode_listencode_obj */ | ||
/* Python callable interface to encoder_listencode_obj */ | ||
static char *kwlist[] = {"obj", "_current_indent_level", NULL}; | ||
PyObject *obj; | ||
Py_ssize_t indent_level; | ||
|
@@ -1743,15 +1744,70 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs | |
return 0; | ||
} | ||
|
||
static inline int | ||
_encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, | ||
bool *first, PyObject *dct, PyObject *items, | ||
Py_ssize_t indent_level, PyObject *indent_cache, | ||
PyObject *separator) | ||
{ | ||
PyObject *key, *value; | ||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { | ||
PyObject *item = PyList_GET_ITEM(items, i); | ||
|
||
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { | ||
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); | ||
return -1; | ||
} | ||
|
||
key = PyTuple_GET_ITEM(item, 0); | ||
value = PyTuple_GET_ITEM(item, 1); | ||
if (encoder_encode_key_value(s, writer, first, dct, key, value, | ||
indent_level, indent_cache, | ||
separator) < 0) { | ||
return -1; | ||
} | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
static inline int | ||
_encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, | ||
bool *first, PyObject *dct, Py_ssize_t indent_level, | ||
PyObject *indent_cache, PyObject *separator) | ||
{ | ||
PyObject *key, *value; | ||
Py_ssize_t pos = 0; | ||
while (PyDict_Next(dct, &pos, &key, &value)) { | ||
#ifdef Py_GIL_DISABLED | ||
// gh-119438: in the free-threading build the lock on dct can get suspended | ||
Py_IncRef(key); | ||
Py_IncRef(value); | ||
#endif | ||
if (encoder_encode_key_value(s, writer, first, dct, key, value, | ||
indent_level, indent_cache, | ||
separator) < 0) { | ||
#ifdef Py_GIL_DISABLED | ||
Py_DecRef(key); | ||
Py_DecRef(value); | ||
#endif | ||
return -1; | ||
} | ||
#ifdef Py_GIL_DISABLED | ||
Py_DecRef(key); | ||
Py_DecRef(value); | ||
#endif | ||
} | ||
return 0; | ||
} | ||
|
||
static int | ||
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, | ||
PyObject *dct, | ||
PyObject *dct, | ||
Py_ssize_t indent_level, PyObject *indent_cache) | ||
{ | ||
/* Encode Python dict dct a JSON term */ | ||
PyObject *ident = NULL; | ||
PyObject *items = NULL; | ||
PyObject *key, *value; | ||
bool first = true; | ||
|
||
if (PyDict_GET_SIZE(dct) == 0) { | ||
|
@@ -1788,34 +1844,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, | |
} | ||
|
||
if (s->sort_keys || !PyDict_CheckExact(dct)) { | ||
items = PyMapping_Items(dct); | ||
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) | ||
PyObject *items = PyMapping_Items(dct); | ||
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) { | ||
Py_XDECREF(items); | ||
goto bail; | ||
} | ||
|
||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { | ||
PyObject *item = PyList_GET_ITEM(items, i); | ||
|
||
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { | ||
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); | ||
goto bail; | ||
} | ||
|
||
key = PyTuple_GET_ITEM(item, 0); | ||
value = PyTuple_GET_ITEM(item, 1); | ||
if (encoder_encode_key_value(s, writer, &first, dct, key, value, | ||
indent_level, indent_cache, | ||
separator) < 0) | ||
goto bail; | ||
int result; | ||
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items); | ||
result = _encoder_iterate_mapping_lock_held(s, writer, &first, dct, | ||
items, indent_level, indent_cache, separator); | ||
Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); | ||
Py_DECREF(items); | ||
if (result < 0) { | ||
goto bail; | ||
} | ||
Py_CLEAR(items); | ||
|
||
} else { | ||
Py_ssize_t pos = 0; | ||
while (PyDict_Next(dct, &pos, &key, &value)) { | ||
if (encoder_encode_key_value(s, writer, &first, dct, key, value, | ||
indent_level, indent_cache, | ||
separator) < 0) | ||
goto bail; | ||
int result; | ||
Py_BEGIN_CRITICAL_SECTION(dct); | ||
result = _encoder_iterate_dict_lock_held(s, writer, &first, dct, | ||
indent_level, indent_cache, separator); | ||
Py_END_CRITICAL_SECTION(); | ||
if (result < 0) { | ||
goto bail; | ||
} | ||
} | ||
|
||
|
@@ -1837,22 +1889,52 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, | |
return 0; | ||
|
||
bail: | ||
Py_XDECREF(items); | ||
Py_XDECREF(ident); | ||
return -1; | ||
} | ||
|
||
static inline int | ||
_encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer, | ||
PyObject *seq, PyObject *s_fast, | ||
Py_ssize_t indent_level, PyObject *indent_cache, PyObject *separator) | ||
{ | ||
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { | ||
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using borrowed reference is not safe here because if the critical section on sequence get's suspended then other thread can decref or free the object. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @eendebakpt Can you update the PR to use strong references? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kumaraditya303 Yes I will (probably with a macro to continue using borrowed ones in the normal build). If I understand the mechanism correctly, then also the I tried creating a minimal example to add to the tests, but have not succeeded so far. Test script included below. Suggestions to get a minimal example are welcome. Test script
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, it should say to use strong references. This is only a problem if somehow the critical section of dict gets suspended and in that case another thread can free the the borrowed object. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I created #138098 to update the docs. |
||
#ifdef Py_GIL_DISABLED | ||
// gh-119438: in the free-threading build the lock on s_fast can get suspended | ||
Py_IncRef(obj); | ||
#endif | ||
if (i) { | ||
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { | ||
#ifdef Py_GIL_DISABLED | ||
Py_DecRef(obj); | ||
#endif | ||
return -1; | ||
} | ||
} | ||
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { | ||
_PyErr_FormatNote("when serializing %T item %zd", seq, i); | ||
#ifdef Py_GIL_DISABLED | ||
Py_DecRef(obj); | ||
#endif | ||
return -1; | ||
} | ||
#ifdef Py_GIL_DISABLED | ||
Py_DecRef(obj); | ||
#endif | ||
} | ||
return 0; | ||
} | ||
|
||
static int | ||
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, | ||
PyObject *seq, | ||
Py_ssize_t indent_level, PyObject *indent_cache) | ||
{ | ||
PyObject *ident = NULL; | ||
PyObject *s_fast = NULL; | ||
Py_ssize_t i; | ||
|
||
ident = NULL; | ||
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); | ||
s_fast = PySequence_Fast(seq, "encoder_listencode_list needs a sequence"); | ||
if (s_fast == NULL) | ||
return -1; | ||
if (PySequence_Fast_GET_SIZE(s_fast) == 0) { | ||
|
@@ -1890,16 +1972,13 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, | |
goto bail; | ||
} | ||
} | ||
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { | ||
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); | ||
if (i) { | ||
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) | ||
goto bail; | ||
} | ||
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { | ||
_PyErr_FormatNote("when serializing %T item %zd", seq, i); | ||
goto bail; | ||
} | ||
int result; | ||
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq); | ||
result = _encoder_iterate_fast_seq_lock_held(s, writer, seq, s_fast, | ||
indent_level, indent_cache, separator); | ||
Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); | ||
if (result < 0) { | ||
goto bail; | ||
} | ||
if (ident != NULL) { | ||
if (PyDict_DelItem(s->markers, ident)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should use
Py_INCREF
and similar macros