Skip to content

Commit 5d413ec

Browse files
pythongh-128013: fix data race in PyUnicode_AsUTF8AndSize on free-threading (python#128021)
1 parent 16c89f8 commit 5d413ec

File tree

2 files changed

+52
-19
lines changed

2 files changed

+52
-19
lines changed

Lib/test/test_capi/test_unicode.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22
import sys
33
from test import support
4-
from test.support import import_helper
4+
from test.support import threading_helper
55

66
try:
77
import _testcapi
@@ -959,6 +959,24 @@ def test_asutf8(self):
959959
self.assertRaises(TypeError, unicode_asutf8, [], 0)
960960
# CRASHES unicode_asutf8(NULL, 0)
961961

962+
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
@threading_helper.requires_working_threading()
def test_asutf8_race(self):
    """Test that there's no race condition in PyUnicode_AsUTF8().

    Ten threads each ask for the UTF-8 form of the same non-ASCII string
    1000 times and compare it with the expected NUL-terminated bytes.
    """
    from threading import Thread

    unicode_asutf8 = _testcapi.unicode_asutf8
    data = "😊"
    expected = b'\xf0\x9f\x98\x8a\0'

    def worker():
        for _ in range(1000):
            self.assertEqual(unicode_asutf8(data, 5), expected)

    threads = [Thread(target=worker) for _ in range(10)]
    # An exception raised inside a worker (including a failed assertEqual)
    # only reaches threading.excepthook; unittest never sees it.  Capture
    # it here and check from the main thread so a bad result fails the test.
    with threading_helper.catch_threading_exception() as cm:
        # start_threads() starts every thread and joins them on exit, so
        # all workers are finished before cm is inspected.
        with threading_helper.start_threads(threads):
            pass
        # cm's attributes are removed when its block exits; check inside.
        self.assertIsNone(cm.exc_value)
978+
979+
962980
@support.cpython_only
963981
@unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module')
964982
def test_asutf8andsize(self):

Objects/unicodeobject.c

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently
113113

114114
static inline char* _PyUnicode_UTF8(PyObject *op)
115115
{
116-
return (_PyCompactUnicodeObject_CAST(op)->utf8);
116+
return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
117117
}
118118

119119
static inline char* PyUnicode_UTF8(PyObject *op)
@@ -129,7 +129,7 @@ static inline char* PyUnicode_UTF8(PyObject *op)
129129

130130
static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8)
131131
{
132-
_PyCompactUnicodeObject_CAST(op)->utf8 = utf8;
132+
FT_ATOMIC_STORE_PTR_RELEASE(_PyCompactUnicodeObject_CAST(op)->utf8, utf8);
133133
}
134134

135135
static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op)
@@ -683,7 +683,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
683683
|| kind == PyUnicode_2BYTE_KIND
684684
|| kind == PyUnicode_4BYTE_KIND);
685685
CHECK(ascii->state.ascii == 0);
686-
CHECK(compact->utf8 != data);
686+
CHECK(_PyUnicode_UTF8(op) != data);
687687
}
688688
else {
689689
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
@@ -695,16 +695,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
695695
CHECK(ascii->state.compact == 0);
696696
CHECK(data != NULL);
697697
if (ascii->state.ascii) {
698-
CHECK(compact->utf8 == data);
698+
CHECK(_PyUnicode_UTF8(op) == data);
699699
CHECK(compact->utf8_length == ascii->length);
700700
}
701701
else {
702-
CHECK(compact->utf8 != data);
702+
CHECK(_PyUnicode_UTF8(op) != data);
703703
}
704704
}
705-
706-
if (compact->utf8 == NULL)
705+
#ifndef Py_GIL_DISABLED
706+
if (_PyUnicode_UTF8(op) == NULL)
707707
CHECK(compact->utf8_length == 0);
708+
#endif
708709
}
709710

710711
/* check that the best kind is used: O(n) operation */
@@ -1148,8 +1149,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
11481149

11491150
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
11501151
PyMem_Free(_PyUnicode_UTF8(unicode));
1151-
PyUnicode_SET_UTF8(unicode, NULL);
11521152
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
1153+
PyUnicode_SET_UTF8(unicode, NULL);
11531154
}
11541155
#ifdef Py_TRACE_REFS
11551156
_Py_ForgetReference(unicode);
@@ -1202,8 +1203,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12021203
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
12031204
{
12041205
PyMem_Free(_PyUnicode_UTF8(unicode));
1205-
PyUnicode_SET_UTF8(unicode, NULL);
12061206
PyUnicode_SET_UTF8_LENGTH(unicode, 0);
1207+
PyUnicode_SET_UTF8(unicode, NULL);
12071208
}
12081209

12091210
data = (PyObject *)PyObject_Realloc(data, new_size);
@@ -1213,8 +1214,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12131214
}
12141215
_PyUnicode_DATA_ANY(unicode) = data;
12151216
if (share_utf8) {
1216-
PyUnicode_SET_UTF8(unicode, data);
12171217
PyUnicode_SET_UTF8_LENGTH(unicode, length);
1218+
PyUnicode_SET_UTF8(unicode, data);
12181219
}
12191220
_PyUnicode_LENGTH(unicode) = length;
12201221
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
@@ -4085,6 +4086,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
40854086

40864087
static int unicode_fill_utf8(PyObject *unicode);
40874088

4089+
4090+
static int
4091+
unicode_ensure_utf8(PyObject *unicode)
4092+
{
4093+
int err = 0;
4094+
if (PyUnicode_UTF8(unicode) == NULL) {
4095+
Py_BEGIN_CRITICAL_SECTION(unicode);
4096+
if (PyUnicode_UTF8(unicode) == NULL) {
4097+
err = unicode_fill_utf8(unicode);
4098+
}
4099+
Py_END_CRITICAL_SECTION();
4100+
}
4101+
return err;
4102+
}
4103+
40884104
const char *
40894105
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
40904106
{
@@ -4096,13 +4112,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
40964112
return NULL;
40974113
}
40984114

4099-
if (PyUnicode_UTF8(unicode) == NULL) {
4100-
if (unicode_fill_utf8(unicode) == -1) {
4101-
if (psize) {
4102-
*psize = -1;
4103-
}
4104-
return NULL;
4115+
if (unicode_ensure_utf8(unicode) == -1) {
4116+
if (psize) {
4117+
*psize = -1;
41054118
}
4119+
return NULL;
41064120
}
41074121

41084122
if (psize) {
@@ -5434,6 +5448,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
54345448
static int
54355449
unicode_fill_utf8(PyObject *unicode)
54365450
{
5451+
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(unicode);
54375452
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
54385453
assert(!PyUnicode_IS_ASCII(unicode));
54395454

@@ -5475,10 +5490,10 @@ unicode_fill_utf8(PyObject *unicode)
54755490
PyErr_NoMemory();
54765491
return -1;
54775492
}
5478-
PyUnicode_SET_UTF8(unicode, cache);
5479-
PyUnicode_SET_UTF8_LENGTH(unicode, len);
54805493
memcpy(cache, start, len);
54815494
cache[len] = '\0';
5495+
PyUnicode_SET_UTF8_LENGTH(unicode, len);
5496+
PyUnicode_SET_UTF8(unicode, cache);
54825497
_PyBytesWriter_Dealloc(&writer);
54835498
return 0;
54845499
}

0 commit comments

Comments
 (0)