From 587f9d6ce305daccd84dee954f1c1a07e53cbbee Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 18 Dec 2024 16:11:56 +0000 Subject: [PATCH 1/3] fix race --- Lib/test/test_capi/test_unicode.py | 22 ++++++++++++-- Objects/unicodeobject.c | 46 +++++++++++++++++++----------- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 65d8242ad3fc60..d749e34bd00a3e 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1,7 +1,7 @@ import unittest import sys from test import support -from test.support import import_helper +from test.support import threading_helper try: import _testcapi @@ -1005,6 +1005,24 @@ def test_asutf8(self): self.assertRaises(TypeError, unicode_asutf8, [], 0) # CRASHES unicode_asutf8(NULL, 0) + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + @threading_helper.requires_working_threading() + def test_asutf8_race(self): + """Test that there's no race condition in PyUnicode_AsUTF8()""" + unicode_asutf8 = _testcapi.unicode_asutf8 + from threading import Thread + + data = "😊" + + def worker(): + for _ in range(1000): + self.assertEqual(unicode_asutf8(data, 5), b'\xf0\x9f\x98\x8a\0') + + threads = [Thread(target=worker) for _ in range(10)] + with threading_helper.start_threads(threads): + pass + + @support.cpython_only @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') def test_asutf8andsize(self): @@ -1938,4 +1956,4 @@ def copy(text): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 53be6f5b9017a3..0def2aedabbeb8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -114,7 +114,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently static inline char* _PyUnicode_UTF8(PyObject *op) { - return (_PyCompactUnicodeObject_CAST(op)->utf8); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8); } static inline char* PyUnicode_UTF8(PyObject *op) @@ -130,7 +130,7 @@ static inline char* PyUnicode_UTF8(PyObject *op) static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8) { - _PyCompactUnicodeObject_CAST(op)->utf8 = utf8; + FT_ATOMIC_STORE_PTR_RELEASE(_PyCompactUnicodeObject_CAST(op)->utf8, utf8); } static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op) @@ -700,15 +700,15 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(ascii->state.compact == 0); CHECK(data != NULL); if (ascii->state.ascii) { - CHECK(compact->utf8 == data); + CHECK(_PyUnicode_UTF8(op) == data); CHECK(compact->utf8_length == ascii->length); } else { - CHECK(compact->utf8 != data); + CHECK(_PyUnicode_UTF8(op) != data); } } - if (compact->utf8 == NULL) + if (_PyUnicode_UTF8(op) == NULL) CHECK(compact->utf8_length == 0); } @@ -1156,8 +1156,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); - PyUnicode_SET_UTF8(unicode, NULL); PyUnicode_SET_UTF8_LENGTH(unicode, 0); + PyUnicode_SET_UTF8(unicode, NULL); } #ifdef Py_TRACE_REFS _Py_ForgetReference(unicode); @@ -1210,8 +1210,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); - PyUnicode_SET_UTF8(unicode, NULL); PyUnicode_SET_UTF8_LENGTH(unicode, 0); + PyUnicode_SET_UTF8(unicode, NULL); } data = (PyObject *)PyObject_Realloc(data, new_size); @@ -1221,8 +1221,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) } _PyUnicode_DATA_ANY(unicode) = data; if (share_utf8) { - PyUnicode_SET_UTF8(unicode, data); PyUnicode_SET_UTF8_LENGTH(unicode, length); + PyUnicode_SET_UTF8(unicode, data); } _PyUnicode_LENGTH(unicode) = length; PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); @@ -4216,6 +4216,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr) static int unicode_fill_utf8(PyObject *unicode); + +static int +unicode_ensure_utf8(PyObject *unicode) +{ + int err = 0; + if (PyUnicode_UTF8(unicode) == NULL) { + Py_BEGIN_CRITICAL_SECTION(unicode); + if (PyUnicode_UTF8(unicode) == NULL) { + err = unicode_fill_utf8(unicode); + } + Py_END_CRITICAL_SECTION(); + } + return err; +} + const char * PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) { @@ -4227,13 +4242,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) return NULL; } - if (PyUnicode_UTF8(unicode) == NULL) { - if (unicode_fill_utf8(unicode) == -1) { - if (psize) { - *psize = -1; - } - return NULL; + if (unicode_ensure_utf8(unicode) == -1) { + if (psize) { + *psize = -1; } + return NULL; } if (psize) { @@ -5854,6 +5867,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, static int unicode_fill_utf8(PyObject *unicode) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(unicode); /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */ assert(!PyUnicode_IS_ASCII(unicode)); @@ -5895,10 +5909,10 @@ unicode_fill_utf8(PyObject *unicode) PyErr_NoMemory(); return -1; } - PyUnicode_SET_UTF8(unicode, cache); - PyUnicode_SET_UTF8_LENGTH(unicode, len); memcpy(cache, start, len); cache[len] = '\0'; + PyUnicode_SET_UTF8_LENGTH(unicode, len); + PyUnicode_SET_UTF8(unicode, cache); _PyBytesWriter_Dealloc(&writer); return 0; } From d829ed023a956e9f7d31ebd62a2af8578c5a8ba6 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 18 Dec 2024 16:20:13 +0000 Subject: [PATCH 2/3] add newline --- Lib/test/test_capi/test_unicode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index d749e34bd00a3e..3408c10f426058 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1956,4 +1956,4 @@ def copy(text): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 5fd952efc4fb83e0a3d2bae7c788237f86842ee7 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Thu, 19 Dec 2024 07:41:51 +0000 Subject: [PATCH 3/3] fix _PyUnicode_CheckConsistency --- Objects/unicodeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0def2aedabbeb8..1aab9cf37768a8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -707,9 +707,10 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(_PyUnicode_UTF8(op) != data); } } - +#ifndef Py_GIL_DISABLED if (_PyUnicode_UTF8(op) == NULL) CHECK(compact->utf8_length == 0); +#endif } /* check that the best kind is used: O(n) operation */