@@ -114,7 +114,7 @@ NOTE: In the interpreter's initialization phase, some globals are currently
114114
115115static inline char * _PyUnicode_UTF8 (PyObject * op )
116116{
117- return (_PyCompactUnicodeObject_CAST (op )-> utf8 );
117+ return FT_ATOMIC_LOAD_PTR_ACQUIRE (_PyCompactUnicodeObject_CAST (op )-> utf8 );
118118}
119119
120120static inline char * PyUnicode_UTF8 (PyObject * op )
@@ -130,7 +130,7 @@ static inline char* PyUnicode_UTF8(PyObject *op)
130130
131131static inline void PyUnicode_SET_UTF8 (PyObject * op , char * utf8 )
132132{
133- _PyCompactUnicodeObject_CAST (op )-> utf8 = utf8 ;
133+ FT_ATOMIC_STORE_PTR_RELEASE ( _PyCompactUnicodeObject_CAST (op )-> utf8 , utf8 ) ;
134134}
135135
136136static inline Py_ssize_t PyUnicode_UTF8_LENGTH (PyObject * op )
@@ -700,16 +700,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
700700 CHECK (ascii -> state .compact == 0 );
701701 CHECK (data != NULL );
702702 if (ascii -> state .ascii ) {
703- CHECK (compact -> utf8 == data );
703+ CHECK (_PyUnicode_UTF8 ( op ) == data );
704704 CHECK (compact -> utf8_length == ascii -> length );
705705 }
706706 else {
707- CHECK (compact -> utf8 != data );
707+ CHECK (_PyUnicode_UTF8 ( op ) != data );
708708 }
709709 }
710-
711- if (compact -> utf8 == NULL )
710+ #ifndef Py_GIL_DISABLED
711+ if (_PyUnicode_UTF8 ( op ) == NULL )
712712 CHECK (compact -> utf8_length == 0 );
713+ #endif
713714 }
714715
715716 /* check that the best kind is used: O(n) operation */
@@ -1156,8 +1157,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
11561157
11571158 if (_PyUnicode_HAS_UTF8_MEMORY (unicode )) {
11581159 PyMem_Free (_PyUnicode_UTF8 (unicode ));
1159- PyUnicode_SET_UTF8 (unicode , NULL );
11601160 PyUnicode_SET_UTF8_LENGTH (unicode , 0 );
1161+ PyUnicode_SET_UTF8 (unicode , NULL );
11611162 }
11621163#ifdef Py_TRACE_REFS
11631164 _Py_ForgetReference (unicode );
@@ -1210,8 +1211,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12101211 if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY (unicode ))
12111212 {
12121213 PyMem_Free (_PyUnicode_UTF8 (unicode ));
1213- PyUnicode_SET_UTF8 (unicode , NULL );
12141214 PyUnicode_SET_UTF8_LENGTH (unicode , 0 );
1215+ PyUnicode_SET_UTF8 (unicode , NULL );
12151216 }
12161217
12171218 data = (PyObject * )PyObject_Realloc (data , new_size );
@@ -1221,8 +1222,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
12211222 }
12221223 _PyUnicode_DATA_ANY (unicode ) = data ;
12231224 if (share_utf8 ) {
1224- PyUnicode_SET_UTF8 (unicode , data );
12251225 PyUnicode_SET_UTF8_LENGTH (unicode , length );
1226+ PyUnicode_SET_UTF8 (unicode , data );
12261227 }
12271228 _PyUnicode_LENGTH (unicode ) = length ;
12281229 PyUnicode_WRITE (PyUnicode_KIND (unicode ), data , length , 0 );
@@ -4216,6 +4217,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
42164217
42174218static int unicode_fill_utf8 (PyObject * unicode );
42184219
4220+
4221+ static int
4222+ unicode_ensure_utf8 (PyObject * unicode )
4223+ {
4224+ int err = 0 ;
4225+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4226+ Py_BEGIN_CRITICAL_SECTION (unicode );
4227+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4228+ err = unicode_fill_utf8 (unicode );
4229+ }
4230+ Py_END_CRITICAL_SECTION ();
4231+ }
4232+ return err ;
4233+ }
4234+
42194235const char *
42204236PyUnicode_AsUTF8AndSize (PyObject * unicode , Py_ssize_t * psize )
42214237{
@@ -4227,13 +4243,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
42274243 return NULL ;
42284244 }
42294245
4230- if (PyUnicode_UTF8 (unicode ) == NULL ) {
4231- if (unicode_fill_utf8 (unicode ) == -1 ) {
4232- if (psize ) {
4233- * psize = -1 ;
4234- }
4235- return NULL ;
4246+ if (unicode_ensure_utf8 (unicode ) == -1 ) {
4247+ if (psize ) {
4248+ * psize = -1 ;
42364249 }
4250+ return NULL ;
42374251 }
42384252
42394253 if (psize ) {
@@ -5854,6 +5868,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
58545868static int
58555869unicode_fill_utf8 (PyObject * unicode )
58565870{
5871+ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED (unicode );
58575872 /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
58585873 assert (!PyUnicode_IS_ASCII (unicode ));
58595874
@@ -5895,10 +5910,10 @@ unicode_fill_utf8(PyObject *unicode)
58955910 PyErr_NoMemory ();
58965911 return -1 ;
58975912 }
5898- PyUnicode_SET_UTF8 (unicode , cache );
5899- PyUnicode_SET_UTF8_LENGTH (unicode , len );
59005913 memcpy (cache , start , len );
59015914 cache [len ] = '\0' ;
5915+ PyUnicode_SET_UTF8_LENGTH (unicode , len );
5916+ PyUnicode_SET_UTF8 (unicode , cache );
59025917 _PyBytesWriter_Dealloc (& writer );
59035918 return 0 ;
59045919}
0 commit comments