@@ -111,47 +111,80 @@ NOTE: In the interpreter's initialization phase, some globals are currently
111
111
# define _PyUnicode_CHECK (op ) PyUnicode_Check(op)
112
112
#endif
113
113
114
- #define _PyUnicode_UTF8 (op ) \
115
- (_PyCompactUnicodeObject_CAST(op)->utf8)
116
- #define PyUnicode_UTF8 (op ) \
117
- (assert(_PyUnicode_CHECK(op)), \
118
- PyUnicode_IS_COMPACT_ASCII(op) ? \
119
- ((char*)(_PyASCIIObject_CAST(op) + 1)) : \
120
- _PyUnicode_UTF8(op))
121
- #define _PyUnicode_UTF8_LENGTH (op ) \
122
- (_PyCompactUnicodeObject_CAST(op)->utf8_length)
123
- #define PyUnicode_UTF8_LENGTH (op ) \
124
- (assert(_PyUnicode_CHECK(op)), \
125
- PyUnicode_IS_COMPACT_ASCII(op) ? \
126
- _PyASCIIObject_CAST(op)->length : \
127
- _PyUnicode_UTF8_LENGTH(op))
114
/* Return the raw `utf8` pointer field of the compact unicode object `op`.
   The field is read with FT_ATOMIC_LOAD_PTR_ACQUIRE; the matching store in
   PyUnicode_SET_UTF8() uses FT_ATOMIC_STORE_PTR_RELEASE.
   Unlike PyUnicode_UTF8(), this helper performs no type assertion and does
   not special-case compact ASCII strings. */
+ static inline char * _PyUnicode_UTF8 (PyObject * op )
115
+ {
116
+ return FT_ATOMIC_LOAD_PTR_ACQUIRE (_PyCompactUnicodeObject_CAST (op )-> utf8 );
117
+ }
118
+
119
/* Return a pointer to the UTF-8 bytes of the str object `op`.
   - Compact ASCII strings: returns the address just past the PyASCIIObject
     header; the `utf8` field is not consulted.
   - Everything else: returns the stored `utf8` field via _PyUnicode_UTF8().
     Callers in this file compare that result against NULL before filling
     the cache (see unicode_ensure_utf8()).
   Asserts (debug builds) that `op` passes _PyUnicode_CHECK. */
+ static inline char * PyUnicode_UTF8 (PyObject * op )
120
+ {
121
+ assert (_PyUnicode_CHECK (op ));
122
+ if (PyUnicode_IS_COMPACT_ASCII (op )) {
123
+ return ((char * )(_PyASCIIObject_CAST (op ) + 1 ));
124
+ }
125
+ else {
126
+ return _PyUnicode_UTF8 (op );
127
+ }
128
+ }
129
+
130
/* Store `utf8` into the `utf8` field of the compact unicode object `op`
   using FT_ATOMIC_STORE_PTR_RELEASE (paired with the acquire load in
   _PyUnicode_UTF8()). No type assertion is made on `op`.
   NOTE(review): every call site visible in this file calls
   PyUnicode_SET_UTF8_LENGTH() immediately before this function --
   presumably so the length is written before the pointer is published;
   confirm before reordering. */
+ static inline void PyUnicode_SET_UTF8 (PyObject * op , char * utf8 )
131
+ {
132
+ FT_ATOMIC_STORE_PTR_RELEASE (_PyCompactUnicodeObject_CAST (op )-> utf8 , utf8 );
133
+ }
134
+
135
/* Return the length recorded for the UTF-8 representation of `op`.
   - Compact ASCII strings: returns the `length` field of the PyASCIIObject.
   - Everything else: returns the `utf8_length` field of the
     PyCompactUnicodeObject.
   Both branches are plain field reads; unlike the `utf8` pointer accessor,
   no FT_ATOMIC_* wrapper is used here.
   Asserts (debug builds) that `op` passes _PyUnicode_CHECK. */
+ static inline Py_ssize_t PyUnicode_UTF8_LENGTH (PyObject * op )
136
+ {
137
+ assert (_PyUnicode_CHECK (op ));
138
+ if (PyUnicode_IS_COMPACT_ASCII (op )) {
139
+ return _PyASCIIObject_CAST (op )-> length ;
140
+ }
141
+ else {
142
+ return _PyCompactUnicodeObject_CAST (op )-> utf8_length ;
143
+ }
144
+ }
145
+
146
/* Write `length` to the `utf8_length` field of the compact unicode object
   `op`. This is a plain assignment (no FT_ATOMIC_* wrapper) and performs no
   type assertion on `op`. */
+ static inline void PyUnicode_SET_UTF8_LENGTH (PyObject * op , Py_ssize_t length )
147
+ {
148
+ _PyCompactUnicodeObject_CAST (op )-> utf8_length = length ;
149
+ }
128
150
129
151
/* Raw field accessors on the PyASCIIObject header of `op`: `length`,
   `state` and `hash`. Each expands to the struct member itself, so it can be
   used as an assignment target (e.g. `_PyUnicode_LENGTH(unicode) = length`).
   None of them asserts the type of `op` or uses an FT_ATOMIC_* wrapper. */
#define _PyUnicode_LENGTH (op ) \
130
152
(_PyASCIIObject_CAST(op)->length)
131
153
#define _PyUnicode_STATE (op ) \
132
154
(_PyASCIIObject_CAST(op)->state)
133
155
#define _PyUnicode_HASH (op ) \
134
156
(_PyASCIIObject_CAST(op)->hash)
135
- #define _PyUnicode_KIND (op ) \
136
- (assert(_PyUnicode_CHECK(op)), \
137
- _PyASCIIObject_CAST(op)->state.kind)
138
- #define _PyUnicode_GET_LENGTH (op ) \
139
- (assert(_PyUnicode_CHECK(op)), \
140
- _PyASCIIObject_CAST(op)->length)
157
+
158
/* Return the `hash` field of the str object `op`, read with
   FT_ATOMIC_LOAD_SSIZE_RELAXED. Callers in this file treat the value -1 as
   "hash not available yet" (see unicode_hash() and unicode_modifiable()).
   Asserts (debug builds) that `op` passes _PyUnicode_CHECK. */
+ static inline Py_hash_t PyUnicode_HASH (PyObject * op )
159
+ {
160
+ assert (_PyUnicode_CHECK (op ));
161
+ return FT_ATOMIC_LOAD_SSIZE_RELAXED (_PyASCIIObject_CAST (op )-> hash );
162
+ }
163
+
164
/* Store `hash` into the `hash` field of `op` with
   FT_ATOMIC_STORE_SSIZE_RELAXED (the counterpart of the relaxed load in
   PyUnicode_HASH()). No type assertion is made on `op`. */
+ static inline void PyUnicode_SET_HASH (PyObject * op , Py_hash_t hash )
165
+ {
166
+ FT_ATOMIC_STORE_SSIZE_RELAXED (_PyASCIIObject_CAST (op )-> hash , hash );
167
+ }
168
+
141
169
/* Raw accessor for the `data.any` member of the PyUnicodeObject `op`.
   Expands to the member itself, so it is also used as an assignment target
   elsewhere in this file. No type assertion is made on `op`. */
#define _PyUnicode_DATA_ANY (op ) \
142
170
(_PyUnicodeObject_CAST(op)->data.any)
143
171
144
- #define _PyUnicode_SHARE_UTF8 (op ) \
145
- (assert(_PyUnicode_CHECK(op)), \
146
- assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
147
- (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
172
/* Return non-zero when the stored `utf8` pointer of `op` is the same address
   as PyUnicode_DATA(op), i.e. the UTF-8 pointer aliases the character data
   instead of pointing to a separate block; return 0 otherwise.
   Asserts (debug builds) that `op` is a str object and is NOT compact
   ASCII. */
+ static inline int _PyUnicode_SHARE_UTF8 (PyObject * op )
173
+ {
174
+ assert (_PyUnicode_CHECK (op ));
175
+ assert (!PyUnicode_IS_COMPACT_ASCII (op ));
176
+ return (_PyUnicode_UTF8 (op ) == PyUnicode_DATA (op ));
177
+ }
148
178
149
179
/* true if the Unicode object has an allocated UTF-8 memory block
150
180
(not shared with other data) */
151
- #define _PyUnicode_HAS_UTF8_MEMORY (op ) \
152
- ((!PyUnicode_IS_COMPACT_ASCII(op) \
153
- && _PyUnicode_UTF8(op) \
154
- && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
181
/* Returns non-zero only when all three conditions hold:
     1. `op` is not a compact ASCII string,
     2. its stored `utf8` pointer is non-NULL, and
     3. that pointer differs from PyUnicode_DATA(op).
   Callers in this file use a true result as the condition for calling
   PyMem_Free() on the pointer.
   Note: _PyUnicode_UTF8(op) is evaluated twice, so the field is loaded
   (with acquire semantics) two separate times. No type assertion is made
   on `op`. */
+ static inline int _PyUnicode_HAS_UTF8_MEMORY (PyObject * op )
182
+ {
183
+ return (!PyUnicode_IS_COMPACT_ASCII (op )
184
+ && _PyUnicode_UTF8 (op ) != NULL
185
+ && _PyUnicode_UTF8 (op ) != PyUnicode_DATA (op ));
186
+ }
187
+
155
188
156
189
/* Generic helper macro to convert characters of different types.
157
190
from_type and to_type have to be valid type names, begin and end
@@ -650,7 +683,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
650
683
|| kind == PyUnicode_2BYTE_KIND
651
684
|| kind == PyUnicode_4BYTE_KIND );
652
685
CHECK (ascii -> state .ascii == 0 );
653
- CHECK (compact -> utf8 != data );
686
+ CHECK (_PyUnicode_UTF8 ( op ) != data );
654
687
}
655
688
else {
656
689
PyUnicodeObject * unicode = _PyUnicodeObject_CAST (op );
@@ -662,16 +695,17 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
662
695
CHECK (ascii -> state .compact == 0 );
663
696
CHECK (data != NULL );
664
697
if (ascii -> state .ascii ) {
665
- CHECK (compact -> utf8 == data );
698
+ CHECK (_PyUnicode_UTF8 ( op ) == data );
666
699
CHECK (compact -> utf8_length == ascii -> length );
667
700
}
668
701
else {
669
- CHECK (compact -> utf8 != data );
702
+ CHECK (_PyUnicode_UTF8 ( op ) != data );
670
703
}
671
704
}
672
-
673
- if (compact -> utf8 == NULL )
705
+ #ifndef Py_GIL_DISABLED
706
+ if (_PyUnicode_UTF8 ( op ) == NULL )
674
707
CHECK (compact -> utf8_length == 0 );
708
+ #endif
675
709
}
676
710
677
711
/* check that the best kind is used: O(n) operation */
@@ -1115,8 +1149,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
1115
1149
1116
1150
if (_PyUnicode_HAS_UTF8_MEMORY (unicode )) {
1117
1151
PyMem_Free (_PyUnicode_UTF8 (unicode ));
1118
- _PyUnicode_UTF8 (unicode ) = NULL ;
1119
- _PyUnicode_UTF8_LENGTH (unicode ) = 0 ;
1152
+ PyUnicode_SET_UTF8_LENGTH (unicode , 0 ) ;
1153
+ PyUnicode_SET_UTF8 (unicode , NULL ) ;
1120
1154
}
1121
1155
#ifdef Py_TRACE_REFS
1122
1156
_Py_ForgetReference (unicode );
@@ -1169,8 +1203,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
1169
1203
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY (unicode ))
1170
1204
{
1171
1205
PyMem_Free (_PyUnicode_UTF8 (unicode ));
1172
- _PyUnicode_UTF8 (unicode ) = NULL ;
1173
- _PyUnicode_UTF8_LENGTH (unicode ) = 0 ;
1206
+ PyUnicode_SET_UTF8_LENGTH (unicode , 0 ) ;
1207
+ PyUnicode_SET_UTF8 (unicode , NULL ) ;
1174
1208
}
1175
1209
1176
1210
data = (PyObject * )PyObject_Realloc (data , new_size );
@@ -1180,8 +1214,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length)
1180
1214
}
1181
1215
_PyUnicode_DATA_ANY (unicode ) = data ;
1182
1216
if (share_utf8 ) {
1183
- _PyUnicode_UTF8 (unicode ) = data ;
1184
- _PyUnicode_UTF8_LENGTH (unicode ) = length ;
1217
+ PyUnicode_SET_UTF8_LENGTH (unicode , length ) ;
1218
+ PyUnicode_SET_UTF8 (unicode , data ) ;
1185
1219
}
1186
1220
_PyUnicode_LENGTH (unicode ) = length ;
1187
1221
PyUnicode_WRITE (PyUnicode_KIND (unicode ), data , length , 0 );
@@ -1411,12 +1445,12 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
1411
1445
1412
1446
assert (unicode != NULL );
1413
1447
assert (_PyUnicode_CHECK (unicode ));
1414
- assert (_PyUnicode_KIND (unicode ) == PyUnicode_4BYTE_KIND );
1448
+ assert (PyUnicode_KIND (unicode ) == PyUnicode_4BYTE_KIND );
1415
1449
ucs4_out = PyUnicode_4BYTE_DATA (unicode );
1416
1450
1417
1451
for (iter = begin ; iter < end ; ) {
1418
1452
assert (ucs4_out < (PyUnicode_4BYTE_DATA (unicode ) +
1419
- _PyUnicode_GET_LENGTH (unicode )));
1453
+ PyUnicode_GET_LENGTH (unicode )));
1420
1454
if (Py_UNICODE_IS_HIGH_SURROGATE (iter [0 ])
1421
1455
&& (iter + 1 ) < end
1422
1456
&& Py_UNICODE_IS_LOW_SURROGATE (iter [1 ]))
@@ -1430,7 +1464,7 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
1430
1464
}
1431
1465
}
1432
1466
assert (ucs4_out == (PyUnicode_4BYTE_DATA (unicode ) +
1433
- _PyUnicode_GET_LENGTH (unicode )));
1467
+ PyUnicode_GET_LENGTH (unicode )));
1434
1468
1435
1469
}
1436
1470
#endif
@@ -1801,7 +1835,7 @@ unicode_modifiable(PyObject *unicode)
1801
1835
assert (_PyUnicode_CHECK (unicode ));
1802
1836
if (Py_REFCNT (unicode ) != 1 )
1803
1837
return 0 ;
1804
- if (FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( unicode ) ) != -1 )
1838
+ if (PyUnicode_HASH ( unicode ) != -1 )
1805
1839
return 0 ;
1806
1840
if (PyUnicode_CHECK_INTERNED (unicode ))
1807
1841
return 0 ;
@@ -4052,6 +4086,21 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
4052
4086
4053
4087
static int unicode_fill_utf8 (PyObject * unicode );
4054
4088
4089
+
4090
/* Make sure `unicode` has a UTF-8 representation available.

   If PyUnicode_UTF8(unicode) is already non-NULL, nothing is done and 0 is
   returned. Otherwise the object's critical section is entered, the pointer
   is checked a second time (another thread may have filled it between the
   first check and entering the section), and unicode_fill_utf8() is called
   only if it is still NULL.

   Returns 0 when the representation was already present, otherwise the
   result of unicode_fill_utf8(); the caller in this file treats -1 as
   failure. */
+ static int
4091
+ unicode_ensure_utf8 (PyObject * unicode )
4092
+ {
4093
+ int err = 0 ;
4094
+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4095
+ Py_BEGIN_CRITICAL_SECTION (unicode );
4096
+ if (PyUnicode_UTF8 (unicode ) == NULL ) {
4097
+ err = unicode_fill_utf8 (unicode );
4098
+ }
4099
+ Py_END_CRITICAL_SECTION ();
4100
+ }
4101
+ return err ;
4102
+ }
4103
+
4055
4104
const char *
4056
4105
PyUnicode_AsUTF8AndSize (PyObject * unicode , Py_ssize_t * psize )
4057
4106
{
@@ -4063,13 +4112,11 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
4063
4112
return NULL ;
4064
4113
}
4065
4114
4066
- if (PyUnicode_UTF8 (unicode ) == NULL ) {
4067
- if (unicode_fill_utf8 (unicode ) == -1 ) {
4068
- if (psize ) {
4069
- * psize = -1 ;
4070
- }
4071
- return NULL ;
4115
+ if (unicode_ensure_utf8 (unicode ) == -1 ) {
4116
+ if (psize ) {
4117
+ * psize = -1 ;
4072
4118
}
4119
+ return NULL ;
4073
4120
}
4074
4121
4075
4122
if (psize ) {
@@ -5401,6 +5448,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
5401
5448
static int
5402
5449
unicode_fill_utf8 (PyObject * unicode )
5403
5450
{
5451
+ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED (unicode );
5404
5452
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
5405
5453
assert (!PyUnicode_IS_ASCII (unicode ));
5406
5454
@@ -5442,10 +5490,10 @@ unicode_fill_utf8(PyObject *unicode)
5442
5490
PyErr_NoMemory ();
5443
5491
return -1 ;
5444
5492
}
5445
- _PyUnicode_UTF8 (unicode ) = cache ;
5446
- _PyUnicode_UTF8_LENGTH (unicode ) = len ;
5447
5493
memcpy (cache , start , len );
5448
5494
cache [len ] = '\0' ;
5495
+ PyUnicode_SET_UTF8_LENGTH (unicode , len );
5496
+ PyUnicode_SET_UTF8 (unicode , cache );
5449
5497
_PyBytesWriter_Dealloc (& writer );
5450
5498
return 0 ;
5451
5499
}
@@ -10996,9 +11044,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
10996
11044
return 0 ;
10997
11045
}
10998
11046
10999
- Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( right_uni ) );
11047
+ Py_hash_t right_hash = PyUnicode_HASH ( right_uni );
11000
11048
assert (right_hash != -1 );
11001
- Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( left ) );
11049
+ Py_hash_t hash = PyUnicode_HASH ( left );
11002
11050
if (hash != -1 && hash != right_hash ) {
11003
11051
return 0 ;
11004
11052
}
@@ -11484,14 +11532,14 @@ unicode_hash(PyObject *self)
11484
11532
#ifdef Py_DEBUG
11485
11533
assert (_Py_HashSecret_Initialized );
11486
11534
#endif
11487
- Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED ( _PyUnicode_HASH ( self ) );
11535
+ Py_hash_t hash = PyUnicode_HASH ( self );
11488
11536
if (hash != -1 ) {
11489
11537
return hash ;
11490
11538
}
11491
11539
x = _Py_HashBytes (PyUnicode_DATA (self ),
11492
11540
PyUnicode_GET_LENGTH (self ) * PyUnicode_KIND (self ));
11493
11541
11494
- FT_ATOMIC_STORE_SSIZE_RELAXED ( _PyUnicode_HASH ( self ) , x );
11542
+ PyUnicode_SET_HASH ( self , x );
11495
11543
return x ;
11496
11544
}
11497
11545
@@ -14888,8 +14936,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
14888
14936
_PyUnicode_STATE (self ).compact = 0 ;
14889
14937
_PyUnicode_STATE (self ).ascii = _PyUnicode_STATE (unicode ).ascii ;
14890
14938
_PyUnicode_STATE (self ).statically_allocated = 0 ;
14891
- _PyUnicode_UTF8_LENGTH (self ) = 0 ;
14892
- _PyUnicode_UTF8 (self ) = NULL ;
14939
+ PyUnicode_SET_UTF8_LENGTH (self , 0 ) ;
14940
+ PyUnicode_SET_UTF8 (self , NULL ) ;
14893
14941
_PyUnicode_DATA_ANY (self ) = NULL ;
14894
14942
14895
14943
share_utf8 = 0 ;
@@ -14919,8 +14967,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
14919
14967
14920
14968
_PyUnicode_DATA_ANY (self ) = data ;
14921
14969
if (share_utf8 ) {
14922
- _PyUnicode_UTF8_LENGTH (self ) = length ;
14923
- _PyUnicode_UTF8 (self ) = data ;
14970
+ PyUnicode_SET_UTF8_LENGTH (self , length ) ;
14971
+ PyUnicode_SET_UTF8 (self , data ) ;
14924
14972
}
14925
14973
14926
14974
memcpy (data , PyUnicode_DATA (unicode ), kind * (length + 1 ));
0 commit comments