@@ -12309,31 +12309,22 @@ unicode_isnumeric_impl(PyObject *self)
12309
12309
Py_RETURN_TRUE ;
12310
12310
}
12311
12311
12312
- int
12313
- PyUnicode_IsIdentifier (PyObject * self )
12312
+ Py_ssize_t
12313
+ _PyUnicode_ScanIdentifier (PyObject * self )
12314
12314
{
12315
12315
Py_ssize_t i ;
12316
- int ready = PyUnicode_IS_READY (self );
12316
+ if (PyUnicode_READY (self ) == -1 )
12317
+ return -1 ;
12317
12318
12318
- Py_ssize_t len = ready ? PyUnicode_GET_LENGTH ( self ) : PyUnicode_GET_SIZE (self );
12319
+ Py_ssize_t len = PyUnicode_GET_LENGTH (self );
12319
12320
if (len == 0 ) {
12320
12321
/* an empty string is not a valid identifier */
12321
12322
return 0 ;
12322
12323
}
12323
12324
12324
- int kind = 0 ;
12325
- const void * data = NULL ;
12326
- const wchar_t * wstr = NULL ;
12327
- Py_UCS4 ch ;
12328
- if (ready ) {
12329
- kind = PyUnicode_KIND (self );
12330
- data = PyUnicode_DATA (self );
12331
- ch = PyUnicode_READ (kind , data , 0 );
12332
- }
12333
- else {
12334
- wstr = _PyUnicode_WSTR (self );
12335
- ch = wstr [0 ];
12336
- }
12325
+ int kind = PyUnicode_KIND (self );
12326
+ const void * data = PyUnicode_DATA (self );
12327
+ Py_UCS4 ch = PyUnicode_READ (kind , data , 0 );
12337
12328
/* PEP 3131 says that the first character must be in
12338
12329
XID_Start and subsequent characters in XID_Continue,
12339
12330
and for the ASCII range, the 2.x rules apply (i.e
@@ -12347,17 +12338,44 @@ PyUnicode_IsIdentifier(PyObject *self)
12347
12338
}
12348
12339
12349
12340
for (i = 1 ; i < len ; i ++ ) {
12350
- if (ready ) {
12351
- ch = PyUnicode_READ (kind , data , i );
12341
+ ch = PyUnicode_READ (kind , data , i );
12342
+ if (!_PyUnicode_IsXidContinue (ch )) {
12343
+ return i ;
12352
12344
}
12353
- else {
12354
- ch = wstr [i ];
12345
+ }
12346
+ return i ;
12347
+ }
12348
+
12349
+ int
12350
+ PyUnicode_IsIdentifier (PyObject * self )
12351
+ {
12352
+ if (PyUnicode_IS_READY (self )) {
12353
+ Py_ssize_t i = _PyUnicode_ScanIdentifier (self );
12354
+ Py_ssize_t len = PyUnicode_GET_LENGTH (self );
12355
+ /* an empty string is not a valid identifier */
12356
+ return len && i == len ;
12357
+ }
12358
+ else {
12359
+ Py_ssize_t i , len = PyUnicode_GET_SIZE (self );
12360
+ if (len == 0 ) {
12361
+ /* an empty string is not a valid identifier */
12362
+ return 0 ;
12355
12363
}
12356
- if (!_PyUnicode_IsXidContinue (ch )) {
12364
+
12365
+ const wchar_t * wstr = _PyUnicode_WSTR (self );
12366
+ Py_UCS4 ch = wstr [0 ];
12367
+ if (!_PyUnicode_IsXidStart (ch ) && ch != 0x5F /* LOW LINE */ ) {
12357
12368
return 0 ;
12358
12369
}
12370
+
12371
+ for (i = 1 ; i < len ; i ++ ) {
12372
+ ch = wstr [i ];
12373
+ if (!_PyUnicode_IsXidContinue (ch )) {
12374
+ return 0 ;
12375
+ }
12376
+ }
12377
+ return 1 ;
12359
12378
}
12360
- return 1 ;
12361
12379
}
12362
12380
12363
12381
/*[clinic input]
0 commit comments