@@ -12309,31 +12309,22 @@ unicode_isnumeric_impl(PyObject *self)
1230912309 Py_RETURN_TRUE ;
1231012310}
1231112311
12312- int
12313- PyUnicode_IsIdentifier (PyObject * self )
12312+ Py_ssize_t
12313+ _PyUnicode_ScanIdentifier (PyObject * self )
1231412314{
1231512315 Py_ssize_t i ;
12316- int ready = PyUnicode_IS_READY (self );
12316+ if (PyUnicode_READY (self ) == -1 )
12317+ return -1 ;
1231712318
12318- Py_ssize_t len = ready ? PyUnicode_GET_LENGTH ( self ) : PyUnicode_GET_SIZE (self );
12319+ Py_ssize_t len = PyUnicode_GET_LENGTH (self );
1231912320 if (len == 0 ) {
1232012321 /* an empty string is not a valid identifier */
1232112322 return 0 ;
1232212323 }
1232312324
12324- int kind = 0 ;
12325- const void * data = NULL ;
12326- const wchar_t * wstr = NULL ;
12327- Py_UCS4 ch ;
12328- if (ready ) {
12329- kind = PyUnicode_KIND (self );
12330- data = PyUnicode_DATA (self );
12331- ch = PyUnicode_READ (kind , data , 0 );
12332- }
12333- else {
12334- wstr = _PyUnicode_WSTR (self );
12335- ch = wstr [0 ];
12336- }
12325+ int kind = PyUnicode_KIND (self );
12326+ const void * data = PyUnicode_DATA (self );
12327+ Py_UCS4 ch = PyUnicode_READ (kind , data , 0 );
1233712328 /* PEP 3131 says that the first character must be in
1233812329 XID_Start and subsequent characters in XID_Continue,
1233912330 and for the ASCII range, the 2.x rules apply (i.e
@@ -12347,17 +12338,44 @@ PyUnicode_IsIdentifier(PyObject *self)
1234712338 }
1234812339
1234912340 for (i = 1 ; i < len ; i ++ ) {
12350- if (ready ) {
12351- ch = PyUnicode_READ (kind , data , i );
12341+ ch = PyUnicode_READ (kind , data , i );
12342+ if (!_PyUnicode_IsXidContinue (ch )) {
12343+ return i ;
1235212344 }
12353- else {
12354- ch = wstr [i ];
12345+ }
12346+ return i ;
12347+ }
12348+
12349+ int
12350+ PyUnicode_IsIdentifier (PyObject * self )
12351+ {
12352+ if (PyUnicode_IS_READY (self )) {
12353+ Py_ssize_t i = _PyUnicode_ScanIdentifier (self );
12354+ Py_ssize_t len = PyUnicode_GET_LENGTH (self );
12355+ /* an empty string is not a valid identifier */
12356+ return len && i == len ;
12357+ }
12358+ else {
12359+ Py_ssize_t i , len = PyUnicode_GET_SIZE (self );
12360+ if (len == 0 ) {
12361+ /* an empty string is not a valid identifier */
12362+ return 0 ;
1235512363 }
12356- if (!_PyUnicode_IsXidContinue (ch )) {
12364+
12365+ const wchar_t * wstr = _PyUnicode_WSTR (self );
12366+ Py_UCS4 ch = wstr [0 ];
12367+ if (!_PyUnicode_IsXidStart (ch ) && ch != 0x5F /* LOW LINE */ ) {
1235712368 return 0 ;
1235812369 }
12370+
12371+ for (i = 1 ; i < len ; i ++ ) {
12372+ ch = wstr [i ];
12373+ if (!_PyUnicode_IsXidContinue (ch )) {
12374+ return 0 ;
12375+ }
12376+ }
12377+ return 1 ;
1235912378 }
12360- return 1 ;
1236112379}
1236212380
1236312381/*[clinic input]
0 commit comments