@@ -899,6 +899,7 @@ ensure_unicode(PyObject *obj)
899
899
#include "stringlib/count.h"
900
900
#include "stringlib/find.h"
901
901
#include "stringlib/replace.h"
902
+ #include "stringlib/repr.h"
902
903
#include "stringlib/find_max_char.h"
903
904
#include "stringlib/undef.h"
904
905
@@ -909,6 +910,7 @@ ensure_unicode(PyObject *obj)
909
910
#include "stringlib/count.h"
910
911
#include "stringlib/find.h"
911
912
#include "stringlib/replace.h"
913
+ #include "stringlib/repr.h"
912
914
#include "stringlib/find_max_char.h"
913
915
#include "stringlib/undef.h"
914
916
@@ -919,6 +921,7 @@ ensure_unicode(PyObject *obj)
919
921
#include "stringlib/count.h"
920
922
#include "stringlib/find.h"
921
923
#include "stringlib/replace.h"
924
+ #include "stringlib/repr.h"
922
925
#include "stringlib/find_max_char.h"
923
926
#include "stringlib/undef.h"
924
927
@@ -12336,24 +12339,17 @@ unicode_removesuffix_impl(PyObject *self, PyObject *suffix)
12336
12339
static PyObject *
12337
12340
unicode_repr (PyObject * unicode )
12338
12341
{
12339
- PyObject * repr ;
12340
- Py_ssize_t isize ;
12341
- Py_ssize_t osize , squote , dquote , i , o ;
12342
- Py_UCS4 max , quote ;
12343
- int ikind , okind , unchanged ;
12344
- const void * idata ;
12345
- void * odata ;
12346
-
12347
- isize = PyUnicode_GET_LENGTH (unicode );
12348
- idata = PyUnicode_DATA (unicode );
12342
+ Py_ssize_t isize = PyUnicode_GET_LENGTH (unicode );
12343
+ const void * idata = PyUnicode_DATA (unicode );
12349
12344
12350
12345
/* Compute length of output, quote characters, and
12351
12346
maximum character */
12352
- osize = 0 ;
12353
- max = 127 ;
12354
- squote = dquote = 0 ;
12355
- ikind = PyUnicode_KIND (unicode );
12356
- for (i = 0 ; i < isize ; i ++ ) {
12347
+ Py_ssize_t osize = 0 ;
12348
+ Py_UCS4 maxch = 127 ;
12349
+ Py_ssize_t squote = 0 ;
12350
+ Py_ssize_t dquote = 0 ;
12351
+ int ikind = PyUnicode_KIND (unicode );
12352
+ for (Py_ssize_t i = 0 ; i < isize ; i ++ ) {
12357
12353
Py_UCS4 ch = PyUnicode_READ (ikind , idata , i );
12358
12354
Py_ssize_t incr = 1 ;
12359
12355
switch (ch ) {
@@ -12369,7 +12365,7 @@ unicode_repr(PyObject *unicode)
12369
12365
else if (ch < 0x7f )
12370
12366
;
12371
12367
else if (Py_UNICODE_ISPRINTABLE (ch ))
12372
- max = ch > max ? ch : max ;
12368
+ maxch = ( ch > maxch ) ? ch : maxch ;
12373
12369
else if (ch < 0x100 )
12374
12370
incr = 4 ; /* \xHH */
12375
12371
else if (ch < 0x10000 )
@@ -12385,10 +12381,10 @@ unicode_repr(PyObject *unicode)
12385
12381
osize += incr ;
12386
12382
}
12387
12383
12388
- quote = '\'' ;
12389
- unchanged = (osize = = isize );
12384
+ Py_UCS4 quote = '\'' ;
12385
+ int changed = (osize ! = isize );
12390
12386
if (squote ) {
12391
- unchanged = 0 ;
12387
+ changed = 1 ;
12392
12388
if (dquote )
12393
12389
/* Both squote and dquote present. Use squote,
12394
12390
and escape them */
@@ -12398,99 +12394,35 @@ unicode_repr(PyObject *unicode)
12398
12394
}
12399
12395
osize += 2 ; /* quotes */
12400
12396
12401
- repr = PyUnicode_New (osize , max );
12397
+ PyObject * repr = PyUnicode_New (osize , maxch );
12402
12398
if (repr == NULL )
12403
12399
return NULL ;
12404
- okind = PyUnicode_KIND (repr );
12405
- odata = PyUnicode_DATA (repr );
12400
+ int okind = PyUnicode_KIND (repr );
12401
+ void * odata = PyUnicode_DATA (repr );
12402
+
12403
+ if (!changed ) {
12404
+ PyUnicode_WRITE (okind , odata , 0 , quote );
12406
12405
12407
- PyUnicode_WRITE (okind , odata , 0 , quote );
12408
- PyUnicode_WRITE (okind , odata , osize - 1 , quote );
12409
- if (unchanged ) {
12410
12406
_PyUnicode_FastCopyCharacters (repr , 1 ,
12411
12407
unicode , 0 ,
12412
12408
isize );
12409
+
12410
+ PyUnicode_WRITE (okind , odata , osize - 1 , quote );
12413
12411
}
12414
12412
else {
12415
- for (i = 0 , o = 1 ; i < isize ; i ++ ) {
12416
- Py_UCS4 ch = PyUnicode_READ (ikind , idata , i );
12417
-
12418
- /* Escape quotes and backslashes */
12419
- if ((ch == quote ) || (ch == '\\' )) {
12420
- PyUnicode_WRITE (okind , odata , o ++ , '\\' );
12421
- PyUnicode_WRITE (okind , odata , o ++ , ch );
12422
- continue ;
12423
- }
12424
-
12425
- /* Map special whitespace to '\t', \n', '\r' */
12426
- if (ch == '\t' ) {
12427
- PyUnicode_WRITE (okind , odata , o ++ , '\\' );
12428
- PyUnicode_WRITE (okind , odata , o ++ , 't' );
12429
- }
12430
- else if (ch == '\n' ) {
12431
- PyUnicode_WRITE (okind , odata , o ++ , '\\' );
12432
- PyUnicode_WRITE (okind , odata , o ++ , 'n' );
12433
- }
12434
- else if (ch == '\r' ) {
12435
- PyUnicode_WRITE (okind , odata , o ++ , '\\' );
12436
- PyUnicode_WRITE (okind , odata , o ++ , 'r' );
12437
- }
12438
-
12439
- /* Map non-printable US ASCII to '\xhh' */
12440
- else if (ch < ' ' || ch == 0x7F ) {
12441
- PyUnicode_WRITE (okind , odata , o ++ , '\\' );
12442
- PyUnicode_WRITE (okind , odata , o ++ , 'x' );
12443
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 4 ) & 0x000F ]);
12444
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [ch & 0x000F ]);
12445
- }
12446
-
12447
- /* Copy ASCII characters as-is */
12448
- else if (ch < 0x7F ) {
12449
- PyUnicode_WRITE (okind , odata , o ++ , ch );
12450
- }
12451
-
12452
- /* Non-ASCII characters */
12453
- else {
12454
- /* Map Unicode whitespace and control characters
12455
- (categories Z* and C* except ASCII space)
12456
- */
12457
- if (!Py_UNICODE_ISPRINTABLE (ch )) {
12458
- PyUnicode_WRITE (okind , odata , o ++ , '\\' );
12459
- /* Map 8-bit characters to '\xhh' */
12460
- if (ch <= 0xff ) {
12461
- PyUnicode_WRITE (okind , odata , o ++ , 'x' );
12462
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 4 ) & 0x000F ]);
12463
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [ch & 0x000F ]);
12464
- }
12465
- /* Map 16-bit characters to '\uxxxx' */
12466
- else if (ch <= 0xffff ) {
12467
- PyUnicode_WRITE (okind , odata , o ++ , 'u' );
12468
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 12 ) & 0xF ]);
12469
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 8 ) & 0xF ]);
12470
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 4 ) & 0xF ]);
12471
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [ch & 0xF ]);
12472
- }
12473
- /* Map 21-bit characters to '\U00xxxxxx' */
12474
- else {
12475
- PyUnicode_WRITE (okind , odata , o ++ , 'U' );
12476
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 28 ) & 0xF ]);
12477
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 24 ) & 0xF ]);
12478
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 20 ) & 0xF ]);
12479
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 16 ) & 0xF ]);
12480
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 12 ) & 0xF ]);
12481
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 8 ) & 0xF ]);
12482
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [(ch >> 4 ) & 0xF ]);
12483
- PyUnicode_WRITE (okind , odata , o ++ , Py_hexdigits [ch & 0xF ]);
12484
- }
12485
- }
12486
- /* Copy characters as-is */
12487
- else {
12488
- PyUnicode_WRITE (okind , odata , o ++ , ch );
12489
- }
12490
- }
12413
+ switch (okind ) {
12414
+ case PyUnicode_1BYTE_KIND :
12415
+ ucs1lib_repr (unicode , quote , odata );
12416
+ break ;
12417
+ case PyUnicode_2BYTE_KIND :
12418
+ ucs2lib_repr (unicode , quote , odata );
12419
+ break ;
12420
+ default :
12421
+ assert (okind == PyUnicode_4BYTE_KIND );
12422
+ ucs4lib_repr (unicode , quote , odata );
12491
12423
}
12492
12424
}
12493
- /* Closing quote already added at the beginning */
12425
+
12494
12426
assert (_PyUnicode_CheckConsistency (repr , 1 ));
12495
12427
return repr ;
12496
12428
}
0 commit comments