14
14
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
15
15
_whitespace = '\t \n \x0b \x0c \r '
16
16
17
+ try :
18
+ from unicodedata import east_asian_width
19
+
20
+ def _width (text ):
21
+ """Return the display width of the text in columns, according to
22
+ unicodedata.east_asian_width only.
23
+ """
24
+ return sum (2 if east_asian_width (char ) in {'F' , 'W' } else 1
25
+ for char in text )
26
+
27
+ def _slice (text , index ):
28
+ """Return the two slices of text cut to index.
29
+ """
30
+ width = 0
31
+ pos = 0
32
+ for char in text :
33
+ width += 2 if east_asian_width (char ) in {'F' , 'W' } else 1
34
+ if width > index :
35
+ break
36
+ pos += 1
37
+ return text [:pos ], text [pos :]
38
+
39
+ except ImportError :
40
+
41
+ def _width (text ):
42
+ """Fallback in case unicodedata is not available: The display width of
43
+ a text is just its number of characters.
44
+ """
45
+ return len (text )
46
+
47
+ def _slice (text , index ):
48
+ return text [:index ], text [index :]
49
+
50
+
17
51
class TextWrapper :
18
52
"""
19
53
Object for wrapping/filling text. The public interface consists of
@@ -215,8 +249,9 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
215
249
# If we're allowed to break long words, then do so: put as much
216
250
# of the next chunk onto the current line as will fit.
217
251
if self .break_long_words :
218
- cur_line .append (reversed_chunks [- 1 ][:space_left ])
219
- reversed_chunks [- 1 ] = reversed_chunks [- 1 ][space_left :]
252
+ left , right = _slice (reversed_chunks [- 1 ], space_left )
253
+ cur_line .append (left )
254
+ reversed_chunks [- 1 ] = right
220
255
221
256
# Otherwise, we have to preserve the long word intact. Only add
222
257
# it to the current line if there's nothing already there --
@@ -244,14 +279,13 @@ def _wrap_chunks(self, chunks):
244
279
lines, but apart from that whitespace is preserved.
245
280
"""
246
281
lines = []
247
- if self .width <= 0 :
248
- raise ValueError ("invalid width %r (must be > 0)" % self .width )
249
282
if self .max_lines is not None :
250
283
if self .max_lines > 1 :
251
284
indent = self .subsequent_indent
252
285
else :
253
286
indent = self .initial_indent
254
- if len (indent ) + len (self .placeholder .lstrip ()) > self .width :
287
+ if (_width (indent ) +
288
+ _width (self .placeholder .lstrip ()) > self .width ):
255
289
raise ValueError ("placeholder too large for max width" )
256
290
257
291
# Arrange in reverse order so items can be efficiently popped
@@ -272,15 +306,15 @@ def _wrap_chunks(self, chunks):
272
306
indent = self .initial_indent
273
307
274
308
# Maximum width for this line.
275
- width = self .width - len (indent )
309
+ width = self .width - _width (indent )
276
310
277
311
# First chunk on line is whitespace -- drop it, unless this
278
312
# is the very beginning of the text (ie. no lines started yet).
279
313
if self .drop_whitespace and chunks [- 1 ].strip () == '' and lines :
280
314
del chunks [- 1 ]
281
315
282
316
while chunks :
283
- l = len (chunks [- 1 ])
317
+ l = _width (chunks [- 1 ])
284
318
285
319
# Can at least squeeze this chunk onto the current line.
286
320
if cur_len + l <= width :
@@ -290,16 +324,15 @@ def _wrap_chunks(self, chunks):
290
324
# Nope, this line is full.
291
325
else :
292
326
break
293
-
294
327
# The current line is full, and the next chunk is too big to
295
328
# fit on *any* line (not just this one).
296
- if chunks and len (chunks [- 1 ]) > width :
329
+ if chunks and _width (chunks [- 1 ]) > width :
297
330
self ._handle_long_word (chunks , cur_line , cur_len , width )
298
- cur_len = sum (map (len , cur_line ))
331
+ cur_len = sum (map (_width , cur_line ))
299
332
300
333
# If the last chunk on this line is all whitespace, drop it.
301
334
if self .drop_whitespace and cur_line and cur_line [- 1 ].strip () == '' :
302
- cur_len -= len (cur_line [- 1 ])
335
+ cur_len -= _width (cur_line [- 1 ])
303
336
del cur_line [- 1 ]
304
337
305
338
if cur_line :
@@ -315,17 +348,17 @@ def _wrap_chunks(self, chunks):
315
348
else :
316
349
while cur_line :
317
350
if (cur_line [- 1 ].strip () and
318
- cur_len + len (self .placeholder ) <= width ):
351
+ cur_len + _width (self .placeholder ) <= width ):
319
352
cur_line .append (self .placeholder )
320
353
lines .append (indent + '' .join (cur_line ))
321
354
break
322
- cur_len -= len (cur_line [- 1 ])
355
+ cur_len -= _width (cur_line [- 1 ])
323
356
del cur_line [- 1 ]
324
357
else :
325
358
if lines :
326
359
prev_line = lines [- 1 ].rstrip ()
327
- if (len (prev_line ) + len ( self . placeholder ) <=
328
- self .width ):
360
+ if (_width (prev_line ) +
361
+ _width ( self . placeholder ) <= self .width ):
329
362
lines [- 1 ] = prev_line + self .placeholder
330
363
break
331
364
lines .append (indent + self .placeholder .lstrip ())
@@ -348,6 +381,10 @@ def wrap(self, text):
348
381
and all other whitespace characters (including newline) are
349
382
converted to space.
350
383
"""
384
+ if self .width <= 0 :
385
+ raise ValueError ("invalid width %r (must be > 0)" % self .width )
386
+ elif self .width == 1 and _width (text ) > len (text ):
387
+ raise ValueError ("invalid width 1 (must be > 1 when CJK chars)" )
351
388
chunks = self ._split_chunks (text )
352
389
if self .fix_sentence_endings :
353
390
self ._fix_sentence_endings (chunks )
0 commit comments