Skip to content

Commit 39b1d7e

Browse files
committed
Rewrite _85encode memory-optimized for performance
Use a dedicated generator function to iterate over the buffer more efficiently, unpacking 512 bytes at a time until fewer than 512 bytes remain. This makes far fewer calls to the unpack method.
1 parent 619ac7f commit 39b1d7e

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

Lib/base64.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -299,19 +299,36 @@ def b16decode(s, casefold=False):
299299
_A85START = b"<~"
300300
_A85END = b"~>"
301301

302+
def _85buffer_iter_words(b):
303+
# Utility method for _85encode
304+
# yield unpacked int32 words from buffer, hopefully in an efficient manner,
305+
# padding the last part with NULL bytes if necessary
306+
n1 = len(b) // 512 # number of 512 bytes unpack
307+
n2 = (len(b) - n1 * 512) // 4 # number of 4 bytes unpack
308+
padding = (-len(b)) % 4
309+
310+
unpack512 = struct.Struct("!128I").unpack
311+
unpack4 = struct.Struct("!I").unpack
312+
313+
offset = 0
314+
for _ in range(n1):
315+
for c in unpack512(b[offset:offset+512]):
316+
yield c
317+
offset += 512
318+
319+
for _ in range(n2):
320+
yield unpack4(b[offset:offset+4])[0]
321+
offset += 4
322+
323+
if padding:
324+
yield unpack4(b[offset:] + b'\0' * padding)[0]
325+
302326
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
303327
# Helper function for a85encode and b85encode
304328
if not isinstance(b, bytes_types):
305329
b = memoryview(b).tobytes()
306330

307-
padding = (-len(b)) % 4
308-
if padding:
309-
b = b + b'\0' * padding
310-
311-
unpack = struct.Struct("!I").unpack
312-
ibytes = (b[i:i+4] for i in range(0, len(b), 4)) # 4 bytes each
313-
words = (unpack(i)[0] for i in ibytes)
314-
331+
words = _85buffer_iter_words(b)
315332
chunks = (b'z' if foldnuls and not word else
316333
b'y' if foldspaces and word == 0x20202020 else
317334
(chars2[word // 614125] +
@@ -325,6 +342,7 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
325342
ret += chunk
326343

327344
# update of the last chunk afterwards
345+
padding = (-len(b)) % 4
328346
if chunk and padding and not pad:
329347
ret[-len(chunk):] = []
330348

0 commit comments

Comments
 (0)