Skip to content

Commit bc6c12c

Browse files
author
Ma Lin
authored
bpo-44439: BZ2File.write() / LZMAFile.write() handle buffer protocol correctly (GH-26764)
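No longer use len() to get the length of the input data. For some buffer protocol objects (for example, an array.array whose items are larger than one byte), len() returns the number of items rather than the number of bytes, so the reported length was wrong.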
1 parent 92c2e91 commit bc6c12c

File tree

7 files changed

+55
-9
lines changed

7 files changed

+55
-9
lines changed

Lib/bz2.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,22 @@ def write(self, data):
219219
"""Write a byte string to the file.
220220
221221
Returns the number of uncompressed bytes written, which is
222-
always len(data). Note that due to buffering, the file on disk
223-
may not reflect the data written until close() is called.
222+
always the length of data in bytes. Note that due to buffering,
223+
the file on disk may not reflect the data written until close()
224+
is called.
224225
"""
225226
self._check_can_write()
227+
if isinstance(data, (bytes, bytearray)):
228+
length = len(data)
229+
else:
230+
# accept any data that supports the buffer protocol
231+
data = memoryview(data)
232+
length = data.nbytes
233+
226234
compressed = self._compressor.compress(data)
227235
self._fp.write(compressed)
228-
self._pos += len(data)
229-
return len(data)
236+
self._pos += length
237+
return length
230238

231239
def writelines(self, seq):
232240
"""Write a sequence of byte strings to the file.

Lib/gzip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def write(self,data):
278278
if self.fileobj is None:
279279
raise ValueError("write() on closed GzipFile object")
280280

281-
if isinstance(data, bytes):
281+
if isinstance(data, (bytes, bytearray)):
282282
length = len(data)
283283
else:
284284
# accept any data that supports the buffer protocol

Lib/lzma.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,22 @@ def write(self, data):
229229
"""Write a bytes object to the file.
230230
231231
Returns the number of uncompressed bytes written, which is
232-
always len(data). Note that due to buffering, the file on disk
233-
may not reflect the data written until close() is called.
232+
always the length of data in bytes. Note that due to buffering,
233+
the file on disk may not reflect the data written until close()
234+
is called.
234235
"""
235236
self._check_can_write()
237+
if isinstance(data, (bytes, bytearray)):
238+
length = len(data)
239+
else:
240+
# accept any data that supports the buffer protocol
241+
data = memoryview(data)
242+
length = data.nbytes
243+
236244
compressed = self._compressor.compress(data)
237245
self._fp.write(compressed)
238-
self._pos += len(data)
239-
return len(data)
246+
self._pos += length
247+
return length
240248

241249
def seek(self, offset, whence=io.SEEK_SET):
242250
"""Change the file position.

Lib/test/test_bz2.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from test import support
22
from test.support import bigmemtest, _4G
33

4+
import array
45
import unittest
56
from io import BytesIO, DEFAULT_BUFFER_SIZE
67
import os
@@ -620,6 +621,14 @@ def test_read_truncated(self):
620621
with BZ2File(BytesIO(truncated[:i])) as f:
621622
self.assertRaises(EOFError, f.read, 1)
622623

624+
def test_issue44439(self):
625+
q = array.array('Q', [1, 2, 3, 4, 5])
626+
LENGTH = len(q) * q.itemsize
627+
628+
with BZ2File(BytesIO(), 'w') as f:
629+
self.assertEqual(f.write(q), LENGTH)
630+
self.assertEqual(f.tell(), LENGTH)
631+
623632

624633
class BZ2CompressorTest(BaseTest):
625634
def testCompress(self):

Lib/test/test_gzip.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,15 @@ def test_prepend_error(self):
592592
with gzip.open(self.filename, "rb") as f:
593593
f._buffer.raw._fp.prepend()
594594

595+
def test_issue44439(self):
596+
q = array.array('Q', [1, 2, 3, 4, 5])
597+
LENGTH = len(q) * q.itemsize
598+
599+
with gzip.GzipFile(fileobj=io.BytesIO(), mode='w') as f:
600+
self.assertEqual(f.write(q), LENGTH)
601+
self.assertEqual(f.tell(), LENGTH)
602+
603+
595604
class TestOpen(BaseTest):
596605
def test_binary_modes(self):
597606
uncompressed = data1 * 50

Lib/test/test_lzma.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import _compression
2+
import array
23
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
34
import os
45
import pathlib
@@ -1231,6 +1232,14 @@ def test_issue21872(self):
12311232
self.assertTrue(d2.eof)
12321233
self.assertEqual(out1 + out2, entire)
12331234

1235+
def test_issue44439(self):
1236+
q = array.array('Q', [1, 2, 3, 4, 5])
1237+
LENGTH = len(q) * q.itemsize
1238+
1239+
with LZMAFile(BytesIO(), 'w') as f:
1240+
self.assertEqual(f.write(q), LENGTH)
1241+
self.assertEqual(f.tell(), LENGTH)
1242+
12341243

12351244
class OpenTestCase(unittest.TestCase):
12361245

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :meth:`bz2.BZ2File.write` and :meth:`lzma.LZMAFile.write`: when the input
2+
data is an object that supports the buffer protocol, the returned length and
3+
the file position could be wrong; they are now counted in bytes.

0 commit comments

Comments
 (0)