Skip to content

Commit 50cd4b6

Browse files
authored
bpo-26253: Add compressionlevel to tarfile stream (GH-2962)
`tarfile` already accepts a `compresslevel` argument for creating files. This patch adds the same for stream-based tarfile usage. The default is 9, the value that was previously hard-coded.
1 parent 81e91c9 commit 50cd4b6

File tree

4 files changed

+88
-11
lines changed

4 files changed

+88
-11
lines changed

Doc/library/tarfile.rst

+5-2
Original file line number | Diff line number | Diff line change
@@ -98,8 +98,8 @@ Some facts and figures:
9898
If *fileobj* is specified, it is used as an alternative to a :term:`file object`
9999
opened in binary mode for *name*. It is supposed to be at position 0.
100100

101-
For modes ``'w:gz'``, ``'r:gz'``, ``'w:bz2'``, ``'r:bz2'``, ``'x:gz'``,
102-
``'x:bz2'``, :func:`tarfile.open` accepts the keyword argument
101+
For modes ``'w:gz'``, ``'x:gz'``, ``'w|gz'``, ``'w:bz2'``, ``'x:bz2'``,
102+
``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument
103103
*compresslevel* (default ``9``) to specify the compression level of the file.
104104

105105
For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the
@@ -152,6 +152,9 @@ Some facts and figures:
152152
.. versionchanged:: 3.6
153153
The *name* parameter accepts a :term:`path-like object`.
154154

155+
.. versionchanged:: 3.12
156+
The *compresslevel* keyword argument also works for streams.
157+
155158

156159
.. class:: TarFile
157160
:noindex:

Lib/tarfile.py

+13-9
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,8 @@ class _Stream:
336336
_Stream is intended to be used only internally.
337337
"""
338338

339-
def __init__(self, name, mode, comptype, fileobj, bufsize):
339+
def __init__(self, name, mode, comptype, fileobj, bufsize,
340+
compresslevel):
340341
"""Construct a _Stream object.
341342
"""
342343
self._extfileobj = True
@@ -371,7 +372,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
371372
self._init_read_gz()
372373
self.exception = zlib.error
373374
else:
374-
self._init_write_gz()
375+
self._init_write_gz(compresslevel)
375376

376377
elif comptype == "bz2":
377378
try:
@@ -383,7 +384,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
383384
self.cmp = bz2.BZ2Decompressor()
384385
self.exception = OSError
385386
else:
386-
self.cmp = bz2.BZ2Compressor()
387+
self.cmp = bz2.BZ2Compressor(compresslevel)
387388

388389
elif comptype == "xz":
389390
try:
@@ -410,13 +411,14 @@ def __del__(self):
410411
if hasattr(self, "closed") and not self.closed:
411412
self.close()
412413

413-
def _init_write_gz(self):
414+
def _init_write_gz(self, compresslevel):
414415
"""Initialize for writing with gzip compression.
415416
"""
416-
self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417-
-self.zlib.MAX_WBITS,
418-
self.zlib.DEF_MEM_LEVEL,
419-
0)
417+
self.cmp = self.zlib.compressobj(compresslevel,
418+
self.zlib.DEFLATED,
419+
-self.zlib.MAX_WBITS,
420+
self.zlib.DEF_MEM_LEVEL,
421+
0)
420422
timestamp = struct.pack("<L", int(time.time()))
421423
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
422424
if self.name.endswith(".gz"):
@@ -1659,7 +1661,9 @@ def not_compressed(comptype):
16591661
if filemode not in ("r", "w"):
16601662
raise ValueError("mode must be 'r' or 'w'")
16611663

1662-
stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1664+
compresslevel = kwargs.pop("compresslevel", 9)
1665+
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1666+
compresslevel)
16631667
try:
16641668
t = cls(name, filemode, stream, **kwargs)
16651669
except:

Lib/test/test_tarfile.py

+68
Original file line number | Diff line number | Diff line change
@@ -1554,6 +1554,74 @@ class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
15541554
class LzmaStreamWriteTest(LzmaTest, StreamWriteTest):
15551555
decompressor = lzma.LZMADecompressor if lzma else None
15561556

1557+
class _CompressedWriteTest(TarTest):
1558+
# This is not actually a standalone test.
1559+
# It does not inherit WriteTest because it only makes sense with gz and bz2.
1560+
source = (b"And we move to Bristol where they have a special, " +
1561+
b"Very Silly candidate")
1562+
1563+
def _compressed_tar(self, compresslevel):
1564+
fobj = io.BytesIO()
1565+
with tarfile.open(tmpname, self.mode, fobj,
1566+
compresslevel=compresslevel) as tarfl:
1567+
tarfl.addfile(tarfile.TarInfo("foo"), io.BytesIO(self.source))
1568+
return fobj
1569+
1570+
def _test_bz2_header(self, compresslevel):
1571+
fobj = self._compressed_tar(compresslevel)
1572+
self.assertEqual(fobj.getvalue()[0:10],
1573+
b"BZh%d1AY&SY" % compresslevel)
1574+
1575+
def _test_gz_header(self, compresslevel):
1576+
fobj = self._compressed_tar(compresslevel)
1577+
self.assertEqual(fobj.getvalue()[:3], b"\x1f\x8b\x08")
1578+
1579+
class Bz2CompressWriteTest(Bz2Test, _CompressedWriteTest, unittest.TestCase):
1580+
prefix = "w:"
1581+
def test_compression_levels(self):
1582+
self._test_bz2_header(1)
1583+
self._test_bz2_header(5)
1584+
self._test_bz2_header(9)
1585+
1586+
class Bz2CompressStreamWriteTest(Bz2Test, _CompressedWriteTest,
1587+
unittest.TestCase):
1588+
prefix = "w|"
1589+
def test_compression_levels(self):
1590+
self._test_bz2_header(1)
1591+
self._test_bz2_header(5)
1592+
self._test_bz2_header(9)
1593+
1594+
class GzCompressWriteTest(GzipTest, _CompressedWriteTest, unittest.TestCase):
1595+
prefix = "w:"
1596+
def test_compression_levels(self):
1597+
self._test_gz_header(1)
1598+
self._test_gz_header(5)
1599+
self._test_gz_header(9)
1600+
1601+
class GzCompressStreamWriteTest(GzipTest, _CompressedWriteTest,
1602+
unittest.TestCase):
1603+
prefix = "w|"
1604+
def test_compression_levels(self):
1605+
self._test_gz_header(1)
1606+
self._test_gz_header(5)
1607+
self._test_gz_header(9)
1608+
1609+
class CompressLevelRaises(unittest.TestCase):
1610+
def test_compresslevel_wrong_modes(self):
1611+
compresslevel = 5
1612+
fobj = io.BytesIO()
1613+
with self.assertRaises(TypeError):
1614+
tarfile.open(tmpname, "w:", fobj, compresslevel=compresslevel)
1615+
1616+
def test_wrong_compresslevels(self):
1617+
# BZ2 checks that the compresslevel is in [1,9]. gz does not
1618+
fobj = io.BytesIO()
1619+
with self.assertRaises(ValueError):
1620+
tarfile.open(tmpname, "w:bz2", fobj, compresslevel=0)
1621+
with self.assertRaises(ValueError):
1622+
tarfile.open(tmpname, "w:bz2", fobj, compresslevel=10)
1623+
with self.assertRaises(ValueError):
1624+
tarfile.open(tmpname, "w|bz2", fobj, compresslevel=10)
15571625

15581626
class GNUWriteTest(unittest.TestCase):
15591627
# This testcase checks for correct creation of GNU Longname
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Allow adjustable compression level for tarfile streams in
2+
:func:`tarfile.open`.

0 commit comments

Comments
 (0)