Skip to content

gh-103477: Write gzip trailer with zlib #112199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(globals)
STRUCT_FOR_ID(groupindex)
STRUCT_FOR_ID(groups)
STRUCT_FOR_ID(gzip_trailer)
STRUCT_FOR_ID(h)
STRUCT_FOR_ID(handle)
STRUCT_FOR_ID(handle_seq)
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 7 additions & 12 deletions Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ def __init__(self, filename=None, mode=None,
zlib.DEFLATED,
-zlib.MAX_WBITS,
zlib.DEF_MEM_LEVEL,
0)
0,
gzip_trailer=True)
self._write_mtime = mtime
self._buffer_size = _WRITE_BUFFER_SIZE
self._buffer = io.BufferedWriter(_WriteBufferStream(self),
Expand All @@ -245,8 +246,6 @@ def __repr__(self):

def _init_write(self, filename):
self.name = filename
self.crc = zlib.crc32(b"")
self.size = 0
self.writebuf = []
self.bufsize = 0
self.offset = 0 # Current file offset for seek(), tell(), etc
Expand Down Expand Up @@ -310,8 +309,6 @@ def _write_raw(self, data):

if length > 0:
self.fileobj.write(self.compress.compress(data))
self.size += length
self.crc = zlib.crc32(data, self.crc)
self.offset += length

return length
Expand Down Expand Up @@ -355,9 +352,6 @@ def close(self):
if self.mode == WRITE:
self._buffer.flush()
fileobj.write(self.compress.flush())
write32u(fileobj, self.crc)
# self.size may exceed 2 GiB, or even 4 GiB
write32u(fileobj, self.size & 0xffffffff)
elif self.mode == READ:
self._buffer.close()
finally:
Expand Down Expand Up @@ -611,10 +605,11 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
# This is faster and with less overhead.
return zlib.compress(data, level=compresslevel, wbits=31)
header = _create_simple_gzip_header(compresslevel, mtime)
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
# Wbits=-15 creates a raw deflate block.
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
trailer)
# Wbits=-15 creates a raw deflate block. Gzip_trailer=True computes CRC32
# and writes gzip trailer with zlib, which on some platforms is faster
# than doing it manually.
return (header + zlib.compress(data, level=compresslevel, wbits=-15,
gzip_trailer=True))


def decompress(data):
Expand Down
9 changes: 2 additions & 7 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,6 @@ def __init__(self, name, mode, comptype, fileobj, bufsize,
except ImportError:
raise CompressionError("zlib module is not available") from None
self.zlib = zlib
self.crc = zlib.crc32(b"")
if mode == "r":
self.exception = zlib.error
self._init_read_gz()
Expand Down Expand Up @@ -421,7 +420,8 @@ def _init_write_gz(self, compresslevel):
self.zlib.DEFLATED,
-self.zlib.MAX_WBITS,
self.zlib.DEF_MEM_LEVEL,
0)
0,
gzip_trailer=True)
timestamp = struct.pack("<L", int(time.time()))
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if self.name.endswith(".gz"):
Expand All @@ -434,8 +434,6 @@ def _init_write_gz(self, compresslevel):
def write(self, s):
"""Write string s to the stream.
"""
if self.comptype == "gz":
self.crc = self.zlib.crc32(s, self.crc)
self.pos += len(s)
if self.comptype != "tar":
s = self.cmp.compress(s)
Expand Down Expand Up @@ -465,9 +463,6 @@ def close(self):
if self.mode == "w" and self.buf:
self.fileobj.write(self.buf)
self.buf = b""
if self.comptype == "gz":
self.fileobj.write(struct.pack("<L", self.crc))
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
finally:
if not self._extfileobj:
self.fileobj.close()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Write gzip trailer with zlib, improving gzip compression performance on s390x by roughly 40%.
71 changes: 49 additions & 22 deletions Modules/clinic/zlibmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading