From 523430fe57f292c3fffe3b72968a21fae459adaa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 06:33:34 +0100 Subject: [PATCH 01/94] Add zlib-ng module --- .gitmodules | 3 +++ src/zlib-ng | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 src/zlib-ng diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..aba045f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "src/zlib-ng"] + path = src/zlib-ng + url = https://github.com/zlib-ng/zlib-ng.git diff --git a/src/zlib-ng b/src/zlib-ng new file mode 160000 index 0000000..b56a2fd --- /dev/null +++ b/src/zlib-ng @@ -0,0 +1 @@ +Subproject commit b56a2fd0b126cfe5f13e68ab9090cd4f6a773286 From efaa514a5ed5b7f5ad7f6ece57bd16ad453d919b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 07:24:14 +0100 Subject: [PATCH 02/94] Working static build system --- .gitmodules | 2 +- README.rst | 0 setup.py | 157 ++++++++++++++++++++++++++++++++++++ src/{ => zlib_ng}/zlib-ng | 0 src/zlib_ng/zlib_ngmodule.c | 0 5 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 README.rst create mode 100644 setup.py rename src/{ => zlib_ng}/zlib-ng (100%) create mode 100644 src/zlib_ng/zlib_ngmodule.c diff --git a/.gitmodules b/.gitmodules index aba045f..da39485 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "src/zlib-ng"] - path = src/zlib-ng + path = src/zlib_ng/zlib-ng url = https://github.com/zlib-ng/zlib-ng.git diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..42830e7 --- /dev/null +++ b/setup.py @@ -0,0 +1,157 @@ +# Copyright (c) 2020 Leiden University Medical Center +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-isal which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +import functools +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +from setuptools import Extension, find_packages, setup +from setuptools.command.build_ext import build_ext + +ZLIB_NG_SOURCE = os.path.join("src", "zlib_ng", "zlib-ng") + +SYSTEM_IS_UNIX = (sys.platform.startswith("linux") or + sys.platform.startswith("darwin")) +SYSTEM_IS_WINDOWS = sys.platform.startswith("win") + +# Since pip builds in a temp directory by default, setting a fixed file in +# /tmp works during the entire session. +DEFAULT_CACHE_FILE = Path(tempfile.gettempdir() + ).absolute() / ".zlib_ng_build_cache" +BUILD_CACHE = os.environ.get("PYTHON_ZLIB_NG_BUILD_CACHE") +BUILD_CACHE_FILE = Path(os.environ.get("PYTHON_ZLIB_NG_BUILD_CACHE_FILE", + DEFAULT_CACHE_FILE)) + +EXTENSIONS = [ + Extension("zlib_ng", ["src/zlib_ng/zlib_ngmodule.c"]), + ] + + +class BuildZlibNGExt(build_ext): + def build_extension(self, ext): + # Add option to link dynamically for packaging systems such as conda. + # Always link dynamically on readthedocs to simplify install. + if (os.getenv("PYTHON_ZLIB_NG_LINK_DYNAMIC") is not None or + os.environ.get("READTHEDOCS") is not None): + # Check for zlib_ng include directories. This is useful when + # installing in a conda environment. 
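+            # (Conda-style prefixes keep headers in <prefix>/include on Unix
+            # and in <prefix>/Library/include on Windows, hence the two
+            # checks below.)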
+ possible_prefixes = [sys.exec_prefix, sys.base_exec_prefix] + for prefix in possible_prefixes: + if Path(prefix, "include", "zlib-ng.h").exists(): + ext.include_dirs = [os.path.join(prefix, "include")] + ext.library_dirs = [os.path.join(prefix, "lib")] + break # Only one include directory is needed. + # On windows include is in Library apparently + elif Path(prefix, "Library", "include", "zlib-ng.h").exists(): + ext.include_dirs = [os.path.join(prefix, "Library", + "include")] + ext.library_dirs = [os.path.join(prefix, "Library", "lib")] + break + if SYSTEM_IS_UNIX: + ext.libraries = ["z-ng"] # libz-ng.so* + elif SYSTEM_IS_WINDOWS: + ext.libraries = ["zlib-ng"] # zlib-ng*.dll + else: + raise NotImplementedError( + f"Unsupported platform: {sys.platform}") + else: + build_dir = build_zlib_ng() + if SYSTEM_IS_UNIX: + ext.extra_objects = [ + os.path.join(build_dir, "libz-ng.a")] + elif SYSTEM_IS_WINDOWS: + ext.extra_objects = [ + os.path.join(build_dir, "zlib-ng.lib")] + else: + raise NotImplementedError( + f"Unsupported platform: {sys.platform}") + ext.include_dirs = [build_dir] + # -fPIC needed for proper static linking + # ext.extra_compile_args = ["-fPIC"] + pass + super().build_extension(ext) + + +# Use a cache to prevent zlib-ng from being build twice. +@functools.lru_cache(maxsize=None) +def build_zlib_ng(): + # Check for cache + if BUILD_CACHE: + if BUILD_CACHE_FILE.exists(): + cache_path = Path(BUILD_CACHE_FILE.read_text()) + if (cache_path / "include" / "zlib-ng.h").exists(): + return str(cache_path) + + # Creating temporary directories + build_dir = tempfile.mktemp() + shutil.copytree(ZLIB_NG_SOURCE, build_dir) + + if hasattr(os, "sched_getaffinity"): + cpu_count = len(os.sched_getaffinity(0)) + else: # sched_getaffinity not available on all platforms + cpu_count = os.cpu_count() or 1 # os.cpu_count() can return None + # Build environment is a copy of OS environment to allow user to influence + # it. + build_env = os.environ.copy() + # Add -fPIC flag to allow static compilation + run_args = dict(cwd=build_dir, env=build_env) + subprocess.run(["cmake", build_dir], **run_args) + subprocess.run(["cmake", "--build", build_dir, "--config", "Release", + "-v", "-j", str(cpu_count)], **run_args) + subprocess.run(["ctest", "--verbose", "-C", "Release", "-j", str(cpu_count)], + **run_args) + if BUILD_CACHE: + BUILD_CACHE_FILE.write_text(build_dir) + return build_dir + + +setup( + name="zlib-ng", + version="0.1.0", + description="Drop-in replacement for zlib and gzip modules using zlib-ng", + author="Leiden University Medical Center", + author_email="r.h.p.vorderman@lumc.nl", # A placeholder for now + long_description=Path("README.rst").read_text(), + long_description_content_type="text/x-rst", + cmdclass={"build_ext": BuildZlibNGExt}, + license="PSF-2.0", + keywords="zlib-ng zlib compression deflate gzip", + zip_safe=False, + packages=find_packages('src'), + package_dir={'': 'src'}, + package_data={'zlib_ng': ['*.pyi', 'py.typed', + # Include isa-l LICENSE and other relevant files + # with the binary distribution. 
+ 'isa-l/LICENSE', 'isa-l/README.md', + 'isa-l/Release_notes.txt']}, + url="https://github.com/pycompression/python-isal", + classifiers=[ + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: C", + "Development Status :: 3 - Alpha", + "Topic :: System :: Archiving :: Compression", + "License :: OSI Approved :: Python Software Foundation License", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + ], + python_requires=">=3.7", # uses METH_FASTCALL + ext_modules=EXTENSIONS +) diff --git a/src/zlib-ng b/src/zlib_ng/zlib-ng similarity index 100% rename from src/zlib-ng rename to src/zlib_ng/zlib-ng diff --git a/src/zlib_ng/zlib_ngmodule.c b/src/zlib_ng/zlib_ngmodule.c new file mode 100644 index 0000000..e69de29 From ef56bd6f96a9a36d5d2bf65ccb70911e2777ad7a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 07:27:43 +0100 Subject: [PATCH 03/94] Include LICENSE and README for zlib-ng in all distributions --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 42830e7..63a65b6 100644 --- a/setup.py +++ b/setup.py @@ -129,10 +129,9 @@ def build_zlib_ng(): packages=find_packages('src'), package_dir={'': 'src'}, package_data={'zlib_ng': ['*.pyi', 'py.typed', - # Include isa-l LICENSE and other relevant files + # Include zlib-ng LICENSE and other relevant files # with the binary distribution. - 'isa-l/LICENSE', 'isa-l/README.md', - 'isa-l/Release_notes.txt']}, + 'zlib-ng/LICENSE', 'zlib-ng/README.md']}, url="https://github.com/pycompression/python-isal", classifiers=[ "Programming Language :: Python :: 3 :: Only", From 85a33e7af40e220d9703c74f8772fcf08dfd08f5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 07:37:53 +0100 Subject: [PATCH 04/94] Start on packaging --- LICENSE | 48 +++++++++++++++++++++++++++++++++ MANIFEST.in | 1 + setup.py | 6 ++--- src/zlib_ng/py.typed | 1 + src/zlib_ng/zlib_ng.pyi | 60 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 src/zlib_ng/py.typed create mode 100644 src/zlib_ng/zlib_ng.pyi diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..35acd7f --- /dev/null +++ b/LICENSE @@ -0,0 +1,48 @@ +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..49eb90b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +graft src/zlib_ng/zlib-ng diff --git a/setup.py b/setup.py index 63a65b6..5321370 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 # Python Software Foundation; All Rights Reserved -# This file is part of python-isal which is distributed under the +# This file is part of python-zlib-ng which is distributed under the # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. import functools @@ -131,8 +131,8 @@ def build_zlib_ng(): package_data={'zlib_ng': ['*.pyi', 'py.typed', # Include zlib-ng LICENSE and other relevant files # with the binary distribution. - 'zlib-ng/LICENSE', 'zlib-ng/README.md']}, - url="https://github.com/pycompression/python-isal", + 'zlib-ng/LICENSE.md', 'zlib-ng/README.md']}, + url="https://github.com/pycompression/python-zlib-ng", classifiers=[ "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3", diff --git a/src/zlib_ng/py.typed b/src/zlib_ng/py.typed new file mode 100644 index 0000000..1242d43 --- /dev/null +++ b/src/zlib_ng/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. 
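The stub file added below mirrors the zlib API one-to-one. As a quick editorial sketch of what those signatures allow (not part of the patch; it assumes a successful build and that the extension is importable as zlib_ng.zlib_ng, the package layout this series converges on):

    from zlib_ng import zlib_ng

    data = b"example data" * 1024
    # One-shot round trip, with the same defaults as the stdlib zlib module.
    compressed = zlib_ng.compress(data, level=zlib_ng.Z_BEST_SPEED)
    assert zlib_ng.decompress(compressed) == data

    # Incremental use mirrors zlib.compressobj()/decompressobj().
    co = zlib_ng.compressobj(level=zlib_ng.Z_DEFAULT_COMPRESSION)
    parts = [co.compress(data), co.flush()]
    do = zlib_ng.decompressobj()
    assert do.decompress(b"".join(parts)) == data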
diff --git a/src/zlib_ng/zlib_ng.pyi b/src/zlib_ng/zlib_ng.pyi new file mode 100644 index 0000000..489c363 --- /dev/null +++ b/src/zlib_ng/zlib_ng.pyi @@ -0,0 +1,60 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-isal which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +Z_BEST_SPEED: int +Z_BEST_COMPRESSION: int +Z_DEFAULT_COMPRESSION: int + +DEF_BUF_SIZE: int +DEF_MEM_LEVEL: int +MAX_WBITS: int + +DEFLATED: int + +Z_DEFAULT_STRATEGY: int +Z_RLE: int +Z_HUFFMAN_ONLY: int +Z_FILTERED: int +Z_FIXED: int + +Z_NO_FLUSH: int +Z_SYNC_FLUSH: int +Z_FULL_FLUSH: int +Z_FINISH: int + +error: Exception + +def adler32(__data, __value: int = 1) -> int: ... +def crc32(__data, __value: int = 0) -> int: ... + +def compress(__data, + level: int = Z_DEFAULT_COMPRESSION, + wbits: int = MAX_WBITS) -> bytes: ... +def decompress(__data, + wbits: int = MAX_WBITS, + bufsize: int = DEF_BUF_SIZE) -> bytes: ... + +class Compress: + def compress(self, __data) -> bytes: ... + def flush(self, mode: int = Z_FINISH) -> bytes: ... + +class Decompress: + unused_data: bytes + unconsumed_tail: bytes + eof: bool + + def decompress(self, __data, max_length: int = 0) -> bytes: ... + def flush(self, length: int = DEF_BUF_SIZE) -> bytes: ... + +def compressobj(level: int = Z_DEFAULT_COMPRESSION, + method: int = DEFLATED, + wbits: int = MAX_WBITS, + memLevel: int = DEF_MEM_LEVEL, + strategy: int = Z_DEFAULT_STRATEGY, + zdict = None) -> Compress: ... + +def decompressobj(wbits: int = MAX_WBITS, zdict = None) -> Decompress: ... From c2078a41625bdb45ae829f78822c31cbad86c948 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 07:50:56 +0100 Subject: [PATCH 05/94] Make C module a sub extension --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5321370..7bfaa61 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ DEFAULT_CACHE_FILE)) EXTENSIONS = [ - Extension("zlib_ng", ["src/zlib_ng/zlib_ngmodule.c"]), + Extension("zlib_ng.zlib_ng", ["src/zlib_ng/zlib_ngmodule.c"]), ] From dd6d169e1a8fe0fc125693ada69b5bb603933dc1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 10:25:35 +0100 Subject: [PATCH 06/94] Add __init__.py to properly init package --- setup.py | 3 +-- src/zlib_ng/__init__.py | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 src/zlib_ng/__init__.py diff --git a/setup.py b/setup.py index 7bfaa61..5356972 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ -# Copyright (c) 2020 Leiden University Medical Center # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 # Python Software Foundation; All Rights Reserved # This file is part of python-zlib-ng which is distributed under the diff --git a/src/zlib_ng/__init__.py b/src/zlib_ng/__init__.py new file mode 100644 index 0000000..a6c8f90 --- /dev/null +++ b/src/zlib_ng/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-zlib-ng which is distributed under the 
+# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. From d93bcc0c2c9ce752b16d860fc2405fad4441ea2e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 23 Jan 2023 10:51:43 +0100 Subject: [PATCH 07/94] Add zlib and gzip tests --- src/zlib_ng/gzip_ng.py | 0 tests/test_gzip_compliance.py | 881 +++++++++++++++++++++++++ tests/test_zlib_compliance.py | 1130 +++++++++++++++++++++++++++++++++ 3 files changed, 2011 insertions(+) create mode 100644 src/zlib_ng/gzip_ng.py create mode 100644 tests/test_gzip_compliance.py create mode 100644 tests/test_zlib_compliance.py diff --git a/src/zlib_ng/gzip_ng.py b/src/zlib_ng/gzip_ng.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_gzip_compliance.py b/tests/test_gzip_compliance.py new file mode 100644 index 0000000..0c59032 --- /dev/null +++ b/tests/test_gzip_compliance.py @@ -0,0 +1,881 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-zlib-ng which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +"""Test script for the gzip_ng module. +Adapted from test_gzip.py in CPython's lib/test directory. +Python software license applies: +https://github.com/python/cpython/blob/master/LICENSE +Changes made: +- removed test.support specific functionally that is not distributed with the + binary releases of python: + - tempfile module was used to create the temporary files and dirs + - replaced os_helper.unlink with os.unlink + - replaced os_helper.rmtree with shutil.rmtree +""" +import array +import functools +import io +import os +import pathlib +import shutil +import struct +import sys +import tempfile +import unittest +from subprocess import PIPE, Popen +from test.support import _4G, bigmemtest +from test.support.script_helper import assert_python_ok, assert_python_failure + +from zlib_ng import gzip_ng as gzip + +TESTFN_ASCII = '@test' +TESTFN = TESTFN_ASCII + + +data1 = b""" int length=DEFAULTALLOC, err = Z_OK; + PyObject *RetVal; + int flushmode = Z_FINISH; + unsigned long start_total_out; + +""" + +data2 = b"""/* zlibmodule.c -- gzip-compatible data compression */ +/* See http://www.gzip.org/zlib/ +/* See http://www.winimage.com/zLibDll for Windows */ +""" + + +TEMPDIR = tempfile.mkdtemp() + + +class UnseekableIO(io.BytesIO): + def seekable(self): + return False + + def tell(self): + raise io.UnsupportedOperation + + def seek(self, *args): + raise io.UnsupportedOperation + + +class BaseTest(unittest.TestCase): + filename = TESTFN + + def setUp(self): + os.unlink(self.filename) + + def tearDown(self): + os.unlink(self.filename) + + +class TestGzip(BaseTest): + def write_and_read_back(self, data, mode='b'): + b_data = bytes(data) + with gzip.GzipFile(self.filename, 'w'+mode) as f: + l = f.write(data) + self.assertEqual(l, len(b_data)) + with gzip.GzipFile(self.filename, 'r'+mode) as f: + self.assertEqual(f.read(), b_data) + + def test_write(self): + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(data1 * 50) + + # Try flush and fileno. + f.flush() + f.fileno() + if hasattr(os, 'fsync'): + os.fsync(f.fileno()) + f.close() + + # Test multiple close() calls. 
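+        # (A second close() on an already-closed GzipFile must be a no-op,
+        # matching io.IOBase, whose close() has no effect once closed.)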
+ f.close() + + def test_write_read_with_pathlike_file(self): + filename = pathlib.Path(self.filename) + with gzip.GzipFile(filename, 'w') as f: + f.write(data1 * 50) + self.assertIsInstance(f.name, str) + with gzip.GzipFile(filename, 'a') as f: + f.write(data1) + with gzip.GzipFile(filename) as f: + d = f.read() + self.assertEqual(d, data1 * 51) + self.assertIsInstance(f.name, str) + + # The following test_write_xy methods test that write accepts + # the corresponding bytes-like object type as input + # and that the data written equals bytes(xy) in all cases. + def test_write_memoryview(self): + self.write_and_read_back(memoryview(data1 * 50)) + m = memoryview(bytes(range(256))) + data = m.cast('B', shape=[8,8,4]) + self.write_and_read_back(data) + + def test_write_bytearray(self): + self.write_and_read_back(bytearray(data1 * 50)) + + def test_write_array(self): + self.write_and_read_back(array.array('I', data1 * 40)) + + def test_write_incompatible_type(self): + # Test that non-bytes-like types raise TypeError. + # Issue #21560: attempts to write incompatible types + # should not affect the state of the fileobject + with gzip.GzipFile(self.filename, 'wb') as f: + with self.assertRaises(TypeError): + f.write('') + with self.assertRaises(TypeError): + f.write([]) + f.write(data1) + with gzip.GzipFile(self.filename, 'rb') as f: + self.assertEqual(f.read(), data1) + + def test_read(self): + self.test_write() + # Try reading. + with gzip.GzipFile(self.filename, 'r') as f: + d = f.read() + self.assertEqual(d, data1*50) + + def test_read1(self): + self.test_write() + blocks = [] + nread = 0 + with gzip.GzipFile(self.filename, 'r') as f: + while True: + d = f.read1() + if not d: + break + blocks.append(d) + nread += len(d) + # Check that position was updated correctly (see issue10791). + self.assertEqual(f.tell(), nread) + self.assertEqual(b''.join(blocks), data1 * 50) + + @bigmemtest(size=_4G, memuse=1) + def test_read_large(self, size): + # Read chunk size over UINT_MAX should be supported, despite zlib's + # limitation per low-level call + compressed = gzip.compress(data1, compresslevel=1) + f = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb') + self.assertEqual(f.read(size), data1) + + def test_io_on_closed_object(self): + # Test that I/O operations on closed GzipFile objects raise a + # ValueError, just like the corresponding functions on file objects. + + # Write to a file, open it for reading, then close it. + self.test_write() + f = gzip.GzipFile(self.filename, 'r') + fileobj = f.fileobj + self.assertFalse(fileobj.closed) + f.close() + self.assertTrue(fileobj.closed) + with self.assertRaises(ValueError): + f.read(1) + with self.assertRaises(ValueError): + f.seek(0) + with self.assertRaises(ValueError): + f.tell() + # Open the file for writing, then close it. + f = gzip.GzipFile(self.filename, 'w') + fileobj = f.fileobj + self.assertFalse(fileobj.closed) + f.close() + self.assertTrue(fileobj.closed) + with self.assertRaises(ValueError): + f.write(b'') + with self.assertRaises(ValueError): + f.flush() + + def test_append(self): + self.test_write() + # Append to the previous file + with gzip.GzipFile(self.filename, 'ab') as f: + f.write(data2 * 15) + + with gzip.GzipFile(self.filename, 'rb') as f: + d = f.read() + self.assertEqual(d, (data1*50) + (data2*15)) + + def test_many_append(self): + # Bug #1074261 was triggered when reading a file that contained + # many, many members. Create such a file and verify that reading it + # works. 
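+        # (RFC 1952 defines a gzip file as a series of concatenated members;
+        # each append below adds a new member, and read() must continue past
+        # every member's CRC/ISIZE trailer.)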
+ with gzip.GzipFile(self.filename, 'wb', 9) as f: + f.write(b'a') + for i in range(0, 200): + with gzip.GzipFile(self.filename, "ab", 9) as f: # append + f.write(b'a') + + # Try reading the file + with gzip.GzipFile(self.filename, "rb") as zgfile: + contents = b"" + while 1: + ztxt = zgfile.read(8192) + contents += ztxt + if not ztxt: break + self.assertEqual(contents, b'a'*201) + + def test_exclusive_write(self): + with gzip.GzipFile(self.filename, 'xb') as f: + f.write(data1 * 50) + with gzip.GzipFile(self.filename, 'rb') as f: + self.assertEqual(f.read(), data1 * 50) + with self.assertRaises(FileExistsError): + gzip.GzipFile(self.filename, 'xb') + + def test_buffered_reader(self): + # Issue #7471: a GzipFile can be wrapped in a BufferedReader for + # performance. + self.test_write() + + with gzip.GzipFile(self.filename, 'rb') as f: + with io.BufferedReader(f) as r: + lines = [line for line in r] + + self.assertEqual(lines, 50 * data1.splitlines(keepends=True)) + + def test_readline(self): + self.test_write() + # Try .readline() with varying line lengths + + with gzip.GzipFile(self.filename, 'rb') as f: + line_length = 0 + while 1: + L = f.readline(line_length) + if not L and line_length != 0: break + self.assertTrue(len(L) <= line_length) + line_length = (line_length + 1) % 50 + + def test_readlines(self): + self.test_write() + # Try .readlines() + + with gzip.GzipFile(self.filename, 'rb') as f: + L = f.readlines() + + with gzip.GzipFile(self.filename, 'rb') as f: + while 1: + L = f.readlines(150) + if L == []: break + + def test_seek_read(self): + self.test_write() + # Try seek, read test + + with gzip.GzipFile(self.filename) as f: + while 1: + oldpos = f.tell() + line1 = f.readline() + if not line1: break + newpos = f.tell() + f.seek(oldpos) # negative seek + if len(line1)>10: + amount = 10 + else: + amount = len(line1) + line2 = f.read(amount) + self.assertEqual(line1[:amount], line2) + f.seek(newpos) # positive seek + + def test_seek_whence(self): + self.test_write() + # Try seek(whence=1), read test + + with gzip.GzipFile(self.filename) as f: + f.read(10) + f.seek(10, whence=1) + y = f.read(10) + self.assertEqual(y, data1[20:30]) + + def test_seek_write(self): + # Try seek, write test + with gzip.GzipFile(self.filename, 'w') as f: + for pos in range(0, 256, 16): + f.seek(pos) + f.write(b'GZ\n') + + def test_mode(self): + self.test_write() + with gzip.GzipFile(self.filename, 'r') as f: + self.assertEqual(f.myfileobj.mode, 'rb') + os.unlink(self.filename) + with gzip.GzipFile(self.filename, 'x') as f: + self.assertEqual(f.myfileobj.mode, 'xb') + + def test_1647484(self): + for mode in ('wb', 'rb'): + with gzip.GzipFile(self.filename, mode) as f: + self.assertTrue(hasattr(f, "name")) + self.assertEqual(f.name, self.filename) + + def test_paddedfile_getattr(self): + self.test_write() + with gzip.GzipFile(self.filename, 'rb') as f: + self.assertTrue(hasattr(f.fileobj, "name")) + self.assertEqual(f.fileobj.name, self.filename) + + def test_mtime(self): + mtime = 123456789 + with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: + fWrite.write(data1) + with gzip.GzipFile(self.filename) as fRead: + self.assertTrue(hasattr(fRead, 'mtime')) + self.assertIsNone(fRead.mtime) + dataRead = fRead.read() + self.assertEqual(dataRead, data1) + self.assertEqual(fRead.mtime, mtime) + + def test_metadata(self): + mtime = 123456789 + + with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: + fWrite.write(data1) + + with open(self.filename, 'rb') as fRead: + # see RFC 1952: 
http://www.faqs.org/rfcs/rfc1952.html
+
+            idBytes = fRead.read(2)
+            self.assertEqual(idBytes, b'\x1f\x8b') # gzip ID
+
+            cmByte = fRead.read(1)
+            self.assertEqual(cmByte, b'\x08') # deflate
+
+            try:
+                expectedname = self.filename.encode('Latin-1') + b'\x00'
+                expectedflags = b'\x08' # only the FNAME flag is set
+            except UnicodeEncodeError:
+                expectedname = b''
+                expectedflags = b'\x00'
+
+            flagsByte = fRead.read(1)
+            self.assertEqual(flagsByte, expectedflags)
+
+            mtimeBytes = fRead.read(4)
+            self.assertEqual(mtimeBytes, struct.pack('<i', mtime)) # little-endian
diff --git a/tests/test_zlib_compliance.py b/tests/test_zlib_compliance.py
new file mode 100644
--- /dev/null
+++ b/tests/test_zlib_compliance.py
+# Issue #10276 - check that inputs >=4 GiB are handled correctly.
+class ChecksumBigBufferTestCase(unittest.TestCase):
+
+    @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
+    def test_big_buffer(self, size):
+        data = b"nyan" * (_1G + 1)
+        self.assertEqual(zlib.crc32(data), 1044521549)
+        self.assertEqual(zlib.adler32(data), 2256789997)
+
+
+class ExceptionTestCase(unittest.TestCase):
+    # make sure we generate some expected errors
+    def test_badlevel(self):
+        # specifying compression level out of range causes an error
+        # (but -1 is Z_DEFAULT_COMPRESSION and apparently the zlib
+        # accepts 0 too)
+        self.assertRaises(zlib.error, zlib.compress, b'ERROR', 10)
+
+    def test_badargs(self):
+        self.assertRaises(TypeError, zlib.adler32)
+        self.assertRaises(TypeError, zlib.crc32)
+        self.assertRaises(TypeError, zlib.compress)
+        self.assertRaises(TypeError, zlib.decompress)
+        for arg in (42, None, '', 'abc', (), []):
+            self.assertRaises(TypeError, zlib.adler32, arg)
+            self.assertRaises(TypeError, zlib.crc32, arg)
+            self.assertRaises(TypeError, zlib.compress, arg)
+            self.assertRaises(TypeError, zlib.decompress, arg)
+
+    def test_badcompressobj(self):
+        # verify failure on building compress object with bad params
+        self.assertRaises(ValueError, zlib.compressobj, 1, zlib.DEFLATED, 0)
+        # specifying total bits too large causes an error
+        self.assertRaises(ValueError,
+                          zlib.compressobj, 1, zlib.DEFLATED, zlib.MAX_WBITS + 1)
+
+    def test_baddecompressobj(self):
+        # verify failure on building decompress object with bad params
+        self.assertRaises(ValueError, zlib.decompressobj, -1)
+
+    def test_decompressobj_badflush(self):
+        # verify failure on calling decompressobj.flush with bad params
+        self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
+        self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
+
+    @support.cpython_only
+    def test_overflow(self):
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompress(b'', 15, sys.maxsize + 1)
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompressobj().decompress(b'', sys.maxsize + 1)
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompressobj().flush(sys.maxsize + 1)
+
+    @support.cpython_only
+    def test_disallow_instantiation(self):
+        # Ensure that the type disallows instantiation (bpo-43916)
+        support.check_disallow_instantiation(self, type(zlib.compressobj()))
+        support.check_disallow_instantiation(self, type(zlib.decompressobj()))
+
+
+class BaseCompressTestCase(object):
+    def check_big_compress_buffer(self, size, compress_func):
+        _1M = 1024 * 1024
+        # Generate 10 MiB worth of random, and expand it by repeating it.
+        # The assumption is that zlib's memory is not big enough to exploit
+        # such spread out redundancy.
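+        # (Deflate matches are limited to the 32 KiB window, so repeats
+        # spaced ~10 MiB apart cannot be exploited by the compressor.)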
+ data = random.randbytes(_1M * 10) + data = data * (size // len(data) + 1) + try: + compress_func(data) + finally: + # Release memory + data = None + + def check_big_decompress_buffer(self, size, decompress_func): + data = b'x' * size + try: + compressed = zlib.compress(data, 1) + finally: + # Release memory + data = None + data = decompress_func(compressed) + # Sanity check + try: + self.assertEqual(len(data), size) + self.assertEqual(len(data.strip(b'x')), 0) + finally: + data = None + + +class CompressTestCase(BaseCompressTestCase, unittest.TestCase): + # Test compression in one go (whole message compression) + def test_speech(self): + x = zlib.compress(HAMLET_SCENE) + self.assertEqual(zlib.decompress(x), HAMLET_SCENE) + + def test_keywords(self): + x = zlib.compress(HAMLET_SCENE, level=3) + self.assertEqual(zlib.decompress(x), HAMLET_SCENE) + with self.assertRaises(TypeError): + zlib.compress(data=HAMLET_SCENE, level=3) + self.assertEqual(zlib.decompress(x, + wbits=zlib.MAX_WBITS, + bufsize=zlib.DEF_BUF_SIZE), + HAMLET_SCENE) + + @skip_on_s390x + def test_speech128(self): + # compress more data + data = HAMLET_SCENE * 128 + x = zlib.compress(data) + self.assertEqual(zlib.compress(bytearray(data)), x) + for ob in x, bytearray(x): + self.assertEqual(zlib.decompress(ob), data) + + def test_incomplete_stream(self): + # A useful error message is given + x = zlib.compress(HAMLET_SCENE) + self.assertRaisesRegex(zlib.error, + "Error -5 while decompressing data: incomplete or truncated stream", + zlib.decompress, x[:-1]) + + # Memory use of the following functions takes into account overallocation + + @bigmemtest(size=_1G + 1024 * 1024, memuse=3) + def test_big_compress_buffer(self, size): + compress = lambda s: zlib.compress(s, 1) + self.check_big_compress_buffer(size, compress) + + @bigmemtest(size=_1G + 1024 * 1024, memuse=2) + def test_big_decompress_buffer(self, size): + self.check_big_decompress_buffer(size, zlib.decompress) + + @bigmemtest(size=_4G, memuse=1) + def test_large_bufsize(self, size): + # Test decompress(bufsize) parameter greater than the internal limit + data = HAMLET_SCENE * 10 + compressed = zlib.compress(data, 1) + self.assertEqual(zlib.decompress(compressed, 15, size), data) + + def test_custom_bufsize(self): + data = HAMLET_SCENE * 10 + compressed = zlib.compress(data, 1) + self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data) + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=4) + def test_64bit_compress(self, size): + data = b'x' * size + try: + comp = zlib.compress(data, 0) + self.assertEqual(zlib.decompress(comp), data) + finally: + comp = data = None + + +class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): + # Test compression object + @skip_on_s390x + def test_pair(self): + # straightforward compress/decompress objects + datasrc = HAMLET_SCENE * 128 + datazip = zlib.compress(datasrc) + # should compress both bytes and bytearray data + for data in (datasrc, bytearray(datasrc)): + co = zlib.compressobj() + x1 = co.compress(data) + x2 = co.flush() + self.assertRaises(zlib.error, co.flush) # second flush should not work + self.assertEqual(x1 + x2, datazip) + for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): + dco = zlib.decompressobj() + y1 = dco.decompress(v1 + v2) + y2 = dco.flush() + self.assertEqual(data, y1 + y2) + self.assertIsInstance(dco.unconsumed_tail, bytes) + self.assertIsInstance(dco.unused_data, bytes) + + def test_keywords(self): + level = 2 + method = 
zlib.DEFLATED + wbits = -12 + memLevel = 9 + strategy = zlib.Z_FILTERED + co = zlib.compressobj(level=level, + method=method, + wbits=wbits, + memLevel=memLevel, + strategy=strategy, + zdict=b"") + do = zlib.decompressobj(wbits=wbits, zdict=b"") + with self.assertRaises(TypeError): + co.compress(data=HAMLET_SCENE) + with self.assertRaises(TypeError): + do.decompress(data=zlib.compress(HAMLET_SCENE)) + x = co.compress(HAMLET_SCENE) + co.flush() + y = do.decompress(x, max_length=len(HAMLET_SCENE)) + do.flush() + self.assertEqual(HAMLET_SCENE, y) + + def test_compressoptions(self): + # specify lots of options to compressobj() + level = 2 + method = zlib.DEFLATED + wbits = -12 + memLevel = 9 + strategy = zlib.Z_FILTERED + co = zlib.compressobj(level, method, wbits, memLevel, strategy) + x1 = co.compress(HAMLET_SCENE) + x2 = co.flush() + dco = zlib.decompressobj(wbits) + y1 = dco.decompress(x1 + x2) + y2 = dco.flush() + self.assertEqual(HAMLET_SCENE, y1 + y2) + + def test_compressincremental(self): + # compress object in steps, decompress object as one-shot + data = HAMLET_SCENE * 128 + co = zlib.compressobj() + bufs = [] + for i in range(0, len(data), 256): + bufs.append(co.compress(data[i:i+256])) + bufs.append(co.flush()) + combuf = b''.join(bufs) + + dco = zlib.decompressobj() + y1 = dco.decompress(b''.join(bufs)) + y2 = dco.flush() + self.assertEqual(data, y1 + y2) + + def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): + # compress object in steps, decompress object in steps + source = source or HAMLET_SCENE + data = source * 128 + co = zlib.compressobj() + bufs = [] + for i in range(0, len(data), cx): + bufs.append(co.compress(data[i:i+cx])) + bufs.append(co.flush()) + combuf = b''.join(bufs) + + decombuf = zlib.decompress(combuf) + # Test type of return value + self.assertIsInstance(decombuf, bytes) + + self.assertEqual(data, decombuf) + + dco = zlib.decompressobj() + bufs = [] + for i in range(0, len(combuf), dcx): + bufs.append(dco.decompress(combuf[i:i+dcx])) + self.assertEqual(b'', dco.unconsumed_tail, ######## + "(A) uct should be b'': not %d long" % + len(dco.unconsumed_tail)) + self.assertEqual(b'', dco.unused_data) + if flush: + bufs.append(dco.flush()) + else: + while True: + chunk = dco.decompress(b'') + if chunk: + bufs.append(chunk) + else: + break + self.assertEqual(b'', dco.unconsumed_tail, ######## + "(B) uct should be b'': not %d long" % + len(dco.unconsumed_tail)) + self.assertEqual(b'', dco.unused_data) + self.assertEqual(data, b''.join(bufs)) + # Failure means: "decompressobj with init options failed" + + def test_decompincflush(self): + self.test_decompinc(flush=True) + + def test_decompimax(self, source=None, cx=256, dcx=64): + # compress in steps, decompress in length-restricted steps + source = source or HAMLET_SCENE + # Check a decompression object with max_length specified + data = source * 128 + co = zlib.compressobj() + bufs = [] + for i in range(0, len(data), cx): + bufs.append(co.compress(data[i:i+cx])) + bufs.append(co.flush()) + combuf = b''.join(bufs) + self.assertEqual(data, zlib.decompress(combuf), + 'compressed data failure') + + dco = zlib.decompressobj() + bufs = [] + cb = combuf + while cb: + #max_length = 1 + len(cb)//10 + chunk = dco.decompress(cb, dcx) + self.assertFalse(len(chunk) > dcx, + 'chunk too big (%d>%d)' % (len(chunk), dcx)) + bufs.append(chunk) + cb = dco.unconsumed_tail + bufs.append(dco.flush()) + self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved') + + def test_decompressmaxlen(self, flush=False): + # Check 
a decompression object with max_length specified + data = HAMLET_SCENE * 128 + co = zlib.compressobj() + bufs = [] + for i in range(0, len(data), 256): + bufs.append(co.compress(data[i:i+256])) + bufs.append(co.flush()) + combuf = b''.join(bufs) + self.assertEqual(data, zlib.decompress(combuf), + 'compressed data failure') + + dco = zlib.decompressobj() + bufs = [] + cb = combuf + while cb: + max_length = 1 + len(cb)//10 + chunk = dco.decompress(cb, max_length) + self.assertFalse(len(chunk) > max_length, + 'chunk too big (%d>%d)' % (len(chunk),max_length)) + bufs.append(chunk) + cb = dco.unconsumed_tail + if flush: + bufs.append(dco.flush()) + else: + while chunk: + chunk = dco.decompress(b'', max_length) + self.assertFalse(len(chunk) > max_length, + 'chunk too big (%d>%d)' % (len(chunk),max_length)) + bufs.append(chunk) + self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved') + + def test_decompressmaxlenflush(self): + self.test_decompressmaxlen(flush=True) + + def test_maxlenmisc(self): + # Misc tests of max_length + dco = zlib.decompressobj() + self.assertRaises(ValueError, dco.decompress, b"", -1) + self.assertEqual(b'', dco.unconsumed_tail) + + def test_maxlen_large(self): + # Sizes up to sys.maxsize should be accepted, although zlib is + # internally limited to expressing sizes with unsigned int + data = HAMLET_SCENE * 10 + self.assertGreater(len(data), zlib.DEF_BUF_SIZE) + compressed = zlib.compress(data, 1) + dco = zlib.decompressobj() + self.assertEqual(dco.decompress(compressed, sys.maxsize), data) + + def test_maxlen_custom(self): + data = HAMLET_SCENE * 10 + compressed = zlib.compress(data, 1) + dco = zlib.decompressobj() + self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100]) + + def test_clear_unconsumed_tail(self): + # Issue #12050: calling decompress() without providing max_length + # should clear the unconsumed_tail attribute. + cdata = b"x\x9cKLJ\x06\x00\x02M\x01" # "abc" + dco = zlib.decompressobj() + ddata = dco.decompress(cdata, 1) + ddata += dco.decompress(dco.unconsumed_tail) + self.assertEqual(dco.unconsumed_tail, b"") + + def test_flushes(self): + # Test flush() with the various options, using all the + # different levels in order to provide more variations. 
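+        # (Z_SYNC_FLUSH byte-aligns the output so all data emitted so far can
+        # be decompressed; Z_FULL_FLUSH additionally resets the window so
+        # decompression can restart from that point.)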
+ sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH', + 'Z_PARTIAL_FLUSH'] + + ver = tuple(int(v) for v in zlib.ZLIB_RUNTIME_VERSION.split('.')) + # Z_BLOCK has a known failure prior to 1.2.5.3 + if ver >= (1, 2, 5, 3): + sync_opt.append('Z_BLOCK') + + sync_opt = [getattr(zlib, opt) for opt in sync_opt + if hasattr(zlib, opt)] + data = HAMLET_SCENE * 8 + + for sync in sync_opt: + for level in range(10): + try: + obj = zlib.compressobj( level ) + a = obj.compress( data[:3000] ) + b = obj.flush( sync ) + c = obj.compress( data[3000:] ) + d = obj.flush() + except: + print("Error for flush mode={}, level={}" + .format(sync, level)) + raise + self.assertEqual(zlib.decompress(b''.join([a,b,c,d])), + data, ("Decompress failed: flush " + "mode=%i, level=%i") % (sync, level)) + del obj + + @unittest.skipUnless(hasattr(zlib, 'Z_SYNC_FLUSH'), + 'requires zlib.Z_SYNC_FLUSH') + def test_odd_flush(self): + # Test for odd flushing bugs noted in 2.0, and hopefully fixed in 2.1 + import random + # Testing on 17K of "random" data + + # Create compressor and decompressor objects + co = zlib.compressobj(zlib.Z_BEST_COMPRESSION) + dco = zlib.decompressobj() + + # Try 17K of data + # generate random data stream + try: + # In 2.3 and later, WichmannHill is the RNG of the bug report + gen = random.WichmannHill() + except AttributeError: + try: + # 2.2 called it Random + gen = random.Random() + except AttributeError: + # others might simply have a single RNG + gen = random + gen.seed(1) + data = gen.randbytes(17 * 1024) + + # compress, sync-flush, and decompress + first = co.compress(data) + second = co.flush(zlib.Z_SYNC_FLUSH) + expanded = dco.decompress(first + second) + + # if decompressed data is different from the input data, choke. + self.assertEqual(expanded, data, "17K random source doesn't match") + + def test_empty_flush(self): + # Test that calling .flush() on unused objects works. + # (Bug #1083110 -- calling .flush() on decompress objects + # caused a core dump.) + + co = zlib.compressobj(zlib.Z_BEST_COMPRESSION) + self.assertTrue(co.flush()) # Returns a zlib header + dco = zlib.decompressobj() + self.assertEqual(dco.flush(), b"") # Returns nothing + + def test_dictionary(self): + h = HAMLET_SCENE + # Build a simulated dictionary out of the words in HAMLET. + words = h.split() + random.shuffle(words) + zdict = b''.join(words) + # Use it to compress HAMLET. + co = zlib.compressobj(zdict=zdict) + cd = co.compress(h) + co.flush() + # Verify that it will decompress with the dictionary. + dco = zlib.decompressobj(zdict=zdict) + self.assertEqual(dco.decompress(cd) + dco.flush(), h) + # Verify that it fails when not given the dictionary. + dco = zlib.decompressobj() + self.assertRaises(zlib.error, dco.decompress, cd) + + def test_dictionary_streaming(self): + # This simulates the reuse of a compressor object for compressing + # several separate data streams. 
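+        # (The preset dictionary pre-fills the 32 KiB window, so the short
+        # pieces below can back-reference it; compressor and decompressor
+        # must be constructed with the same zdict.)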
+ co = zlib.compressobj(zdict=HAMLET_SCENE) + do = zlib.decompressobj(zdict=HAMLET_SCENE) + piece = HAMLET_SCENE[1000:1500] + d0 = co.compress(piece) + co.flush(zlib.Z_SYNC_FLUSH) + d1 = co.compress(piece[100:]) + co.flush(zlib.Z_SYNC_FLUSH) + d2 = co.compress(piece[:-100]) + co.flush(zlib.Z_SYNC_FLUSH) + self.assertEqual(do.decompress(d0), piece) + self.assertEqual(do.decompress(d1), piece[100:]) + self.assertEqual(do.decompress(d2), piece[:-100]) + + def test_decompress_incomplete_stream(self): + # This is 'foo', deflated + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' + # For the record + self.assertEqual(zlib.decompress(x), b'foo') + self.assertRaises(zlib.error, zlib.decompress, x[:-5]) + # Omitting the stream end works with decompressor objects + # (see issue #8672). + dco = zlib.decompressobj() + y = dco.decompress(x[:-5]) + y += dco.flush() + self.assertEqual(y, b'foo') + + def test_decompress_eof(self): + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' + dco = zlib.decompressobj() + self.assertFalse(dco.eof) + dco.decompress(x[:-5]) + self.assertFalse(dco.eof) + dco.decompress(x[-5:]) + self.assertTrue(dco.eof) + dco.flush() + self.assertTrue(dco.eof) + + def test_decompress_eof_incomplete_stream(self): + x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo' + dco = zlib.decompressobj() + self.assertFalse(dco.eof) + dco.decompress(x[:-5]) + self.assertFalse(dco.eof) + dco.flush() + self.assertFalse(dco.eof) + + def test_decompress_unused_data(self): + # Repeated calls to decompress() after EOF should accumulate data in + # dco.unused_data, instead of just storing the arg to the last call. + source = b'abcdefghijklmnopqrstuvwxyz' + remainder = b'0123456789' + y = zlib.compress(source) + x = y + remainder + for maxlen in 0, 1000: + for step in 1, 2, len(y), len(x): + dco = zlib.decompressobj() + data = b'' + for i in range(0, len(x), step): + if i < len(y): + self.assertEqual(dco.unused_data, b'') + if maxlen == 0: + data += dco.decompress(x[i : i + step]) + self.assertEqual(dco.unconsumed_tail, b'') + else: + data += dco.decompress( + dco.unconsumed_tail + x[i : i + step], maxlen) + data += dco.flush() + self.assertTrue(dco.eof) + self.assertEqual(data, source) + self.assertEqual(dco.unconsumed_tail, b'') + self.assertEqual(dco.unused_data, remainder) + + # issue27164 + def test_decompress_raw_with_dictionary(self): + zdict = b'abcdefghijklmnopqrstuvwxyz' + co = zlib.compressobj(wbits=-zlib.MAX_WBITS, zdict=zdict) + comp = co.compress(zdict) + co.flush() + dco = zlib.decompressobj(wbits=-zlib.MAX_WBITS, zdict=zdict) + uncomp = dco.decompress(comp) + dco.flush() + self.assertEqual(zdict, uncomp) + + def test_flush_with_freed_input(self): + # Issue #16411: decompressor accesses input to last decompress() call + # in flush(), even if this object has been freed in the meanwhile. 
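+        # (Once the caller has released its buffer, flush() must not touch
+        # the pointer saved from the previous decompress() call.)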
+ input1 = b'abcdefghijklmnopqrstuvwxyz' + input2 = b'QWERTYUIOPASDFGHJKLZXCVBNM' + data = zlib.compress(input1) + dco = zlib.decompressobj() + dco.decompress(data, 1) + del data + data = zlib.compress(input2) + self.assertEqual(dco.flush(), input1[1:]) + + @bigmemtest(size=_4G, memuse=1) + def test_flush_large_length(self, size): + # Test flush(length) parameter greater than internal limit UINT_MAX + input = HAMLET_SCENE * 10 + data = zlib.compress(input, 1) + dco = zlib.decompressobj() + dco.decompress(data, 1) + self.assertEqual(dco.flush(size), input[1:]) + + def test_flush_custom_length(self): + input = HAMLET_SCENE * 10 + data = zlib.compress(input, 1) + dco = zlib.decompressobj() + dco.decompress(data, 1) + self.assertEqual(dco.flush(CustomInt()), input[1:]) + + @requires_Compress_copy + def test_compresscopy(self): + # Test copying a compression object + data0 = HAMLET_SCENE + data1 = bytes(str(HAMLET_SCENE, "ascii").swapcase(), "ascii") + for func in lambda c: c.copy(), copy.copy, copy.deepcopy: + c0 = zlib.compressobj(zlib.Z_BEST_COMPRESSION) + bufs0 = [] + bufs0.append(c0.compress(data0)) + + c1 = func(c0) + bufs1 = bufs0[:] + + bufs0.append(c0.compress(data0)) + bufs0.append(c0.flush()) + s0 = b''.join(bufs0) + + bufs1.append(c1.compress(data1)) + bufs1.append(c1.flush()) + s1 = b''.join(bufs1) + + self.assertEqual(zlib.decompress(s0),data0+data0) + self.assertEqual(zlib.decompress(s1),data0+data1) + + @requires_Compress_copy + def test_badcompresscopy(self): + # Test copying a compression object in an inconsistent state + c = zlib.compressobj() + c.compress(HAMLET_SCENE) + c.flush() + self.assertRaises(ValueError, c.copy) + self.assertRaises(ValueError, copy.copy, c) + self.assertRaises(ValueError, copy.deepcopy, c) + + @requires_Decompress_copy + def test_decompresscopy(self): + # Test copying a decompression object + data = HAMLET_SCENE + comp = zlib.compress(data) + # Test type of return value + self.assertIsInstance(comp, bytes) + + for func in lambda c: c.copy(), copy.copy, copy.deepcopy: + d0 = zlib.decompressobj() + bufs0 = [] + bufs0.append(d0.decompress(comp[:32])) + + d1 = func(d0) + bufs1 = bufs0[:] + + bufs0.append(d0.decompress(comp[32:])) + s0 = b''.join(bufs0) + + bufs1.append(d1.decompress(comp[32:])) + s1 = b''.join(bufs1) + + self.assertEqual(s0,s1) + self.assertEqual(s0,data) + + @requires_Decompress_copy + def test_baddecompresscopy(self): + # Test copying a compression object in an inconsistent state + data = zlib.compress(HAMLET_SCENE) + d = zlib.decompressobj() + d.decompress(data) + d.flush() + self.assertRaises(ValueError, d.copy) + self.assertRaises(ValueError, copy.copy, d) + self.assertRaises(ValueError, copy.deepcopy, d) + + def test_compresspickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises((TypeError, pickle.PicklingError)): + pickle.dumps(zlib.compressobj(zlib.Z_BEST_COMPRESSION), proto) + + def test_decompresspickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises((TypeError, pickle.PicklingError)): + pickle.dumps(zlib.decompressobj(), proto) + + # Memory use of the following functions takes into account overallocation + + @bigmemtest(size=_1G + 1024 * 1024, memuse=3) + def test_big_compress_buffer(self, size): + c = zlib.compressobj(1) + compress = lambda s: c.compress(s) + c.flush() + self.check_big_compress_buffer(size, compress) + + @bigmemtest(size=_1G + 1024 * 1024, memuse=2) + def test_big_decompress_buffer(self, size): + d = zlib.decompressobj() + decompress = 
lambda s: d.decompress(s) + d.flush() + self.check_big_decompress_buffer(size, decompress) + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=4) + def test_64bit_compress(self, size): + data = b'x' * size + co = zlib.compressobj(0) + do = zlib.decompressobj() + try: + comp = co.compress(data) + co.flush() + uncomp = do.decompress(comp) + do.flush() + self.assertEqual(uncomp, data) + finally: + comp = uncomp = data = None + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=3) + def test_large_unused_data(self, size): + data = b'abcdefghijklmnop' + unused = b'x' * size + comp = zlib.compress(data) + unused + do = zlib.decompressobj() + try: + uncomp = do.decompress(comp) + do.flush() + self.assertEqual(unused, do.unused_data) + self.assertEqual(uncomp, data) + finally: + unused = comp = do = None + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=5) + def test_large_unconsumed_tail(self, size): + data = b'x' * size + do = zlib.decompressobj() + try: + comp = zlib.compress(data, 0) + uncomp = do.decompress(comp, 1) + do.flush() + self.assertEqual(uncomp, data) + self.assertEqual(do.unconsumed_tail, b'') + finally: + comp = uncomp = data = None + + def test_wbits(self): + # wbits=0 only supported since zlib v1.2.3.5 + # Register "1.2.3" as "1.2.3.0" + # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux" + v = zlib.ZLIB_RUNTIME_VERSION.split('-', 1)[0].split('.') + if len(v) < 4: + v.append('0') + elif not v[-1].isnumeric(): + v[-1] = '0' + + v = tuple(map(int, v)) + supports_wbits_0 = v >= (1, 2, 3, 5) + + co = zlib.compressobj(level=1, wbits=15) + zlib15 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(zlib15, 15), HAMLET_SCENE) + if supports_wbits_0: + self.assertEqual(zlib.decompress(zlib15, 0), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib15, 32 + 15), HAMLET_SCENE) + with self.assertRaisesRegex(zlib.error, 'invalid window size'): + zlib.decompress(zlib15, 14) + dco = zlib.decompressobj(wbits=32 + 15) + self.assertEqual(dco.decompress(zlib15), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=14) + with self.assertRaisesRegex(zlib.error, 'invalid window size'): + dco.decompress(zlib15) + + co = zlib.compressobj(level=1, wbits=9) + zlib9 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(zlib9, 9), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib9, 15), HAMLET_SCENE) + if supports_wbits_0: + self.assertEqual(zlib.decompress(zlib9, 0), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib9, 32 + 9), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=32 + 9) + self.assertEqual(dco.decompress(zlib9), HAMLET_SCENE) + + co = zlib.compressobj(level=1, wbits=-15) + deflate15 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(deflate15, -15), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=-15) + self.assertEqual(dco.decompress(deflate15), HAMLET_SCENE) + + co = zlib.compressobj(level=1, wbits=-9) + deflate9 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(deflate9, -9), HAMLET_SCENE) + self.assertEqual(zlib.decompress(deflate9, -15), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=-9) + self.assertEqual(dco.decompress(deflate9), HAMLET_SCENE) + + co = zlib.compressobj(level=1, wbits=16 + 15) + gzip = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(gzip, 16 + 15), HAMLET_SCENE) + 
self.assertEqual(zlib.decompress(gzip, 32 + 15), HAMLET_SCENE) + dco = zlib.decompressobj(32 + 15) + self.assertEqual(dco.decompress(gzip), HAMLET_SCENE) + + for wbits in (-15, 15, 31): + with self.subTest(wbits=wbits): + expected = HAMLET_SCENE + actual = zlib.decompress( + zlib.compress(HAMLET_SCENE, wbits=wbits), wbits=wbits + ) + self.assertEqual(expected, actual) + +def choose_lines(source, number, seed=None, generator=random): + """Return a list of number lines randomly chosen from the source""" + if seed is not None: + generator.seed(seed) + sources = source.split('\n') + return [generator.choice(sources) for n in range(number)] + + +HAMLET_SCENE = b""" +LAERTES + + O, fear me not. + I stay too long: but here my father comes. + + Enter POLONIUS + + A double blessing is a double grace, + Occasion smiles upon a second leave. + +LORD POLONIUS + + Yet here, Laertes! aboard, aboard, for shame! + The wind sits in the shoulder of your sail, + And you are stay'd for. There; my blessing with thee! + And these few precepts in thy memory + See thou character. Give thy thoughts no tongue, + Nor any unproportioned thought his act. + Be thou familiar, but by no means vulgar. + Those friends thou hast, and their adoption tried, + Grapple them to thy soul with hoops of steel; + But do not dull thy palm with entertainment + Of each new-hatch'd, unfledged comrade. Beware + Of entrance to a quarrel, but being in, + Bear't that the opposed may beware of thee. + Give every man thy ear, but few thy voice; + Take each man's censure, but reserve thy judgment. + Costly thy habit as thy purse can buy, + But not express'd in fancy; rich, not gaudy; + For the apparel oft proclaims the man, + And they in France of the best rank and station + Are of a most select and generous chief in that. + Neither a borrower nor a lender be; + For loan oft loses both itself and friend, + And borrowing dulls the edge of husbandry. + This above all: to thine ownself be true, + And it must follow, as the night the day, + Thou canst not then be false to any man. + Farewell: my blessing season this in thee! + +LAERTES + + Most humbly do I take my leave, my lord. + +LORD POLONIUS + + The time invites you; go; your servants tend. + +LAERTES + + Farewell, Ophelia; and remember well + What I have said to you. + +OPHELIA + + 'Tis in my memory lock'd, + And you yourself shall keep the key of it. + +LAERTES + + Farewell. 
+""" + + +class ZlibDecompressorTest(): + # Test adopted from test_bz2.py + TEXT = HAMLET_SCENE + DATA = zlib.compress(HAMLET_SCENE) + BAD_DATA = b"Not a valid deflate block" + def test_Constructor(self): + self.assertRaises(TypeError, zlib._ZlibDecompressor, 42) + + def testDecompress(self): + zlibd = zlib._ZlibDecompressor() + self.assertRaises(TypeError, zlibd.decompress) + text = zlibd.decompress(self.DATA) + self.assertEqual(text, self.TEXT) + + def testDecompressChunks10(self): + zlibd = zlib._ZlibDecompressor() + text = b'' + n = 0 + while True: + str = self.DATA[n*10:(n+1)*10] + if not str: + break + text += zlibd.decompress(str) + n += 1 + self.assertEqual(text, self.TEXT) + + def testDecompressUnusedData(self): + zlibd = zlib._ZlibDecompressor() + unused_data = b"this is unused data" + text = zlibd.decompress(self.DATA+unused_data) + self.assertEqual(text, self.TEXT) + self.assertEqual(zlibd.unused_data, unused_data) + + def testEOFError(self): + zlibd = zlib._ZlibDecompressor() + text = zlibd.decompress(self.DATA) + self.assertRaises(EOFError, zlibd.decompress, b"anything") + self.assertRaises(EOFError, zlibd.decompress, b"") + + @support.skip_if_pgo_task + @bigmemtest(size=_4G + 100, memuse=3.3) + def testDecompress4G(self, size): + # "Test zlib._ZlibDecompressor.decompress() with >4GiB input" + blocksize = 10 * 1024 * 1024 + block = random.randbytes(blocksize) + try: + data = block * (size // blocksize + 1) + compressed = zlib.compress(data) + zlibd = zlib._ZlibDecompressor() + decompressed = zlibd.decompress(compressed) + self.assertTrue(decompressed == data) + finally: + data = None + compressed = None + decompressed = None + + def testPickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises(TypeError): + pickle.dumps(zlib._ZlibDecompressor(), proto) + + def testDecompressorChunksMaxsize(self): + zlibd = zlib._ZlibDecompressor() + max_length = 100 + out = [] + + # Feed some input + len_ = len(self.BIG_DATA) - 64 + out.append(zlibd.decompress(self.BIG_DATA[:len_], + max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data without providing more input + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data while providing more input + out.append(zlibd.decompress(self.BIG_DATA[len_:], + max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve remaining uncompressed data + while not zlibd.eof: + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + out = b"".join(out) + self.assertEqual(out, self.BIG_TEXT) + self.assertEqual(zlibd.unused_data, b"") + + def test_decompressor_inputbuf_1(self): + # Test reusing input buffer after moving existing + # contents to beginning + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create input buffer and fill it + self.assertEqual(zlibd.decompress(self.DATA[:100], + max_length=0), b'') + + # Retrieve some results, freeing capacity at beginning + # of input buffer + out.append(zlibd.decompress(b'', 2)) + + # Add more data that fits into input buffer after + # moving existing data to beginning + out.append(zlibd.decompress(self.DATA[100:105], 15)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[105:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_decompressor_inputbuf_2(self): + # Test reusing input buffer 
+        # end right away
+        zlibd = zlib._ZlibDecompressor()
+        out = []
+
+        # Create input buffer and empty it
+        self.assertEqual(zlibd.decompress(self.DATA[:200],
+                                          max_length=0), b'')
+        out.append(zlibd.decompress(b''))
+
+        # Fill buffer with new data
+        out.append(zlibd.decompress(self.DATA[200:280], 2))
+
+        # Append some more data, not enough to require resize
+        out.append(zlibd.decompress(self.DATA[280:300], 2))
+
+        # Decompress rest of data
+        out.append(zlibd.decompress(self.DATA[300:]))
+        self.assertEqual(b''.join(out), self.TEXT)
+
+    def test_decompressor_inputbuf_3(self):
+        # Test reusing input buffer after extending it
+        zlibd = zlib._ZlibDecompressor()
+        out = []
+
+        # Create almost full input buffer
+        out.append(zlibd.decompress(self.DATA[:200], 5))
+
+        # Add even more data to it, requiring resize
+        out.append(zlibd.decompress(self.DATA[200:300], 5))
+
+        # Decompress rest of data
+        out.append(zlibd.decompress(self.DATA[300:]))
+        self.assertEqual(b''.join(out), self.TEXT)
+
+    def test_failure(self):
+        zlibd = zlib._ZlibDecompressor()
+        self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
+        # Previously, a second call could crash due to internal inconsistency
+        self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
+
+    @support.refcount_test
+    def test_refleaks_in___init__(self):
+        gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
+        zlibd = zlib._ZlibDecompressor()
+        refs_before = gettotalrefcount()
+        for i in range(100):
+            zlibd.__init__()
+        self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)
+
+
+class CustomInt:
+    def __index__(self):
+        return 100
+
+
+if __name__ == "__main__":
+    unittest.main()

From f78ae6cb44d885a89541b17e7fb814e012ccbd7e Mon Sep 17 00:00:00 2001
From: Ruben Vorderman
Date: Mon, 23 Jan 2023 11:26:42 +0100
Subject: [PATCH 08/94] Add functional gzip module

---
 src/zlib_ng/gzip_ng.py  | 475 ++++++++++++++++++++++++++++++++++++++++
 src/zlib_ng/zlib_ng.pyi |  19 +-
 2 files changed, 490 insertions(+), 4 deletions(-)

diff --git a/src/zlib_ng/gzip_ng.py b/src/zlib_ng/gzip_ng.py
index e69de29..a6bd4b7 100644
--- a/src/zlib_ng/gzip_ng.py
+++ b/src/zlib_ng/gzip_ng.py
@@ -0,0 +1,475 @@
+# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023
+# Python Software Foundation; All Rights Reserved
+
+# This file is part of python-zlib-ng which is distributed under the
+# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2.
+
+# This file uses code from CPython's Lib/gzip.py after backported changes from
+# python-isal were merged into CPython.
+# Changes compared to CPython:
+# - GzipFile is subclassed as GzipNGFile. Methods that called into zlib have
+#   been overridden with identical implementations that call zlib_ng instead.
+# - _GzipReader._add_read_data uses zlib_ng.crc32 instead of zlib.crc32.
+# - compress and decompress use zlib_ng methods rather than zlib.
+# - The main() function's gzip utility supports many more options for easier
+#   use. This was ported from the python-isal module.
+
+"""Similar to the stdlib gzip module. But using zlib-ng to speed up its
+methods."""
+
+import argparse
+import gzip
+import io
+import os
+import struct
+import sys
+import time
+import _compression  # noqa: I201  # Not third-party
+
+from . import zlib_ng
+
+__all__ = ["GzipFile", "open", "compress", "decompress", "BadGzipFile",
+           "READ_BUFFER_SIZE"]
+
+_COMPRESS_LEVEL_FAST = zlib_ng.Z_BEST_SPEED
+_COMPRESS_LEVEL_TRADEOFF = zlib_ng.Z_DEFAULT_COMPRESSION
+_COMPRESS_LEVEL_BEST = zlib_ng.Z_BEST_COMPRESSION
+
+#: The amount of data that is read in at once when decompressing a file.
+#: Increasing this value may increase performance.
+#: 128K is also the size used by pigz and cat to read files from the
+#: filesystem.
+READ_BUFFER_SIZE = 128 * 1024
+
+FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
+READ, WRITE = 1, 2
+
+try:
+    BadGzipFile = gzip.BadGzipFile  # type: ignore
+except AttributeError:  # Versions lower than 3.8 do not have BadGzipFile
+    BadGzipFile = OSError  # type: ignore
+
+
+# The open method was copied from the CPython source with minor adjustments.
+def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF,
+         encoding=None, errors=None, newline=None):
+    """Open a gzip-compressed file in binary or text mode. This uses the
+    zlib-ng library for optimized speed.
+
+    The filename argument can be an actual filename (a str or bytes object),
+    or an existing file object to read from or write to.
+
+    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
+    binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode
+    is "rb", and the default compresslevel is 6.
+
+    For binary mode, this function is equivalent to the GzipFile constructor:
+    GzipFile(filename, mode, compresslevel). In this case, the encoding,
+    errors and newline arguments must not be provided.
+
+    For text mode, a GzipFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+    """
+    if "t" in mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        if encoding is not None:
+            raise ValueError(
+                "Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+
+    gz_mode = mode.replace("t", "")
+    # __fspath__ method is os.PathLike
+    if isinstance(filename, (str, bytes)) or hasattr(filename, "__fspath__"):
+        binary_file = GzipNGFile(filename, gz_mode, compresslevel)
+    elif hasattr(filename, "read") or hasattr(filename, "write"):
+        binary_file = GzipNGFile(None, gz_mode, compresslevel, filename)
+    else:
+        raise TypeError("filename must be a str or bytes object, or a file")
+
+    if "t" in mode:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    else:
+        return binary_file
+
+
+class GzipNGFile(gzip.GzipFile):
+    """The GzipNGFile class simulates most of the methods of a file object
+    with the exception of the truncate() method.
+
+    This class only supports opening files in binary mode. If you need to
+    open a compressed file in text mode, use the gzip.open() function.
+    """
+    def __init__(self, filename=None, mode=None,
+                 compresslevel=zlib_ng.Z_DEFAULT_COMPRESSION,
+                 fileobj=None, mtime=None):
+        """Constructor for the GzipNGFile class.
+
+        At least one of fileobj and filename must be given a
+        non-trivial value.
+
+        The new class instance is based on fileobj, which can be a regular
+        file, an io.BytesIO object, or any other object which simulates a
+        file. It defaults to None, in which case filename is opened to
+        provide a file object.
+
+        When fileobj is not None, the filename argument is only used to be
+        included in the gzip file header, which may include the original
+        filename of the uncompressed file. It defaults to the filename of
+        fileobj, if discernible; otherwise, it defaults to the empty string,
+        and in this case the original filename is not included in the header.
+
+        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x',
+        or 'xb' depending on whether the file will be read or written.
+        The default is the mode of fileobj if discernible; otherwise, the
+        default is 'rb'. A mode of 'r' is equivalent to one of 'rb', and
+        similarly for 'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.
+
+        The compresslevel argument is an integer from 0 to 9 controlling the
+        level of compression; 1 is fastest and produces the least
+        compression, and 9 is slowest and produces the most compression.
+        0 is no compression. The default is Z_DEFAULT_COMPRESSION (-1),
+        a compromise between speed and compression (currently equivalent
+        to level 6).
+
+        The mtime argument is an optional numeric timestamp to be written
+        to the last modification time field in the stream when compressing.
+        If omitted or None, the current time is used.
+        """
+        super().__init__(filename, mode, compresslevel, fileobj, mtime)
+        if self.mode == WRITE:
+            self.compress = zlib_ng.compressobj(compresslevel,
+                                                zlib_ng.DEFLATED,
+                                                -zlib_ng.MAX_WBITS,
+                                                zlib_ng.DEF_MEM_LEVEL,
+                                                0)
+        if self.mode == READ:
+            raw = _GzipNGReader(self.fileobj)
+            self._buffer = io.BufferedReader(raw)
+
+    def __repr__(self):
+        s = repr(self.fileobj)
+        return '<gzip_ng ' + s[1:-1] + ' ' + hex(id(self)) + '>'
+
+    def write(self, data):
+        self._check_not_closed()
+        if self.mode != WRITE:
+            import errno
+            raise OSError(errno.EBADF,
+                          "write() on read-only GzipNGFile object")
+
+        if self.fileobj is None:
+            raise ValueError("write() on closed GzipNGFile object")
+
+        if isinstance(data, bytes):
+            length = len(data)
+        else:
+            # accept any data that supports the buffer protocol
+            data = memoryview(data)
+            length = data.nbytes
+
+        if length > 0:
+            self.fileobj.write(self.compress.compress(data))
+            self.size += length
+            self.crc = zlib_ng.crc32(data, self.crc)
+            self.offset += length
+        return length
+
+
+class _GzipNGReader(gzip._GzipReader):
+    def __init__(self, fp):
+        # Call the init method of gzip._GzipReader's parent here.
+        # It is not very invasive and allows us to override _PaddedFile
+        _compression.DecompressReader.__init__(
+            self, gzip._PaddedFile(fp), zlib_ng._ZlibDecompressor,
+            hist_bits=-zlib_ng.MAX_WBITS)
+        # Set flag indicating start of a new member
+        self._new_member = True
+        self._last_mtime = None
+
+    def read(self, size=-1):
+        if size < 0:
+            return self.readall()
+        # size=0 is special because decompress(max_length=0) is not supported
+        if not size:
+            return b""
+
+        # For certain input data, a single call to decompress() may not
+        # return any data. In this case, retry until we get some data or
+        # reach EOF.
+        while True:
+            if self._decompressor.eof:
+                # Ending case: we've come to the end of a member in the file,
+                # so finish up this member, and read a new gzip header.
+                # Check the CRC and file size, and set the flag so we read
+                # a new member
+                self._read_eof()
+                self._new_member = True
+                self._decompressor = self._decomp_factory(
+                    **self._decomp_args)
+
+            if self._new_member:
+                # If the _new_member flag is set, we have to
+                # jump to the next member, if there is one.
+                self._init_read()
+                if not self._read_gzip_header():
+                    self._size = self._pos
+                    return b""
+                self._new_member = False
+
+            # Read a chunk of data from the file
+            if self._decompressor.needs_input:
+                buf = self._fp.read(READ_BUFFER_SIZE)
+                uncompress = self._decompressor.decompress(buf, size)
+            else:
+                uncompress = self._decompressor.decompress(b"", size)
+            if self._decompressor.unused_data != b"":
+                # Prepend the already read bytes to the fileobj so they can
+                # be seen by _read_eof() and _read_gzip_header()
+                self._fp.prepend(self._decompressor.unused_data)
+
+            if uncompress != b"":
+                break
+            if buf == b"":
+                raise EOFError("Compressed file ended before the "
+                               "end-of-stream marker was reached")
+
+        self._crc = zlib_ng.crc32(uncompress, self._crc)
+        self._stream_size += len(uncompress)
+        self._pos += len(uncompress)
+        return uncompress
+
+
+# Aliases for improved compatibility with CPython gzip module.
+GzipFile = GzipNGFile
+_GzipReader = _GzipNGReader
+
+
+def _read_exact(fp, n):
+    '''Read exactly *n* bytes from `fp`
+    This method is required because fp may be unbuffered,
+    i.e. return short reads.
+    '''
+    data = fp.read(n)
+    while len(data) < n:
+        b = fp.read(n - len(data))
+        if not b:
+            raise EOFError("Compressed file ended before the "
+                           "end-of-stream marker was reached")
+        data += b
+    return data
+
+
+def _read_gzip_header(fp):
+    '''Read a gzip header from `fp` and progress to the end of the header.
+    Returns last mtime if header was present or None otherwise.
+    '''
+    magic = fp.read(2)
+    if magic == b'':
+        return None
+
+    if magic != b'\037\213':
+        raise BadGzipFile('Not a gzipped file (%r)' % magic)
+
+    (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
+    if method != 8:
+        raise BadGzipFile('Unknown compression method')
+
+    if flag & FEXTRA:
+        # Read & discard the extra field, if present
+        extra_len, = struct.unpack("<H", _read_exact(fp, 2))
+        _read_exact(fp, extra_len)
+    if flag & FNAME:
+        # Read and discard a null-terminated string containing the filename
+        while True:
+            s = fp.read(1)
+            if not s or s == b'\000':
+                break
+    if flag & FCOMMENT:
+        # Read and discard a null-terminated string containing a comment
+        while True:
+            s = fp.read(1)
+            if not s or s == b'\000':
+                break
+    if flag & FHCRC:
+        _read_exact(fp, 2)  # Read & discard the 16-bit header CRC
+    return last_mtime
+
+
+def _create_simple_gzip_header(compresslevel: int, mtime=None) -> bytes:
+    """
+    Write a simple gzip header with no extra fields.
+    :param compresslevel: Compresslevel used to determine the xfl bytes.
+    :param mtime: The mtime (must support conversion to a 32-bit integer).
+    :return: A bytes object representing the gzip header.
+    """
+    if mtime is None:
+        mtime = time.time()
+    if compresslevel == _COMPRESS_LEVEL_BEST:
+        xfl = 2
+    elif compresslevel == _COMPRESS_LEVEL_FAST:
+        xfl = 4
+    else:
+        xfl = 0
+    # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
+    # fields added to header), mtime, xfl and os (255 for unknown OS).
+    return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
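
For context, the gzip_ng module introduced by this patch is intended as a
drop-in replacement for the standard library's gzip module. The sketch below
is a minimal usage example, not part of the patch itself; it assumes the
package is installed as python-zlib-ng. Note that compress() and decompress()
appear in __all__ but their definitions fall outside the excerpt shown above,
so the one-shot calls here are an assumption based on that listing.

# Usage sketch for gzip_ng (illustrative only, not part of the patch).
# Assumes python-zlib-ng is installed and importable as zlib_ng.
from zlib_ng import gzip_ng

payload = b"hello, zlib-ng\n"

# Write a gzip file through zlib-ng's deflate implementation.
with gzip_ng.open("example.txt.gz", "wb", compresslevel=6) as f:
    f.write(payload)

# Read it back; GzipNGFile handles headers, CRC checks and multi-member
# files the same way gzip.GzipFile does.
with gzip_ng.open("example.txt.gz", "rb") as f:
    assert f.read() == payload

# One-shot helpers listed in __all__ (definitions not shown in the excerpt
# above); expected to mirror gzip.compress()/gzip.decompress().
blob = gzip_ng.compress(payload)
assert gzip_ng.decompress(blob) == payload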