diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..3e4afab --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,4 @@ + +### Checklist +- [ ] Pull request details were added to CHANGELOG.rst +- [ ] Documentation was updated (if needed) diff --git a/.github/release_checklist.md b/.github/release_checklist.md new file mode 100644 index 0000000..9abffc2 --- /dev/null +++ b/.github/release_checklist.md @@ -0,0 +1,17 @@ +Release checklist +- [ ] Check outstanding issues on JIRA and Github. +- [ ] Check [latest documentation](https://python-zlib-ng.readthedocs.io/en/latest/) looks fine. +- [ ] Create a release branch. + - [ ] Set version to a stable number. + - [ ] Change current development version in `CHANGELOG.rst` to stable version. + - [ ] Change the version in `__init__.py` +- [ ] Merge the release branch into `main`. +- [ ] Create an annotated tag with the stable version number. Include changes +from CHANGELOG.rst. +- [ ] Push tag to remote. This triggers the wheel/sdist build on github CI. +- [ ] Merge `main` branch back into `develop`. +- [ ] Add updated version number to develop. (`setup.py` and `src/zlib_ng/__init__.py`) +- [ ] Build the new tag on readthedocs. Only build the last patch version of +each minor version. So `1.1.1` and `1.2.0` but not `1.1.0`, `1.1.1` and `1.2.0`. +- [ ] Create a new release on github. +- [ ] Update the package on conda-forge. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..16420fe --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,238 @@ +name: Continous integration + +on: + pull_request: + paths-ignore: + - 'docs/**' + - '*.rst' + push: + branches: + - develop + - main + tags: + - "*" + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Set up Python 3.7 + uses: actions/setup-python@v2.2.1 + with: + python-version: 3.7 + - name: Install tox + run: pip install tox + - name: Lint + run: tox -e lint + + package-checks: + strategy: + matrix: + tox_env: + - docs + - twine_check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Set up Python 3.7 + uses: actions/setup-python@v2.2.1 + with: + python-version: 3.7 + - name: Install tox and upgrade setuptools and pip + run: pip install --upgrade tox setuptools pip + - name: Run tox -e ${{ matrix.tox_env }} + run: tox -e ${{ matrix.tox_env }} + + test-static: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: + - "3.7" + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "pypy-3.7" + - "pypy-3.8" + - "pypy-3.9" + os: ["ubuntu-latest"] + include: + - os: "macos-latest" + python-version: 3.7 + - os: "windows-latest" + python-version: 3.7 + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2.2.1 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox and upgrade setuptools + run: pip install --upgrade tox setuptools + - name: Set MSVC developer prompt + uses: ilammy/msvc-dev-cmd@v1.6.0 + if: runner.os == 'Windows' + - name: Install build dependencies (MacOS) + run: brew install make + if: runner.os == 'macOS' + - name: Run tests + run: tox + - name: Upload coverage report + uses: codecov/codecov-action@v1 + + test-arch: + if: 
startsWith(github.ref, 'refs/tags') || github.ref == 'refs/heads/develop' || github.ref == 'refs/heads/main' + runs-on: "ubuntu-latest" + strategy: + matrix: + distro: [ "ubuntu_latest" ] + arch: ["aarch64"] + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - uses: uraimo/run-on-arch-action@v2.2.0 + name: Build & run test + with: + arch: ${{ matrix.arch }} + distro: ${{ matrix.distro }} + install: | + apt-get update -q -y + apt-get install -q -y python3 python3-pip tox cmake + run: | + tox + + # Test if the python-zlib-ng conda package, which is linked + # dynamically to the conda zlib-ng package, can be built. + test-dynamic: + runs-on: ${{ matrix.os }} + defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important. + shell: bash -l {0} + strategy: + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python_version: [ "python" ] + include: + - os: "ubuntu-latest" + python_version: "pypy" + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Install miniconda. + uses: conda-incubator/setup-miniconda@v2.0.1 # https://github.com/conda-incubator/setup-miniconda. 
+ with: + channels: conda-forge,defaults + - name: Install requirements (universal) + run: conda install zlib-ng ${{ matrix.python_version}} tox + - name: Set MSVC developer prompt + uses: ilammy/msvc-dev-cmd@v1.6.0 + if: runner.os == 'Windows' + - name: Run tests (dynamic link) + run: tox + env: + PYTHON_ZLIB_NG_LINK_DYNAMIC: True + + deploy: + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + runs-on: ${{ matrix.os }} + needs: + - lint + - package-checks + - test-static + - test-dynamic + - test-arch + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + cibw_archs_linux: ["x86_64"] + build_sdist: [true] + include: + - os: "ubuntu-latest" + cibw_archs_linux: "aarch64" + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - uses: actions/setup-python@v2 + name: Install Python + - name: Install cibuildwheel twine build + run: python -m pip install cibuildwheel twine build + - name: Install build dependencies (Macos) + run: brew install make + if: runner.os == 'macOS' + - name: Set MSVC developer prompt + uses: ilammy/msvc-dev-cmd@v1.6.0 + if: runner.os == 'Windows' + - name: Set up QEMU + if: ${{runner.os == 'Linux' && matrix.cibw_archs_linux == 'aarch64'}} + uses: docker/setup-qemu-action@v1.0.1 + with: + platforms: arm64 + - name: Build wheels + run: cibuildwheel --output-dir dist + env: + CIBW_SKIP: "*-win32 *-manylinux_i686" # Skip 32 bit + CIBW_ARCHS_LINUX: ${{ matrix.cibw_archs_linux }} + CIBW_TEST_REQUIRES: "pytest" + # Simple tests that require the project to be built correctly + CIBW_TEST_COMMAND_LINUX: >- + pytest {project}/tests/test_zlib_compliance.py + {project}/tests/test_gzip_compliance.py + {project}/tests/test_gzip_ng.py + CIBW_TEST_COMMAND_MACOS: >- + pytest {project}/tests/test_zlib_compliance.py + {project}/tests/test_gzip_compliance.py + {project}/tests/test_gzip_ng.py + # Windows does not have the test module in the included python. + # Run compatibility tests instead. 
+ CIBW_TEST_COMMAND_WINDOWS: >- + pytest {project}/tests/test_compat.py + {project}/tests/test_gzip_ng.py + CIBW_ENVIRONMENT_LINUX: >- + PYTHON_ZLIB_NG_BUILD_CACHE=True + PYTHON_ZLIB_NG_BUILD_CACHE_FILE=/tmp/build_cache + CFLAGS="-g0 -DNDEBUG" + CIBW_ENVIRONMENT_WINDOWS: >- + PYTHON_ZLIB_NG_BUILD_CACHE=True + PYTHON_ZLIB_NG_BUILD_CACHE_FILE=${{ runner.temp }}\build_cache + CIBW_ENVIRONMENT_MACOS: >- + PYTHON_ZLIB_NG_BUILD_CACHE=True + PYTHON_ZLIB_NG_BUILD_CACHE_FILE=${{ runner.temp }}/build_cache + CFLAGS="-g0 -DNDEBUG" + - name: Build sdist + if: ${{runner.os == 'Linux' && matrix.cibw_archs_linux == 'x86_64'}} + run: + python -m build --sdist + - uses: actions/upload-artifact@v2 + with: + name: "dist" + path: "dist/" + - name: Publish package to TestPyPI + # pypa/gh-action-pypi-publish@master does not work on OSX + # Alpha, Beta and dev releases contain a - in the tag. + if: contains(github.ref, '-') && startsWith(github.ref, 'refs/tags') + run: twine upload --skip-existing -r testpypi dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} + - name: Publish package to PyPI + if: "!contains(github.ref, '-') && startsWith(github.ref, 'refs/tags')" + run: twine upload --skip-existing dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..da39485 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "src/zlib-ng"] + path = src/zlib_ng/zlib-ng + url = https://github.com/zlib-ng/zlib-ng.git diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..2866eda --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,9 @@ +version: 2 +formats: [] # Do not build epub and pdf + +python: + install: + - method: pip + path: . 
+conda: + environment: docs/conda-environment.yml \ No newline at end of file diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..fd4ffee --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,16 @@ +========== +Changelog +========== + +.. Newest changes should be on top. + +.. This document is user facing. Please word the changes in such a way +.. that users understand how the changes affect the new version. + +version 0.1.0 +----------------- ++ Build wheels for all three major operating systems. ++ Add a fully featured gzip application in python -m zlib_ng.gzip_ng. ++ Port CPython's gzip module to use zlib-ng. ++ Port CPython's zlib module to use zlib-ng. ++ Use zlib-ng version 2.0.6 as included statically linked version. \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..35acd7f --- /dev/null +++ b/LICENSE @@ -0,0 +1,48 @@ +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. 
In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..49eb90b --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +graft src/zlib_ng/zlib-ng diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..e1a551c --- /dev/null +++ b/README.rst @@ -0,0 +1,166 @@ +.. image:: https://img.shields.io/pypi/v/zlib-ng.svg + :target: https://pypi.org/project/zlib-ng/ + :alt: + +.. 
image:: https://img.shields.io/conda/v/conda-forge/python-zlib-ng.svg + :target: https://github.com/conda-forge/python-zlib-ng-feedstock + :alt: + +.. image:: https://img.shields.io/pypi/pyversions/zlib-ng.svg + :target: https://pypi.org/project/zlib-ng/ + :alt: + +.. image:: https://img.shields.io/pypi/l/zlib-ng.svg + :target: https://github.com/pycompression/python-zlib-ng/blob/main/LICENSE + :alt: + +.. image:: https://img.shields.io/conda/pn/conda-forge/python-zlib-ng.svg + :target: https://github.com/conda-forge/python-zlib-ng-feedstock + :alt: + +.. image:: https://github.com/pycompression/python-zlib-ng//actions/workflows/ci.yml/badge.svg + :target: https://github.com/pycompression/python-zlib-ng/actions + :alt: + +.. image:: https://codecov.io/gh/pycompression/python-zlib-ng/branch/develop/graph/badge.svg + :target: https://codecov.io/gh/pycompression/python-zlib-ng + :alt: + +.. image:: https://readthedocs.org/projects/python-zlib-ng/badge + :target: https://python-zlib-ng.readthedocs.io + :alt: + + +python-zlib-ng +============== + +.. introduction start + +Faster zlib and gzip compatible compression and decompression +by providing Python bindings for the zlib-ng library. + +This package provides Python bindings for the `zlib-ng +`_ library. + +``python-zlib-ng`` provides the bindings by offering two modules: + ++ ``zlib_ng``: A drop-in replacement for the zlib module that uses zlib-ng to + accelerate its performance. + ++ ``gzip_ng``: A drop-in replacement for the gzip module that uses ``zlib_ng`` + instead of ``zlib`` to perform its compression and checksum tasks, which + improves performance. + +``zlib_ng`` and ``gzip_ng`` are almost fully compatible with ``zlib`` and +``gzip`` from the Python standard library. There are some minor differences +see: differences-with-zlib-and-gzip-modules_. + +.. introduction end + +Quickstart +---------- + +.. quickstart start + +The python-zlib-ng modules can be imported as follows + +.. 
code-block:: python + + from zlib_ng import zlib_ng + from zlib_ng import gzip_ng + +``zlib_ng`` and ``gzip_ng`` are meant to be used as drop in replacements so +their api and functions are the same as the stdlib's modules. + +A full API documentation can be found on `our readthedocs page +`_. + +``python -m zlib_ng.gzip_ng`` implements a fully featured gzip-like command line +application (just like ``python -m gzip``, but better). Full usage documentation can be +found on `our readthedocs page `_. + + +.. quickstart end + +Installation +------------ +- with pip: ``pip install zlib-ng`` +- with conda: ``conda install python-zlib-ng`` + +Installation is supported on Linux, Windows and MacOS. For more advanced +installation options check the `documentation +`_. + +python-zlib-ng as a dependency in your project +---------------------------------------------- + +.. dependency start + +zlib-ng supports numerous platforms but not all of these have pre-built wheels +available. To prevent your users from running into issues when installing +your project please list a python-zlib-ng dependency as follows. + +``setup.cfg``:: + + install_requires = + zlib-ng; platform.machine == "x86_64" or platform.machine == "AMD64" + +``setup.py``:: + + extras_require={ + ":platform.machine == 'x86_64' or platform.machine == 'AMD64'": ['zlib-ng'] + }, + +.. dependency end + +.. _differences-with-zlib-and-gzip-modules: + +Differences with zlib and gzip modules +-------------------------------------- + +.. differences start + ++ Compression level 1 zlib_ng has a much worse compression rate than that in + zlib. For other compression levels zlib_ng compresses better. ++ Compression level 1 does not apply requested ``wbits`` correctly. For example + compressing with ``zlib_ng.compress(data, level=1, wbits=-9)`` results in + data that cannot be decompressed with ``zlib_ng.decompress(data, wbits=-9)`` + as this will throw an error mentioning invalid window sizes. 
This is a + bug in the included zlib-ng 2.0.6. ++ ``gzip_ng.open`` returns a class ``GzipNGFile`` instead of ``GzipFile``. Since + there are differences between the compressed ratios between levels, a + difference in naming was chosen to reflect this. + ``gzip_ng.GzipFile`` does exist as an alias of + ``gzip_ng.GzipNGFile`` for compatibility reasons. + +.. differences end + +Contributing +------------ +.. contributing start + +Please make a PR or issue if you feel anything can be improved. Bug reports +are also very welcome. Please report them on the `github issue tracker +`_. + +.. contributing end + +Acknowledgements +---------------- + +.. acknowledgements start + +This project builds upon the software and experience of many. Many thanks to: + ++ The `zlib-ng contributors + `_ for making the + zlib-ng library. ++ The `CPython contributors + `_. + Python-zlib-ng mimics ``zlibmodule.c`` and ``gzip.py`` from the standard + library to make it easier for python users to adopt it. ++ The `github actions team `_ for + creating the actions CI service that enables building and testing on all + three major operating systems. + +.. 
acknowledgements end diff --git a/benchmark_scripts/benchmark.py b/benchmark_scripts/benchmark.py new file mode 100644 index 0000000..3cedf4a --- /dev/null +++ b/benchmark_scripts/benchmark.py @@ -0,0 +1,132 @@ +import argparse +import gzip +import io # noqa: F401 used in timeit strings +import timeit +import zlib +from pathlib import Path +from typing import Dict + +from zlib_ng import gzip_ng, zlib_ng # noqa: F401 used in timeit strings + +DATA_DIR = Path(__file__).parent.parent / "tests" / "data" +COMPRESSED_FILE = DATA_DIR / "test.fastq.gz" +with gzip.open(str(COMPRESSED_FILE), mode="rb") as file_h: + data = file_h.read() + +sizes: Dict[str, bytes] = { + "0b": b"", + "8b": data[:8], + "128b": data[:128], + "1kb": data[:1024], + "8kb": data[:8 * 1024], + "16kb": data[:16 * 1024], + "32kb": data[:32 * 1024], + "64kb": data[:64 * 1024], + # "128kb": data[:128*1024], + # "512kb": data[:512*1024] +} +compressed_sizes = {name: zlib.compress(data_block) + for name, data_block in sizes.items()} + +compressed_sizes_gzip = {name: gzip.compress(data_block) + for name, data_block in sizes.items()} + + +def show_sizes(): + print("zlib sizes") + print("name\t" + "\t".join(str(level) for level in range(-1, 10))) + for name, data_block in sizes.items(): + orig_size = max(len(data_block), 1) + rel_sizes = ( + str(round(len(zlib.compress(data_block, level)) / orig_size, 3)) + for level in range(-1, 10)) + print(name + "\t" + "\t".join(rel_sizes)) + + print("zlib_ng sizes") + print("name\t" + "\t".join(str(level) for level in range(-1, 10))) + for name, data_block in sizes.items(): + orig_size = max(len(data_block), 1) + rel_sizes = ( + str(round(len(zlib_ng.compress(data_block, level)) / orig_size, + 3)) + for level in range(-1, 10)) + print(name + "\t" + "\t".join(rel_sizes)) + + +def benchmark(name: str, + names_and_data: Dict[str, bytes], + zlib_ng_string: str, + zlib_string: str, + number: int = 10_000, + **kwargs): + print(name) + print("name\tzlib_ng\tzlib\tratio") + for 
name, data_block in names_and_data.items(): + timeit_kwargs = dict(globals=dict(**globals(), **locals()), + number=number, **kwargs) + zlib_ng_time = timeit.timeit(zlib_ng_string, **timeit_kwargs) + zlib_time = timeit.timeit(zlib_string, **timeit_kwargs) + zlib_ng_microsecs = round(zlib_ng_time * (1_000_000 / number), 2) + zlib_microsecs = round(zlib_time * (1_000_000 / number), 2) + ratio = round(zlib_ng_time / zlib_time, 2) + print("{0}\t{1}\t{2}\t{3}".format(name, + zlib_ng_microsecs, + zlib_microsecs, + ratio)) + + +# show_sizes() + +def argument_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument("--all", action="store_true") + parser.add_argument("--checksums", action="store_true") + parser.add_argument("--functions", action="store_true") + parser.add_argument("--gzip", action="store_true") + parser.add_argument("--sizes", action="store_true") + parser.add_argument("--objects", action="store_true") + return parser + + +if __name__ == "__main__": + args = argument_parser().parse_args() + if args.checksums or args.all: + benchmark("CRC32", sizes, + "zlib_ng.crc32(data_block)", + "zlib.crc32(data_block)") + + benchmark("Adler32", sizes, + "zlib_ng.adler32(data_block)", + "zlib.adler32(data_block)") + if args.functions or args.all: + benchmark("zlib compression", sizes, + "zlib_ng.compress(data_block, 1)", + "zlib.compress(data_block, 1)") + + benchmark("zlib decompression", compressed_sizes, + "zlib_ng.decompress(data_block)", + "zlib.decompress(data_block)") + + if args.gzip or args.all: + benchmark("gzip compression", sizes, + "gzip_ng.compress(data_block, 1)", + "gzip.compress(data_block, 1)") + + benchmark("gzip decompression", compressed_sizes_gzip, + "gzip_ng.decompress(data_block)", + "gzip.decompress(data_block)") + if args.objects or args.all: + benchmark("zlib Compress instantiation", {"": b""}, + "a = zlib_ng.compressobj()", + "a = zlib.compressobj()") + benchmark("zlib Decompress instantiation", {"": b""}, 
+ "a = zlib_ng.decompressobj()", + "a = zlib.decompressobj()") + benchmark("Gzip Writer instantiation", {"": b""}, + "a = gzip_ng.GzipFile(fileobj=io.BytesIO(), mode='wb' )", + "a = gzip.GzipFile(fileobj=io.BytesIO(), mode='wb')") + benchmark("Gzip Reader instantiation", {"": b""}, + "a = gzip_ng.GzipFile(fileobj=io.BytesIO(), mode='rb' )", + "a = gzip.GzipFile(fileobj=io.BytesIO(), mode='rb')") + if args.sizes or args.all: + show_sizes() diff --git a/benchmark_scripts/gzipread128kblocks.py b/benchmark_scripts/gzipread128kblocks.py new file mode 100644 index 0000000..fcedbb2 --- /dev/null +++ b/benchmark_scripts/gzipread128kblocks.py @@ -0,0 +1,9 @@ +import sys + +from zlib_ng import gzip_ng + +with gzip_ng.open(sys.argv[1], "rb") as gzip_file: + while True: + block = gzip_file.read(128 * 1024) + if not block: + break diff --git a/benchmark_scripts/gzipreadlines.py b/benchmark_scripts/gzipreadlines.py new file mode 100644 index 0000000..1c4d0d8 --- /dev/null +++ b/benchmark_scripts/gzipreadlines.py @@ -0,0 +1,7 @@ +import sys + +from zlib_ng import gzip_ng + +with gzip_ng.open(sys.argv[1], "rb") as gzip_file: + for line in gzip_file: + pass diff --git a/benchmark_scripts/gzipwrite128kblocks.py b/benchmark_scripts/gzipwrite128kblocks.py new file mode 100644 index 0000000..5a79612 --- /dev/null +++ b/benchmark_scripts/gzipwrite128kblocks.py @@ -0,0 +1,12 @@ +import os +import sys + +from zlib_ng import gzip_ng + +with open(sys.argv[1], "rb") as in_file: + with gzip_ng.open(os.devnull, "wb") as out_gzip: + while True: + block = in_file.read(128 * 1024) + if block == b"": + break + out_gzip.write(block) diff --git a/benchmark_scripts/gzipwritelines.py b/benchmark_scripts/gzipwritelines.py new file mode 100644 index 0000000..f80ae69 --- /dev/null +++ b/benchmark_scripts/gzipwritelines.py @@ -0,0 +1,9 @@ +import os +import sys + +from zlib_ng import gzip_ng + +with open(sys.argv[1], "rb") as in_file: + with gzip_ng.open(os.devnull, "wb") as out_gzip: + for line in 
in_file: + out_gzip.write(line) diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..cbf6103 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + project: + default: + target: 90 # let's try to hit high standards + patch: + default: + target: 90 # Tests should be written for new features diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conda-environment.yml b/docs/conda-environment.yml new file mode 100644 index 0000000..e41f810 --- /dev/null +++ b/docs/conda-environment.yml @@ -0,0 +1,12 @@ +name: rtd +channels: + - conda-forge + - defaults +dependencies: + - zlib-ng + - python >=3.7 + - sphinx + - setuptools + - pip: + - sphinx-rtd-theme + - sphinx-argparse \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..e2e0bf0 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,59 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +from distutils.dist import DistributionMetadata +from pathlib import Path + +import pkg_resources + +# -- Project information ----------------------------------------------------- + +# Get package information from the installed package. +package = pkg_resources.get_distribution("zlib-ng") +metadata_file = Path(package.egg_info) / Path(package.PKG_INFO) +metadata = DistributionMetadata(path=str(metadata_file)) + +project = 'python-zlib-ng' +copyright = '2023, Leiden University Medical Center' +author = 'Leiden University Medical Center' + +# The short X.Y version +version = package.parsed_version.base_version +# The full version, including alpha/beta/rc tags +release = package.version + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ["sphinx.ext.autodoc", 'sphinxarg.ext'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +# includes/* prevents double indexing +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'includes/*'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' +html_theme_options = dict( + display_version=True, +) + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/includes/CHANGELOG.rst b/docs/includes/CHANGELOG.rst new file mode 120000 index 0000000..bfa394d --- /dev/null +++ b/docs/includes/CHANGELOG.rst @@ -0,0 +1 @@ +../../CHANGELOG.rst \ No newline at end of file diff --git a/docs/includes/README.rst b/docs/includes/README.rst new file mode 120000 index 0000000..c768ff7 --- /dev/null +++ b/docs/includes/README.rst @@ -0,0 +1 @@ +../../README.rst \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..fbc8261 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,140 @@ +.. python-zlib-ng documentation master file, created by + sphinx-quickstart on Fri Sep 11 15:42:56 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +========================================== +Welcome to python-zlib-ng's documentation! +========================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + +============ +Introduction +============ + +.. include:: includes/README.rst + :start-after: .. introduction start + :end-before: .. introduction end + +========== +Quickstart +========== + +.. include:: includes/README.rst + :start-after: .. quickstart start + :end-before: .. quickstart end + +============ +Installation +============ +Installation with pip +--------------------- + +:: + + pip install zlib-ng + +Installation is supported on Linux, MacOS and Windows. On most platforms +wheels are provided. +The installation will include a statically linked version of zlib-ng. +If a wheel is not provided for your system the +installation will build zlib-ng first in a temporary directory. Please check the +`zlib-ng homepage `_ for the build requirements. 
+ +The latest development version of python-zlib-ng can be installed with:: + + pip install git+https://github.com/pycompression/python-zlib-ng.git + +This requires having the build requirements installed. +If you wish to link +dynamically against a version of libz-ng installed on your system use:: + + PYTHON_ZLIB_NG_LINK_DYNAMIC=true pip install zlib-ng --no-binary zlib-ng + +Installation via conda +---------------------- +Python-zlib-ng can be installed via conda, for example using +the `miniconda `_ installer +with a properly setup `conda-forge +`_ +channel. When used with bioinformatics tools setting up `bioconda +`_ +provides a clear set of installation instructions for conda. + +python-zlib-ng is available on conda-forge and can be installed with:: + + conda install python-zlib-ng + +This will automatically install the zlib-ng library dependency as well, since +it is available on conda-forge. + +============================================== +python-zlib-ng as a dependency in your project +============================================== + +.. include:: includes/README.rst + :start-after: .. dependency start + :end-before: .. dependency end + +.. _differences-with-zlib-and-gzip-modules: + +====================================== +Differences with zlib and gzip modules +====================================== + +.. include:: includes/README.rst + :start-after: .. differences start + :end-before: .. differences end + +================================== +API Documentation: zlib_ng.zlib_ng +================================== + +.. automodule:: zlib_ng.zlib_ng + :members: + + .. autoclass:: _Compress + :members: + + .. autoclass:: _Decompress + :members: + +================================== +API-documentation: zlib_ng.gzip_ng +================================== + +.. automodule:: zlib_ng.gzip_ng + :members: compress, decompress, open, BadGzipFile, GzipFile, READ_BUFFER_SIZE + + .. 
autoclass:: GzipNGFile + :members: + :special-members: __init__ + +=============================== +python -m zlib_ng.gzip_ng usage +=============================== + +.. argparse:: + :module: zlib_ng.gzip_ng + :func: _argument_parser + :prog: python -m zlib_ng.gzip_ng + + +============ +Contributing +============ +.. include:: includes/README.rst + :start-after: .. contributing start + :end-before: .. contributing end + +================ +Acknowledgements +================ +.. include:: includes/README.rst + :start-after: .. acknowledgements start + :end-before: .. acknowledgements end + +.. include:: includes/CHANGELOG.rst diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..62f5f82 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=51", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/requirements-docs.txt b/requirements-docs.txt new file mode 100644 index 0000000..7fea598 --- /dev/null +++ b/requirements-docs.txt @@ -0,0 +1,4 @@ +sphinx +setuptools +sphinx-rtd-theme>=1.2.0rc3,<1.3 +sphinx-argparse \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..177298e --- /dev/null +++ b/setup.py @@ -0,0 +1,161 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-zlib-ng which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +import functools +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path + +from setuptools import Extension, find_packages, setup +from setuptools.command.build_ext import build_ext + +ZLIB_NG_SOURCE = os.path.join("src", "zlib_ng", "zlib-ng") + +SYSTEM_IS_UNIX = (sys.platform.startswith("linux") or + sys.platform.startswith("darwin")) +SYSTEM_IS_WINDOWS = sys.platform.startswith("win") + +# Since pip builds in a temp directory by default, setting a fixed file in +# /tmp works during the entire session. 
+DEFAULT_CACHE_FILE = Path(tempfile.gettempdir() + ).absolute() / ".zlib_ng_build_cache" +BUILD_CACHE = os.environ.get("PYTHON_ZLIB_NG_BUILD_CACHE") +BUILD_CACHE_FILE = Path(os.environ.get("PYTHON_ZLIB_NG_BUILD_CACHE_FILE", + DEFAULT_CACHE_FILE)) + +EXTENSIONS = [ + Extension("zlib_ng.zlib_ng", ["src/zlib_ng/zlib_ngmodule.c"]), + ] + + +class BuildZlibNGExt(build_ext): + def build_extension(self, ext): + # Add option to link dynamically for packaging systems such as conda. + # Always link dynamically on readthedocs to simplify install. + if (os.getenv("PYTHON_ZLIB_NG_LINK_DYNAMIC") is not None or + os.environ.get("READTHEDOCS") is not None): + # Check for zlib_ng include directories. This is useful when + # installing in a conda environment. + possible_prefixes = [sys.exec_prefix, sys.base_exec_prefix] + for prefix in possible_prefixes: + if Path(prefix, "include", "zlib-ng.h").exists(): + ext.include_dirs = [os.path.join(prefix, "include")] + ext.library_dirs = [os.path.join(prefix, "lib")] + break # Only one include directory is needed. 
+ # On windows include is in Library apparently + elif Path(prefix, "Library", "include", "zlib-ng.h").exists(): + ext.include_dirs = [os.path.join(prefix, "Library", + "include")] + ext.library_dirs = [os.path.join(prefix, "Library", "lib")] + break + if SYSTEM_IS_UNIX: + ext.libraries = ["z-ng"] # libz-ng.so* + elif SYSTEM_IS_WINDOWS: + ext.libraries = ["zlib-ng"] # zlib-ng*.dll + else: + raise NotImplementedError( + f"Unsupported platform: {sys.platform}") + else: + build_dir = build_zlib_ng() + if SYSTEM_IS_UNIX: + ext.extra_objects = [ + os.path.join(build_dir, "libz-ng.a")] + elif SYSTEM_IS_WINDOWS: + ext.extra_objects = [ + os.path.join(build_dir, "Release", "zlibstatic-ng.lib")] + else: + raise NotImplementedError( + f"Unsupported platform: {sys.platform}") + ext.include_dirs = [build_dir] + # -fPIC needed for proper static linking + # ext.extra_compile_args = ["-fPIC"] + pass + super().build_extension(ext) + + +# Use a cache to prevent zlib-ng from being build twice. +@functools.lru_cache(maxsize=None) +def build_zlib_ng(): + # Check for cache + if BUILD_CACHE: + if BUILD_CACHE_FILE.exists(): + cache_path = Path(BUILD_CACHE_FILE.read_text()) + if (cache_path / "zlib-ng.h").exists(): + return str(cache_path) + + # Creating temporary directories + build_dir = tempfile.mktemp() + shutil.copytree(ZLIB_NG_SOURCE, build_dir) + + if hasattr(os, "sched_getaffinity"): + cpu_count = len(os.sched_getaffinity(0)) + else: # sched_getaffinity not available on all platforms + cpu_count = os.cpu_count() or 1 # os.cpu_count() can return None + # Build environment is a copy of OS environment to allow user to influence + # it. 
+ build_env = os.environ.copy() + build_env["CFLAGS"] = build_env.get("CFLAGS", "") + " -fPIC" + # Add -fPIC flag to allow static compilation + run_args = dict(cwd=build_dir, env=build_env) + if sys.platform == "darwin": # Cmake does not work properly + subprocess.run([os.path.join(build_dir, "configure")], **run_args) + subprocess.run(["gmake", "libz-ng.a"], **run_args) + else: + subprocess.run(["cmake", build_dir], **run_args) + # Do not create test suite and do not perform tests to shorten build times. + # There is no need when stable releases of zlib-ng are used. + subprocess.run(["cmake", "--build", build_dir, "--config", "Release", + "--target", "zlibstatic", + "-j", str(cpu_count)], **run_args) + if BUILD_CACHE: + BUILD_CACHE_FILE.write_text(build_dir) + return build_dir + + +setup( + name="zlib-ng", + version="0.1.0", + description="Drop-in replacement for zlib and gzip modules using zlib-ng", + author="Leiden University Medical Center", + author_email="r.h.p.vorderman@lumc.nl", # A placeholder for now + long_description=Path("README.rst").read_text(), + long_description_content_type="text/x-rst", + cmdclass={"build_ext": BuildZlibNGExt}, + license="PSF-2.0", + keywords="zlib-ng zlib compression deflate gzip", + zip_safe=False, + packages=find_packages('src'), + package_dir={'': 'src'}, + package_data={'zlib_ng': [ + '*.pyi', 'py.typed', + # Include zlib-ng LICENSE and other relevant files with the binary distribution. 
+ 'zlib-ng/LICENSE.md', 'zlib-ng/README.md']}, + url="https://github.com/pycompression/python-zlib-ng", + classifiers=[ + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: C", + "Development Status :: 4 - Beta", + "Topic :: System :: Archiving :: Compression", + "License :: OSI Approved :: Python Software Foundation License", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + ], + python_requires=">=3.7", # uses METH_FASTCALL + ext_modules=EXTENSIONS +) diff --git a/src/zlib_ng/__init__.py b/src/zlib_ng/__init__.py new file mode 100644 index 0000000..80b634d --- /dev/null +++ b/src/zlib_ng/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-zlib-ng which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +__version__ = "0.1.0" diff --git a/src/zlib_ng/gzip_ng.py b/src/zlib_ng/gzip_ng.py new file mode 100644 index 0000000..70eede2 --- /dev/null +++ b/src/zlib_ng/gzip_ng.py @@ -0,0 +1,476 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-zlib-ng which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. 
+ +# This file uses code from CPython's Lib/gzip.py after backported changes from +# python-isal were merged into CPython. +# Changes compared to CPython: +# - Subclassed GzipFile to GzipNGFile. Methods that included calls to zlib have +# been overwritten with the same methods, but now calling to zlib_ng. +# - _GzipReader._add_read_data uses zlib_ng.crc32 instead of zlib.crc32. +# - compress, decompress use zlib_ng methods rather than zlib. +# - The main() function's gzip utility supports many more options for easier +# use. This was ported from the python-isal module + +"""Similar to the stdlib gzip module. But using zlib-ng to speed up its +methods.""" + +import argparse +import gzip +import io +import os +import struct +import sys +import time +import _compression # noqa: I201 # Not third-party + +from . import zlib_ng + +__all__ = ["GzipFile", "open", "compress", "decompress", "BadGzipFile", + "READ_BUFFER_SIZE"] + +_COMPRESS_LEVEL_FAST = zlib_ng.Z_BEST_SPEED +_COMPRESS_LEVEL_TRADEOFF = zlib_ng.Z_DEFAULT_COMPRESSION +_COMPRESS_LEVEL_BEST = zlib_ng.Z_BEST_COMPRESSION + +#: The amount of data that is read in at once when decompressing a file. +#: Increasing this value may increase performance. +#: 128K is also the size used by pigz and cat to read files from the +#: filesystem. +READ_BUFFER_SIZE = 128 * 1024 + +FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 +READ, WRITE = 1, 2 + +try: + BadGzipFile = gzip.BadGzipFile # type: ignore +except AttributeError: # Versions lower than 3.8 do not have BadGzipFile + BadGzipFile = OSError # type: ignore + + +# The open method was copied from the CPython source with minor adjustments. +def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF, + encoding=None, errors=None, newline=None): + """Open a gzip-compressed file in binary or text mode. This uses the + zlib-ng library for optimized speed. 
+ + The filename argument can be an actual filename (a str or bytes object), or + an existing file object to read from or write to. + + The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for + binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is + "rb", and the default compresslevel is 6. + + For binary mode, this function is equivalent to the GzipFile constructor: + GzipFile(filename, mode, compresslevel). In this case, the encoding, errors + and newline arguments must not be provided. + + For text mode, a GzipFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + """ + if "t" in mode: + if "b" in mode: + raise ValueError("Invalid mode: %r" % (mode,)) + else: + if encoding is not None: + raise ValueError( + "Argument 'encoding' not supported in binary mode") + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + gz_mode = mode.replace("t", "") + # __fspath__ method is os.PathLike + if isinstance(filename, (str, bytes)) or hasattr(filename, "__fspath__"): + binary_file = GzipNGFile(filename, gz_mode, compresslevel) + elif hasattr(filename, "read") or hasattr(filename, "write"): + binary_file = GzipNGFile(None, gz_mode, compresslevel, filename) + else: + raise TypeError("filename must be a str or bytes object, or a file") + + if "t" in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file + + +class GzipNGFile(gzip.GzipFile): + """The GzipNGFile class simulates most of the methods of a file object with + the exception of the truncate() method. + + This class only supports opening files in binary mode. If you need to open + a compressed file in text mode, use the gzip.open() function. 
+ """ + + def __init__(self, filename=None, mode=None, + compresslevel=_COMPRESS_LEVEL_BEST, + fileobj=None, mtime=None): + """Constructor for the GzipNGFile class. + + At least one of fileobj and filename must be given a + non-trivial value. + + The new class instance is based on fileobj, which can be a regular + file, an io.BytesIO object, or any other object which simulates a file. + It defaults to None, in which case filename is opened to provide + a file object. + + When fileobj is not None, the filename argument is only used to be + included in the gzip file header, which may include the original + filename of the uncompressed file. It defaults to the filename of + fileobj, if discernible; otherwise, it defaults to the empty string, + and in this case the original filename is not included in the header. + + The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', + or 'xb' depending on whether the file will be read or written. + The default is the mode of fileobj if discernible; otherwise, the + default is 'rb'. A mode of 'r' is equivalent to one of 'rb', and + similarly for 'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'. + + The compresslevel argument is an integer from 0 to 9 controlling the + level of compression; 1 is fastest and produces the least compression, + and 9 is slowest and produces the most compression. 0 is no compression + at all. The default is 9 (zlib_ng.Z_BEST_COMPRESSION). + + The mtime argument is an optional numeric timestamp to be written + to the last modification time field in the stream when compressing. + If omitted or None, the current time is used. 
+ """ + super().__init__(filename, mode, compresslevel, fileobj, mtime) + if self.mode == WRITE: + self.compress = zlib_ng.compressobj(compresslevel, + zlib_ng.DEFLATED, + -zlib_ng.MAX_WBITS, + zlib_ng.DEF_MEM_LEVEL, + 0) + if self.mode == READ: + raw = _GzipNGReader(self.fileobj) + self._buffer = io.BufferedReader(raw) + + def __repr__(self): + s = repr(self.fileobj) + return '' + + def write(self, data): + self._check_not_closed() + if self.mode != WRITE: + import errno + raise OSError(errno.EBADF, "write() on read-only GzipNGFile object") + + if self.fileobj is None: + raise ValueError("write() on closed GzipNGFile object") + + if isinstance(data, bytes): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + + if length > 0: + self.fileobj.write(self.compress.compress(data)) + self.size += length + self.crc = zlib_ng.crc32(data, self.crc) + self.offset += length + return length + + +class _GzipNGReader(gzip._GzipReader): + def __init__(self, fp): + # Call the init method of gzip._GzipReader's parent here. + # It is not very invasive and allows us to override _PaddedFile + _compression.DecompressReader.__init__( + self, gzip._PaddedFile(fp), zlib_ng._ZlibDecompressor, + wbits=-zlib_ng.MAX_WBITS) + # Set flag indicating start of a new member + self._new_member = True + self._last_mtime = None + + def read(self, size=-1): + if size < 0: + return self.readall() + # size=0 is special because decompress(max_length=0) is not supported + if not size: + return b"" + + # For certain input data, a single + # call to decompress() may not return + # any data. In this case, retry until we get some data or reach EOF. + while True: + if self._decompressor.eof: + # Ending case: we've come to the end of a member in the file, + # so finish up this member, and read a new gzip header. 
+ # Check the CRC and file size, and set the flag so we read + # a new member + self._read_eof() + self._new_member = True + self._decompressor = self._decomp_factory( + **self._decomp_args) + + if self._new_member: + # If the _new_member flag is set, we have to + # jump to the next member, if there is one. + self._init_read() + if not self._read_gzip_header(): + self._size = self._pos + return b"" + self._new_member = False + + # Read a chunk of data from the file + if self._decompressor.needs_input: + buf = self._fp.read(READ_BUFFER_SIZE) + uncompress = self._decompressor.decompress(buf, size) + else: + uncompress = self._decompressor.decompress(b"", size) + if self._decompressor.unused_data != b"": + # Prepend the already read bytes to the fileobj so they can + # be seen by _read_eof() and _read_gzip_header() + self._fp.prepend(self._decompressor.unused_data) + + if uncompress != b"": + break + if buf == b"": + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + + self._crc = zlib_ng.crc32(uncompress, self._crc) + self._stream_size += len(uncompress) + self._pos += len(uncompress) + return uncompress + + +# Aliases for improved compatibility with CPython gzip module. +GzipFile = GzipNGFile +_GzipReader = _GzipNGReader + + +def _read_exact(fp, n): + '''Read exactly *n* bytes from `fp` + This method is required because fp may be unbuffered, + i.e. return short reads. + ''' + data = fp.read(n) + while len(data) < n: + b = fp.read(n - len(data)) + if not b: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + data += b + return data + + +def _read_gzip_header(fp): + '''Read a gzip header from `fp` and progress to the end of the header. + Returns last mtime if header was present or None otherwise. 
+ ''' + magic = fp.read(2) + if magic == b'': + return None + + if magic != b'\037\213': + raise BadGzipFile('Not a gzipped file (%r)' % magic) + + (method, flag, last_mtime) = struct.unpack(" bytes: + """ + Write a simple gzip header with no extra fields. + :param compresslevel: Compresslevel used to determine the xfl bytes. + :param mtime: The mtime (must support conversion to a 32-bit integer). + :return: A bytes object representing the gzip header. + """ + if mtime is None: + mtime = time.time() + if compresslevel == _COMPRESS_LEVEL_BEST: + xfl = 2 + elif compresslevel == _COMPRESS_LEVEL_FAST: + xfl = 4 + else: + xfl = 0 + # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra + # fields added to header), mtime, xfl and os (255 for unknown OS). + return struct.pack("