diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..3e4afab --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,4 @@ + +### Checklist +- [ ] Pull request details were added to CHANGELOG.rst +- [ ] Documentation was updated (if needed) diff --git a/.github/release_checklist.md b/.github/release_checklist.md new file mode 100644 index 0000000..9abffc2 --- /dev/null +++ b/.github/release_checklist.md @@ -0,0 +1,17 @@ +Release checklist +- [ ] Check outstanding issues on JIRA and Github. +- [ ] Check [latest documentation](https://python-zlib-ng.readthedocs.io/en/latest/) looks fine. +- [ ] Create a release branch. + - [ ] Set version to a stable number. + - [ ] Change current development version in `CHANGELOG.rst` to stable version. + - [ ] Change the version in `__init__.py` +- [ ] Merge the release branch into `main`. +- [ ] Create an annotated tag with the stable version number. Include changes +from CHANGELOG.rst. +- [ ] Push tag to remote. This triggers the wheel/sdist build on github CI. +- [ ] Merge `main` branch back into `develop`. +- [ ] Add updated version number to develop. (`setup.py` and `src/zlib_ng/__init__.py`) +- [ ] Build the new tag on readthedocs. Only build the last patch version of +each minor version. So `1.1.1` and `1.2.0` but not `1.1.0`, `1.1.1` and `1.2.0`. +- [ ] Create a new release on github. +- [ ] Update the package on conda-forge. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..26ee812 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,238 @@ +name: Continous integration + +on: + pull_request: + paths-ignore: + - 'docs/**' + - '*.rst' + push: + branches: + - develop + - main + tags: + - "*" + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Set up Python 3.7 + uses: actions/setup-python@v2.2.1 + with: + python-version: 3.7 + - name: Install tox + run: pip install tox + - name: Lint + run: tox -e lint + + package-checks: + strategy: + matrix: + tox_env: + - docs + - twine_check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Set up Python 3.7 + uses: actions/setup-python@v2.2.1 + with: + python-version: 3.7 + - name: Install tox and upgrade setuptools and pip + run: pip install --upgrade tox setuptools pip + - name: Run tox -e ${{ matrix.tox_env }} + run: tox -e ${{ matrix.tox_env }} + + test-static: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: + - "3.7" + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "pypy-3.7" + - "pypy-3.8" + - "pypy-3.9" + os: ["ubuntu-latest"] + include: +# - os: "macos-latest" +# python-version: 3.7 + - os: "windows-latest" + python-version: 3.7 + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2.2.1 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox and upgrade setuptools + run: pip install --upgrade tox setuptools + - name: Set MSVC developer prompt + uses: ilammy/msvc-dev-cmd@v1.6.0 + if: runner.os == 'Windows' + - name: Install build dependencies (MacOS) + run: brew install ninja + env: + HOMEBREW_NO_INSTALL_CLEANUP: 1 + if: runner.os == 'macOS' + - name: Run tests + run: tox + - name: Upload coverage report + uses: 
codecov/codecov-action@v1 + + test-arch: + if: startsWith(github.ref, 'refs/tags') || github.ref == 'refs/heads/develop' || github.ref == 'refs/heads/main' + runs-on: "ubuntu-latest" + strategy: + matrix: + distro: [ "ubuntu_latest" ] + arch: ["aarch64"] + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - uses: uraimo/run-on-arch-action@v2.2.0 + name: Build & run test + with: + arch: ${{ matrix.arch }} + distro: ${{ matrix.distro }} + install: | + apt-get update -q -y + apt-get install -q -y python3 python3-pip tox cmake + run: | + tox + + # Test if the python-zlib-ng conda package can be build. Which is linked + # dynamically to the conda zlib-ng package. + test-dynamic: + runs-on: ${{ matrix.os }} + defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important. + shell: bash -l {0} + strategy: + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python_version: [ "python" ] + include: + - os: "ubuntu-latest" + python_version: "pypy" + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: Install miniconda. + uses: conda-incubator/setup-miniconda@v2.0.1 # https://github.com/conda-incubator/setup-miniconda. 
+ with: + channels: conda-forge,defaults + - name: Install requirements (universal) + run: conda install zlib-ng ${{ matrix.python_version}} tox + - name: Set MSVC developer prompt + uses: ilammy/msvc-dev-cmd@v1.6.0 + if: runner.os == 'Windows' + - name: Run tests (dynamic link) + run: tox + env: + PYTHON_ZLIB_NG_LINK_DYNAMIC: True + + deploy: + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + runs-on: ${{ matrix.os }} + needs: + - lint + - package-checks + - test-static + - test-dynamic + - test-arch + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + cibw_archs_linux: ["x86_64"] + build_sdist: [true] + include: + - os: "ubuntu-latest" + cibw_archs_linux: "aarch64" + cibw_before_all_linux: "true" # The true command exits with 0 + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - uses: actions/setup-python@v2 + name: Install Python + - name: Install cibuildwheel twine wheel + run: python -m pip install cibuildwheel twine wheel + - name: Install build dependencies (Macos) + run: brew install nasm automake autoconf + if: runner.os == 'macOS' + - name: Set MSVC developer prompt + uses: ilammy/msvc-dev-cmd@v1.6.0 + if: runner.os == 'Windows' + - name: Install nasm (Windows) + uses: ilammy/setup-nasm@v1.2.0 + if: runner.os == 'Windows' + - name: Set up QEMU + if: ${{runner.os == 'Linux' && matrix.cibw_archs_linux == 'aarch64'}} + uses: docker/setup-qemu-action@v1.0.1 + with: + platforms: arm64 + - name: Build wheels + run: cibuildwheel --output-dir dist + env: + CIBW_SKIP: "*-win32 *-manylinux_i686" # Skip 32 bit + CIBW_ARCHS_LINUX: ${{ matrix.cibw_archs_linux }} + CIBW_BEFORE_ALL_LINUX: ${{ matrix.cibw_before_all_linux }} + # Fully test the build wheels again. 
+ CIBW_TEST_REQUIRES: "pytest" + # Simple test that requires the project to be build correctly + CIBW_TEST_COMMAND: >- + pytest {project}/tests/test_igzip.py + {project}/tests/test_compat.py + {project}/tests/test_igzip_lib.py + CIBW_ENVIRONMENT_LINUX: >- + PYTHON_ZLIB_NG_BUILD_CACHE=True + PYTHON_ZLIB_NG_BUILD_CACHE_FILE=/tmp/build_cache + CFLAGS="-g0 -DNDEBUG" + CIBW_ENVIRONMENT_WINDOWS: >- + PYTHON_ZLIB_NG_BUILD_CACHE=True + PYTHON_ZLIB_NG_BUILD_CACHE_FILE=${{ runner.temp }}\build_cache + CIBW_ENVIRONMENT_MACOS: >- + PYTHON_ZLIB_NG_BUILD_CACHE=True + PYTHON_ZLIB_NG_BUILD_CACHE_FILE=${{ runner.temp }}/build_cache + CFLAGS="-g0 -DNDEBUG" + - name: Build sdist + if: ${{runner.os == 'Linux' && matrix.cibw_archs_linux == 'x86_64'}} + run: | + pip install build + python -m build --sdist + - uses: actions/upload-artifact@v2 + with: + name: "dist" + path: "dist/" + - name: Publish package to TestPyPI + # pypa/gh-action-pypi-publish@master does not work on OSX + # Alpha, Beta and dev releases contain a - in the tag. + if: contains(github.ref, '-') && startsWith(github.ref, 'refs/tags') + run: twine upload --repository-url https://test.pypi.org/legacy/ dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} + - name: Publish package to PyPI + if: "!contains(github.ref, '-') && startsWith(github.ref, 'refs/tags')" + run: twine upload dist/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..9d4b11d --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,15 @@ +========== +Changelog +========== + +.. Newest changes should be on top. + +.. This document is user facing. Please word the changes in such a way +.. that users understand how the changes affect the new version. 
+ +version 0.1.0-dev +----------------- ++ Add a fully featured gzip application in python m zlib_ng.gzip_ng ++ Port Cpython's gzip module to use zlib-ng ++ Port CPython's zlib module to use zlib-ng ++ Use zlib-ng version 2.0.6 as included statically linked version. \ No newline at end of file diff --git a/README.rst b/README.rst index e69de29..82c260e 100644 --- a/README.rst +++ b/README.rst @@ -0,0 +1,163 @@ +.. image:: https://img.shields.io/pypi/v/zlib-ng.svg + :target: https://pypi.org/project/zlib-ng/ + :alt: + +.. image:: https://img.shields.io/conda/v/conda-forge/python-zlib-ng.svg + :target: https://github.com/conda-forge/python-zlib-ng-feedstock + :alt: + +.. image:: https://img.shields.io/pypi/pyversions/zlib-ng.svg + :target: https://pypi.org/project/zlib-ng/ + :alt: + +.. image:: https://img.shields.io/pypi/l/zlib-ng.svg + :target: https://github.com/pycompression/python-zlib-ng/blob/main/LICENSE + :alt: + +.. image:: https://img.shields.io/conda/pn/conda-forge/python-zlib-ng.svg + :target: https://github.com/conda-forge/python-zlib-ng-feedstock + :alt: + +.. image:: https://github.com/pycompression/python-zlib-ng//actions/workflows/ci.yml/badge.svg + :target: https://github.com/pycompression/python-zlib-ng/actions + :alt: + +.. image:: https://codecov.io/gh/pycompression/python-zlib-ng/branch/develop/graph/badge.svg + :target: https://codecov.io/gh/pycompression/python-zlib-ng + :alt: + +.. image:: https://readthedocs.org/projects/python-zlib-ng/badge + :target: https://python-zlib-ng.readthedocs.io + :alt: + + +python-zlib-ng +============== + +.. introduction start + +Faster zlib and gzip compatible compression and decompression +by providing Python bindings for the zlib-ng library. + +This package provides Python bindings for the `zlib-ng +`_ library. 
+ +``python-zlib-ng`` provides the bindings by offering two modules: + ++ ``zlib_ng``: A drop-in replacement for the zlib module that uses zlib-ng to + accelerate its performance. + ++ ``gzip_ng``: A drop-in replacement for the gzip module that uses ``zlib_ng`` + instead of ``zlib`` to perform its compression and checksum tasks, which + improves performance. + +``zlib_ng`` and ``gzip_ng`` are almost fully compatible with ``zlib`` and +``gzip`` from the Python standard library. There are some minor differences +see: differences-with-zlib-and-gzip-modules_. + +.. introduction end + +Quickstart +---------- + +.. quickstart start + +The python-zlib-ng modules can be imported as follows + +.. code-block:: python + + from zlib_ng import zlib_ng + from zlib_ng import gzip_ng + +``zlib_ng`` and ``gzip_ng`` are meant to be used as drop in replacements so +their api and functions are the same as the stdlib's modules. + +A full API documentation can be found on `our readthedocs page +`_. + +``python -m zlib_ng.gzip_ng`` implements a fully featured gzip-like command line +application (just like ``python -m gzip``, but better). Full usage documentation can be +found on `our readthedocs page `_. + + +.. quickstart end + +Installation +------------ +- with pip: ``pip install zlib-ng`` +- with conda: ``conda install python-zlib-ng`` + +Installation is supported on Linux, Windows and MacOS. For more advanced +installation options check the `documentation +`_. + +python-zlib-ng as a dependency in your project +---------------------------------------------- + +.. dependency start + +zlib-ng supports numerous platforms but not all of these have pre-built wheels +available. To prevent your users from running into issues when installing +your project please list a python-zlib-ng dependency as follows. 
+ +``setup.cfg``:: + + install_requires = + zlib-ng; platform.machine == "x86_64" or platform.machine == "AMD64" + +``setup.py``:: + + extras_require={ + ":platform.machine == 'x86_64' or platform.machine == 'AMD64'": ['zlib-ng'] + }, + +.. dependency end + +.. _differences-with-zlib-and-gzip-modules: + +Differences with zlib and gzip modules +-------------------------------------- + +.. differences start + ++ Compression level 1 zlib_ng has a much worse compression rate than that in + zlib. For other compression levels zlib_ng compresses better. ++ Compression level 1 does not apply requested ``wbits`` correctly. For example + compressing with ``zlib_ng.compress(data, level=1, wbits=-9)`` results in + data that cannot be decompressed with ``zlib_ng.decompress(data, wbits=-9)`` + as this will throw an error mentioning invalid window sizes. This is a + bug in the included zlib-ng 2.0.6. ++ ``gzip_ng.open`` returns a class ``GzipNGFile`` instead of ``GzipFile``. Since + there are differences between the compressed ratios between levels, a + difference in naming was chosen to reflect this. + ``gzip_ng.GzipFile`` does exist as an alias of + ``gzip_ng.GzipNGFile`` for compatibility reasons. + +.. differences end + +Contributing +------------ +.. contributing start + +Please make a PR or issue if you feel anything can be improved. Bug reports +are also very welcome. Please report them on the `github issue tracker +`_. + +.. contributing end + +Acknowledgements +---------------- + +.. acknowledgements start + +This project builds upon the software and experience of many. Many thanks to: + ++ The `zlib-ng contributors + `_ for making the + zlib-ng library. ++ The `CPython contributors + `_. + Python-zlib-ng mimicks ``zlibmodule.c`` and ``gzip.py`` from the standard + library to make it easier for python users to adopt it. + +.. 
acknowledgements end diff --git a/benchmark_scripts/benchmark.py b/benchmark_scripts/benchmark.py index e3c3662..3cedf4a 100644 --- a/benchmark_scripts/benchmark.py +++ b/benchmark_scripts/benchmark.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Dict -from zlib_ng import zlib_ng, gzip_ng # noqa: F401 used in timeit strings +from zlib_ng import gzip_ng, zlib_ng # noqa: F401 used in timeit strings DATA_DIR = Path(__file__).parent.parent / "tests" / "data" COMPRESSED_FILE = DATA_DIR / "test.fastq.gz" @@ -43,7 +43,7 @@ def show_sizes(): print(name + "\t" + "\t".join(rel_sizes)) print("zlib_ng sizes") - print("name\t" + "\t".join(str(level) for level in range(-1,10))) + print("name\t" + "\t".join(str(level) for level in range(-1, 10))) for name, data_block in sizes.items(): orig_size = max(len(data_block), 1) rel_sizes = ( diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..cbf6103 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + project: + default: + target: 90 # let's try to hit high standards + patch: + default: + target: 90 # Tests should be written for new features diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conda-environment.yml b/docs/conda-environment.yml new file mode 100644 index 0000000..e41f810 --- /dev/null +++ b/docs/conda-environment.yml @@ -0,0 +1,12 @@ +name: rtd +channels: + - conda-forge + - defaults +dependencies: + - zlib-ng + - python >=3.7 + - sphinx + - setuptools + - pip: + - sphinx-rtd-theme + - sphinx-argparse \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..e2e0bf0 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,59 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +from distutils.dist import DistributionMetadata +from pathlib import Path + +import pkg_resources + +# -- Project information ----------------------------------------------------- + +# Get package information from the installed package. +package = pkg_resources.get_distribution("zlib-ng") +metadata_file = Path(package.egg_info) / Path(package.PKG_INFO) +metadata = DistributionMetadata(path=str(metadata_file)) + +project = 'python-zlib-ng' +copyright = '2023, Leiden University Medical Center' +author = 'Leiden University Medical Center' + +# The short X.Y version +version = package.parsed_version.base_version +# The full version, including alpha/beta/rc tags +release = package.version + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ["sphinx.ext.autodoc", 'sphinxarg.ext'] + +# Add any paths that contain templates here, relative to this directory. 
+templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +# includes/* prevents double indexing +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'includes/*'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' +html_theme_options = dict( + display_version=True, +) + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/docs/includes/CHANGELOG.rst b/docs/includes/CHANGELOG.rst new file mode 120000 index 0000000..bfa394d --- /dev/null +++ b/docs/includes/CHANGELOG.rst @@ -0,0 +1 @@ +../../CHANGELOG.rst \ No newline at end of file diff --git a/docs/includes/README.rst b/docs/includes/README.rst new file mode 120000 index 0000000..c768ff7 --- /dev/null +++ b/docs/includes/README.rst @@ -0,0 +1 @@ +../../README.rst \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..fbc8261 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,140 @@ +.. python-zlib-ng documentation master file, created by + sphinx-quickstart on Fri Sep 11 15:42:56 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +========================================== +Welcome to python-zlib-ng's documentation! +========================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + +============ +Introduction +============ + +.. 
include:: includes/README.rst + :start-after: .. introduction start + :end-before: .. introduction end + +========== +Quickstart +========== + +.. include:: includes/README.rst + :start-after: .. quickstart start + :end-before: .. quickstart end + +============ +Installation +============ +Installation with pip +--------------------- + +:: + + pip install zlib-ng + +Installation is supported on Linux, MacOS and Windows. On most platforms +wheels are provided. +The installation will include a staticallly linked version of zlib-ng. +If a wheel is not provided for your system the +installation will build zlib-ng first in a temporary directory. Please check the +`zlib-ng homepage `_ for the build requirements. + +The latest development version of python-zlib-ng can be installed with:: + + pip install git+https://github.com/pycompression/python-zlib-ng.git + +This requires having the build requirements installed. +If you wish to link +dynamically against a version of libz-ng installed on your system use:: + + PYTHON_ZLIB_NG_LINK_DYNAMIC=true pip install zlib-ng --no-binary zlib-ng + +Installation via conda +---------------------- +Python-zlib-ng can be installed via conda, for example using +the `miniconda `_ installer +with a properly setup `conda-forge +`_ +channel. When used with bioinformatics tools setting up `bioconda +`_ +provides a clear set of installation instructions for conda. + +python-zlib-ng is available on conda-forge and can be installed with:: + + conda install python-zlib-ng + +This will automatically install the zlib-ng library dependency as well, since +it is available on conda-forge. + +============================================== +python-zlib-ng as a dependency in your project +============================================== + +.. include:: includes/README.rst + :start-after: .. dependency start + :end-before: .. dependency end + +.. 
_differences-with-zlib-and-gzip-modules: + +====================================== +Differences with zlib and gzip modules +====================================== + +.. include:: includes/README.rst + :start-after: .. differences start + :end-before: .. differences end + +================================== +API Documentation: zlib_ng.zlib_ng +================================== + +.. automodule:: zlib_ng.zlib_ng + :members: + + .. autoclass:: _Compress + :members: + + .. autoclass:: _Decompress + :members: + +================================== +API-documentation: zlib_ng.gzip_ng +================================== + +.. automodule:: zlib_ng.gzip_ng + :members: compress, decompress, open, BadGzipFile, GzipFile, READ_BUFFER_SIZE + + .. autoclass:: GzipNGFile + :members: + :special-members: __init__ + +=============================== +python -m zlib_ng.gzip_ng usage +=============================== + +.. argparse:: + :module: zlib_ng.gzip_ng + :func: _argument_parser + :prog: python -m zlib_ng.gzip_ng + + +============ +Contributing +============ +.. include:: includes/README.rst + :start-after: .. contributing start + :end-before: .. contributing end + +================ +Acknowledgements +================ +.. include:: includes/README.rst + :start-after: .. acknowledgements start + :end-before: .. acknowledgements end + +.. include:: includes/CHANGELOG.rst diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2119f51 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. 
Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..62f5f82 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=51", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/requirements-docs.txt b/requirements-docs.txt new file mode 100644 index 0000000..7fea598 --- /dev/null +++ b/requirements-docs.txt @@ -0,0 +1,4 @@ +sphinx +setuptools +sphinx-rtd-theme>=1.2.0rc3,<1.3 +sphinx-argparse \ No newline at end of file diff --git a/setup.py b/setup.py index 189255c..1e38e85 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ def build_extension(self, ext): os.path.join(build_dir, "libz-ng.a")] elif SYSTEM_IS_WINDOWS: ext.extra_objects = [ - os.path.join(build_dir, "zlib-ng.lib")] + os.path.join(build_dir, "Release", "zlibstatic-ng.lib")] else: raise NotImplementedError( f"Unsupported platform: {sys.platform}") @@ -117,7 +117,7 @@ def build_zlib_ng(): setup( name="zlib-ng", - version="0.1.0", + version="0.1.0-dev", description="Drop-in replacement for zlib and gzip modules using zlib-ng", author="Leiden University Medical Center", author_email="r.h.p.vorderman@lumc.nl", # A placeholder for now @@ -129,10 +129,10 @@ def build_zlib_ng(): zip_safe=False, packages=find_packages('src'), package_dir={'': 'src'}, - package_data={'zlib_ng': ['*.pyi', 'py.typed', - # Include zlib-ng LICENSE and other relevant files - # with the binary distribution. - 'zlib-ng/LICENSE.md', 'zlib-ng/README.md']}, + package_data={'zlib_ng': [ + '*.pyi', 'py.typed', + # Include zlib-ng LICENSE and other relevant files with the binary distribution. 
+ 'zlib-ng/LICENSE.md', 'zlib-ng/README.md']}, url="https://github.com/pycompression/python-zlib-ng", classifiers=[ "Programming Language :: Python :: 3 :: Only", diff --git a/src/zlib_ng/__init__.py b/src/zlib_ng/__init__.py index a6c8f90..563e415 100644 --- a/src/zlib_ng/__init__.py +++ b/src/zlib_ng/__init__.py @@ -4,3 +4,5 @@ # This file is part of python-zlib-ng which is distributed under the # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +__version__ = "0.1.0-dev" diff --git a/src/zlib_ng/gzip_ng.py b/src/zlib_ng/gzip_ng.py index 172e66a..70eede2 100644 --- a/src/zlib_ng/gzip_ng.py +++ b/src/zlib_ng/gzip_ng.py @@ -2,7 +2,7 @@ # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 # Python Software Foundation; All Rights Reserved -# This file is part of python-isal which is distributed under the +# This file is part of python-zlib-ng which is distributed under the # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. # This file uses code from CPython's Lib/gzip.py after backported changes from @@ -100,16 +100,17 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF, class GzipNGFile(gzip.GzipFile): - """The IGzipFile class simulates most of the methods of a file object with + """The GzipNGFile class simulates most of the methods of a file object with the exception of the truncate() method. This class only supports opening files in binary mode. If you need to open a compressed file in text mode, use the gzip.open() function. """ + def __init__(self, filename=None, mode=None, compresslevel=_COMPRESS_LEVEL_BEST, fileobj=None, mtime=None): - """Constructor for the IGzipFile class. + """Constructor for the GzipNGFile class. At least one of fileobj and filename must be given a non-trivial value. 
@@ -143,26 +144,26 @@ def __init__(self, filename=None, mode=None, super().__init__(filename, mode, compresslevel, fileobj, mtime) if self.mode == WRITE: self.compress = zlib_ng.compressobj(compresslevel, - zlib_ng.DEFLATED, - -zlib_ng.MAX_WBITS, - zlib_ng.DEF_MEM_LEVEL, - 0) + zlib_ng.DEFLATED, + -zlib_ng.MAX_WBITS, + zlib_ng.DEF_MEM_LEVEL, + 0) if self.mode == READ: raw = _GzipNGReader(self.fileobj) self._buffer = io.BufferedReader(raw) def __repr__(self): s = repr(self.fileobj) - return '' + return '' def write(self, data): self._check_not_closed() if self.mode != WRITE: import errno - raise OSError(errno.EBADF, "write() on read-only IGzipFile object") + raise OSError(errno.EBADF, "write() on read-only GzipNGFile object") if self.fileobj is None: - raise ValueError("write() on closed IGzipFile object") + raise ValueError("write() on closed GzipNGFile object") if isinstance(data, bytes): length = len(data) @@ -286,21 +287,21 @@ def _read_gzip_header(fp): # Read and discard a null-terminated string containing the filename while True: s = fp.read(1) - if not s or s==b'\000': + if not s or s == b'\000': break if flag & FCOMMENT: # Read and discard a null-terminated string containing a comment while True: s = fp.read(1) - if not s or s==b'\000': + if not s or s == b'\000': break if flag & FHCRC: - _read_exact(fp, 2) # Read & discard the 16-bit header CRC + _read_exact(fp, 2) # Read & discard the 16-bit header CRC return last_mtime def _create_simple_gzip_header(compresslevel: int, - mtime = None) -> bytes: + mtime=None) -> bytes: """ Write a simple gzip header with no extra fields. :param compresslevel: Compresslevel used to determine the xfl bytes. @@ -397,7 +398,7 @@ def _argument_parser(): "timestamp") parser.add_argument("-f", "--force", action="store_true", help="Overwrite output without prompting") - # -b flag not taken by either gzip or igzip. Hidden attribute. + # -b flag not taken by gzip. Hidden attribute. 
parser.add_argument("-b", "--buffer-size", default=READ_BUFFER_SIZE, type=int, help=argparse.SUPPRESS) @@ -445,7 +446,7 @@ def main(): else: gzip_file_kwargs = {"filename": out_filepath} out_file = GzipNGFile(mode="wb", fileobj=out_buffer, - compresslevel=compresslevel, **gzip_file_kwargs) + compresslevel=compresslevel, **gzip_file_kwargs) else: if args.file: in_file = open(args.file, mode="rb") diff --git a/src/zlib_ng/zlib_ng.pyi b/src/zlib_ng/zlib_ng.pyi index 367de17..f5d7dee 100644 --- a/src/zlib_ng/zlib_ng.pyi +++ b/src/zlib_ng/zlib_ng.pyi @@ -2,29 +2,37 @@ # 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 # Python Software Foundation; All Rights Reserved -# This file is part of python-isal which is distributed under the +# This file is part of python-zlib-ng which is distributed under the # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. +MAX_WBITS: int +DEFLATED: int +DEF_MEM_LEVEL: int +DEF_BUF_SIZE: int + +Z_NO_COMPRESSION: int Z_BEST_SPEED: int Z_BEST_COMPRESSION: int Z_DEFAULT_COMPRESSION: int -DEF_BUF_SIZE: int -DEF_MEM_LEVEL: int -MAX_WBITS: int - -DEFLATED: int - -Z_DEFAULT_STRATEGY: int -Z_RLE: int -Z_HUFFMAN_ONLY: int Z_FILTERED: int +Z_HUFFMAN_ONLY: int +Z_RLE: int Z_FIXED: int +Z_DEFAULT_STRATEGY: int Z_NO_FLUSH: int +Z_PARTIAL_FLUSH: int Z_SYNC_FLUSH: int Z_FULL_FLUSH: int Z_FINISH: int +Z_BLOCK: int +Z_TREES: int + +ZLIBNG_VERSION: int +ZLIBNG_RUNTIME_VERSION: int +ZLIB_VERSION: int +ZLIB_RUNTIME_VERSION: int error: Exception diff --git a/src/zlib_ng/zlib_ngmodule.c b/src/zlib_ng/zlib_ngmodule.c index 3cb068a..5bb6f95 100644 --- a/src/zlib_ng/zlib_ngmodule.c +++ b/src/zlib_ng/zlib_ngmodule.c @@ -1531,7 +1531,7 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(zlib_compress__doc__, -"compress($module, data, /, level=ISAL_DEFAULT_COMPRESSION, wbits=MAX_WBITS)\n" +"compress($module, data, /, level=Z_DEFAULT_COMPRESSION, wbits=MAX_WBITS)\n" "--\n" "\n" "Returns a bytes object containing 
compressed data.\n" @@ -1606,7 +1606,7 @@ zlib_decompress(PyObject *module, PyObject *args, PyObject *kwargs) PyDoc_STRVAR(zlib_compressobj__doc__, -"compressobj($module, /, level=ISAL_DEFAULT_COMPRESSION, method=DEFLATED,\n" +"compressobj($module, /, level=Z_DEFAULT_COMPRESSION, method=DEFLATED,\n" " wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL,\n" " strategy=Z_DEFAULT_STRATEGY, zdict=None)\n" "--\n" @@ -1912,7 +1912,7 @@ static PyMemberDef ZlibDecompressor_members[] = { }; static PyTypeObject Comptype = { - .tp_name = "zlib_ng.Compress", + .tp_name = "zlib_ng._Compress", .tp_basicsize = sizeof(compobject), .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor)Comp_dealloc, @@ -1920,7 +1920,7 @@ static PyTypeObject Comptype = { }; static PyTypeObject Decomptype = { - .tp_name = "zlib_ng.Decompress", + .tp_name = "zlib_ng._Decompress", .tp_basicsize = sizeof(compobject), .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor)Decomp_dealloc, @@ -1986,9 +1986,15 @@ PyInit_zlib_ng(void) if (PyType_Ready(&Comptype) < 0) { return NULL; } + PyObject *Comptype_obj = (PyObject *)&Comptype; + Py_INCREF(Comptype_obj); + PyModule_AddObject(m, "_Compress", Comptype_obj); if (PyType_Ready(&Decomptype) < 0) { return NULL; } + PyObject *Decomptype_obj = (PyObject *)&Decomptype; + Py_INCREF(Decomptype_obj); + PyModule_AddObject(m, "_Decompress", Decomptype_obj); if (PyType_Ready(&ZlibDecompressorType) < 0) { return NULL; } diff --git a/tests/data/README.rst b/tests/data/README.rst new file mode 120000 index 0000000..c768ff7 --- /dev/null +++ b/tests/data/README.rst @@ -0,0 +1 @@ +../../README.rst \ No newline at end of file diff --git a/tests/data/concatenated.fastq.gz b/tests/data/concatenated.fastq.gz new file mode 100644 index 0000000..9559a0a Binary files /dev/null and b/tests/data/concatenated.fastq.gz differ diff --git a/tests/data/seeds.txt b/tests/data/seeds.txt new file mode 100644 index 0000000..8864b75 --- /dev/null +++ b/tests/data/seeds.txt @@ -0,0 +1,100 @@ 
+3543943125 +2012239574 +2871899840 +1576915383 +443567851 +3239180423 +3373382826 +3275169686 +3720569364 +2789290573 +618407468 +3601134391 +257166085 +951339096 +688447921 +2980268332 +4041231849 +2665838137 +2152080166 +3061402473 +114075645 +1066757169 +3059147274 +246562262 +3880192386 +4271112646 +3614603334 +3491130170 +332943583 +1921692081 +159629030 +4169931400 +641090334 +2429776133 +152825723 +2237958331 +2779470898 +872209389 +1543398313 +227922013 +1433480097 +2411689409 +4285411687 +2669236754 +2315690542 +2593947855 +2435538281 +640615264 +2183831503 +2141774460 +498759490 +1981068107 +659087394 +644660319 +4084740067 +306553019 +4172422758 +761521931 +2820900477 +160799031 +3875345091 +1347385004 +2312301814 +404607200 +2258507054 +1861096434 +3113292015 +1063461481 +3623153239 +11376533 +1748307784 +1185247326 +1030491484 +3925346575 +4212727269 +4234599042 +4019013354 +3237401702 +1335471143 +3405720069 +4119823939 +1476030215 +1339973082 +3541240200 +608243180 +4149852723 +3655893277 +2099999624 +3547173299 +860208608 +4254465956 +603882225 +1785239865 +3260029982 +83829029 +3136481952 +2052207849 +1971968310 +2157083216 +3415051802 diff --git a/tests/test_compat.py b/tests/test_compat.py new file mode 100644 index 0000000..7595cbe --- /dev/null +++ b/tests/test_compat.py @@ -0,0 +1,211 @@ +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 +# Python Software Foundation; All Rights Reserved + +# This file is part of python-isal which is distributed under the +# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2. + +# This file does not include original code from CPython. It is used to ensure +# that compression and decompression between CPython's zlib and zlib_ng +# is compatible. 
+ +import gzip +import itertools +import zlib +from pathlib import Path + +import pytest + +from zlib_ng import gzip_ng, zlib_ng + +DATA_DIR = Path(__file__).parent / "data" +COMPRESSED_FILE = DATA_DIR / "test.fastq.gz" +with gzip.open(str(COMPRESSED_FILE), mode="rb") as file_h: + DATA = file_h.read() + +DATA_SIZES = [2**i for i in range(3, 20)] +# 100 seeds generated with random.randint(0, 2**32-1) +SEEDS_FILE = DATA_DIR / "seeds.txt" +INT_OVERFLOW = 211928379812738912738917238971289378912379823871932719823798123 +# Get some negative ints and some really big ints into the mix. +SEEDS = [-INT_OVERFLOW, -3, -1, 0, 1, INT_OVERFLOW] + [ + int(seed) for seed in SEEDS_FILE.read_text().splitlines()] + +# Wbits for ZLIB compression, GZIP compression, and RAW compressed streams +WBITS_RANGE = list(range(9, 16)) + list(range(25, 32)) + list(range(-15, -8)) + +ZLIBNG_STRATEGIES = (zlib_ng.Z_DEFAULT_STRATEGY, zlib_ng.Z_FILTERED, + zlib_ng.Z_HUFFMAN_ONLY, zlib_ng.Z_RLE, zlib_ng.Z_FIXED) + +ZLIB_STRATEGIES = [zlib.Z_DEFAULT_STRATEGY, zlib.Z_FILTERED, + zlib.Z_HUFFMAN_ONLY] +if hasattr(zlib, "Z_RLE"): + ZLIB_STRATEGIES.append(zlib.Z_RLE) +if hasattr(zlib, "Z_FIXED"): + ZLIB_STRATEGIES.append(zlib.Z_FIXED) + + +def limited_zlib_tests(strategies=ZLIB_STRATEGIES): + """ + Test all combinations of memlevel compression level and wbits, but + only for the default strategy. Test other strategies with default settings. 
+ """ + DEFAULT_DATA_SIZE = 128 * 1024 + compression_levels = range(-1, 10) + memory_levels = list(range(1, 10)) + for compresslevel in compression_levels: + for wbits in WBITS_RANGE: + for memlevel in memory_levels: + yield (DEFAULT_DATA_SIZE, compresslevel, wbits, memlevel, + zlib.Z_DEFAULT_STRATEGY) + for strategy in strategies: + yield (DEFAULT_DATA_SIZE, -1, zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, + strategy) + + +@pytest.mark.parametrize(["data_size", "value"], + itertools.product(DATA_SIZES, SEEDS)) +def test_crc32(data_size, value): + data = DATA[:data_size] + assert zlib.crc32(data, value) == zlib_ng.crc32(data, value) + + +@pytest.mark.parametrize(["data_size", "value"], + itertools.product(DATA_SIZES, SEEDS)) +def test_adler32(data_size, value): + data = DATA[:data_size] + assert zlib.adler32(data, value) == zlib_ng.adler32(data, value) + + +@pytest.mark.parametrize(["data_size", "level", "wbits"], + itertools.product(DATA_SIZES, range(10), WBITS_RANGE)) +def test_compress(data_size, level, wbits): + data = DATA[:data_size] + compressed = zlib_ng.compress(data, level=level, wbits=wbits) + try: + decompressed = zlib.decompress(compressed, wbits) + except zlib.error: + # Known bug in zlib-ng 2.0.6. Wbits is not correctly applied for level 1. 
+ if (zlib_ng.ZLIBNG_VERSION == "2.0.6" and + level == 1 and + wbits & 0b1111 < 13): + pytest.xfail() + assert decompressed == data + + +@pytest.mark.parametrize(["data_size", "level"], + itertools.product(DATA_SIZES, range(10))) +def test_decompress_zlib(data_size, level): + data = DATA[:data_size] + compressed = zlib.compress(data, level=level) + decompressed = zlib_ng.decompress(compressed) + assert decompressed == data + + +@pytest.mark.parametrize(["data_size", "level", "wbits", "memLevel", "strategy"], + limited_zlib_tests(ZLIB_STRATEGIES)) +def test_decompress_wbits(data_size, level, wbits, memLevel, strategy): + data = DATA[:data_size] + compressobj = zlib.compressobj(level=level, wbits=wbits, memLevel=memLevel, + strategy=strategy) + compressed = compressobj.compress(data) + compressobj.flush() + decompressed = zlib_ng.decompress(compressed, wbits=wbits) + assert data == decompressed + + +@pytest.mark.parametrize(["data_size", "level", "wbits"], + itertools.product([128 * 1024], range(10), WBITS_RANGE),) +def test_decompress_zlib_ng(data_size, level, wbits): + data = DATA[:data_size] + compressed = zlib_ng.compress(data, level=level, wbits=wbits) + try: + decompressed = zlib_ng.decompress(compressed, wbits=wbits) + except zlib_ng.error: + # Known bug in zlib-ng 2.0.6. Wbits is not correctly applied for level 1. 
+ if (zlib_ng.ZLIBNG_VERSION == "2.0.6" and + level == 1 and + wbits & 0b1111 < 13): + pytest.xfail() + assert decompressed == data + assert decompressed == data + + +@pytest.mark.parametrize(["data_size", "level", "wbits", "memLevel", "strategy"], + limited_zlib_tests(ZLIBNG_STRATEGIES)) +def test_compress_compressobj(data_size, level, wbits, memLevel, strategy): + data = DATA[:data_size] + compressobj = zlib_ng.compressobj(level=level, + wbits=wbits, + memLevel=memLevel, + strategy=strategy) + compressed = compressobj.compress(data) + compressobj.flush() + try: + decompressed = zlib.decompress(compressed, wbits=wbits) + except zlib.error: + # Known bug in zlib-ng 2.0.6. Wbits is not correctly applied for level 1. + if (zlib_ng.ZLIBNG_VERSION == "2.0.6" and + level == 1 and + wbits & 0b1111 < 13): + pytest.xfail() + assert data == decompressed + + +@pytest.mark.parametrize(["data_size", "level", "wbits", "memLevel", "strategy"], + limited_zlib_tests(ZLIB_STRATEGIES)) +def test_decompress_decompressobj(data_size, level, wbits, memLevel, strategy): + data = DATA[:data_size] + compressobj = zlib.compressobj(level=level, wbits=wbits, memLevel=memLevel, + strategy=strategy) + compressed = compressobj.compress(data) + compressobj.flush() + decompressobj = zlib_ng.decompressobj(wbits=wbits) + decompressed = decompressobj.decompress(compressed) + decompressobj.flush() + assert data == decompressed + assert decompressobj.unused_data == b"" + assert decompressobj.unconsumed_tail == b"" + + +def test_decompressobj_unconsumed_tail(): + data = DATA[:128*1024] + compressed = zlib.compress(data) + decompressobj = zlib_ng.decompressobj() + output = decompressobj.decompress(compressed, 2048) + assert len(output) == 2048 + + +@pytest.mark.parametrize(["data_size", "level"], + itertools.product(DATA_SIZES, range(10))) +def test_gzip_ng_compress(data_size, level): + data = DATA[:data_size] + compressed = gzip_ng.compress(data, compresslevel=level) + assert gzip.decompress(compressed) 
== data + + +@pytest.mark.parametrize(["data_size", "level"], + itertools.product(DATA_SIZES, range(10))) +def test_decompress_gzip(data_size, level): + data = DATA[:data_size] + compressed = gzip.compress(data, compresslevel=level) + decompressed = gzip_ng.decompress(compressed) + assert decompressed == data + + +@pytest.mark.parametrize(["data_size", "level"], + itertools.product(DATA_SIZES, range(10))) +def test_decompress_gzip_ng(data_size, level): + data = DATA[:data_size] + compressed = gzip_ng.compress(data, compresslevel=level) + decompressed = gzip_ng.decompress(compressed) + assert decompressed == data + + +@pytest.mark.parametrize(["unused_size", "wbits"], + itertools.product([26], [-15, 15, 31])) +def test_unused_data(unused_size, wbits): + unused_data = b"abcdefghijklmnopqrstuvwxyz"[:unused_size] + compressor = zlib.compressobj(wbits=wbits) + data = b"A meaningful sentence starts with a capital and ends with a." + compressed = compressor.compress(data) + compressor.flush() + decompressor = zlib_ng.decompressobj(wbits=wbits) + result = decompressor.decompress(compressed + unused_data) + assert result == data + assert decompressor.unused_data == unused_data diff --git a/tests/test_gzip_compliance.py b/tests/test_gzip_compliance.py index f8f8cd5..f51c867 100644 --- a/tests/test_gzip_compliance.py +++ b/tests/test_gzip_compliance.py @@ -27,8 +27,8 @@ import tempfile import unittest from subprocess import PIPE, Popen -from test.support import _4G, bigmemtest -from test.support.script_helper import assert_python_ok, assert_python_failure +from test.support import _4G, bigmemtest # type: ignore +from test.support.script_helper import assert_python_failure, assert_python_ok # type: ignore # noqa: E501 from zlib_ng import gzip_ng as gzip @@ -44,7 +44,6 @@ /* See http://www.winimage.com/zLibDll for Windows */ """ - TEMPDIR = tempfile.mkdtemp() @@ -77,10 +76,10 @@ def tearDown(self): class TestGzip(BaseTest): def write_and_read_back(self, data, mode='b'): b_data 
= bytes(data) - with gzip.GzipFile(self.filename, 'w'+mode) as f: - l = f.write(data) - self.assertEqual(l, len(b_data)) - with gzip.GzipFile(self.filename, 'r'+mode) as f: + with gzip.GzipFile(self.filename, 'w' + mode) as f: + out_length = f.write(data) + self.assertEqual(out_length, len(b_data)) + with gzip.GzipFile(self.filename, 'r' + mode) as f: self.assertEqual(f.read(), b_data) def test_write(self): @@ -115,7 +114,7 @@ def test_write_read_with_pathlike_file(self): def test_write_memoryview(self): self.write_and_read_back(memoryview(data1 * 50)) m = memoryview(bytes(range(256))) - data = m.cast('B', shape=[8,8,4]) + data = m.cast('B', shape=[8, 8, 4]) self.write_and_read_back(data) def test_write_bytearray(self): @@ -142,7 +141,7 @@ def test_read(self): # Try reading. with gzip.GzipFile(self.filename, 'r') as f: d = f.read() - self.assertEqual(d, data1*50) + self.assertEqual(d, data1 * 50) def test_read1(self): self.test_write() @@ -203,7 +202,7 @@ def test_append(self): with gzip.GzipFile(self.filename, 'rb') as f: d = f.read() - self.assertEqual(d, (data1*50) + (data2*15)) + self.assertEqual(d, (data1 * 50) + (data2 * 15)) def test_many_append(self): # Bug #1074261 was triggered when reading a file that contained @@ -212,7 +211,7 @@ def test_many_append(self): with gzip.GzipFile(self.filename, 'wb', 9) as f: f.write(b'a') for i in range(0, 200): - with gzip.GzipFile(self.filename, "ab", 9) as f: # append + with gzip.GzipFile(self.filename, "ab", 9) as f: # append f.write(b'a') # Try reading the file @@ -221,8 +220,9 @@ def test_many_append(self): while 1: ztxt = zgfile.read(8192) contents += ztxt - if not ztxt: break - self.assertEqual(contents, b'a'*201) + if not ztxt: + break + self.assertEqual(contents, b'a' * 201) def test_exclusive_write(self): with gzip.GzipFile(self.filename, 'xb') as f: @@ -251,7 +251,8 @@ def test_readline(self): line_length = 0 while 1: L = f.readline(line_length) - if not L and line_length != 0: break + if not L and line_length 
!= 0: + break self.assertTrue(len(L) <= line_length) line_length = (line_length + 1) % 50 @@ -265,7 +266,8 @@ def test_readlines(self): with gzip.GzipFile(self.filename, 'rb') as f: while 1: L = f.readlines(150) - if L == []: break + if L == []: + break def test_seek_read(self): self.test_write() @@ -275,10 +277,11 @@ def test_seek_read(self): while 1: oldpos = f.tell() line1 = f.readline() - if not line1: break + if not line1: + break newpos = f.tell() f.seek(oldpos) # negative seek - if len(line1)>10: + if len(line1) > 10: amount = 10 else: amount = len(line1) @@ -325,7 +328,7 @@ def test_paddedfile_getattr(self): def test_mtime(self): mtime = 123456789 - with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: + with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite: fWrite.write(data1) with gzip.GzipFile(self.filename) as fRead: self.assertTrue(hasattr(fRead, 'mtime')) @@ -337,17 +340,17 @@ def test_mtime(self): def test_metadata(self): mtime = 123456789 - with gzip.GzipFile(self.filename, 'w', mtime = mtime) as fWrite: + with gzip.GzipFile(self.filename, 'w', mtime=mtime) as fWrite: fWrite.write(data1) with open(self.filename, 'rb') as fRead: # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html idBytes = fRead.read(2) - self.assertEqual(idBytes, b'\x1f\x8b') # gzip ID + self.assertEqual(idBytes, b'\x1f\x8b') # gzip ID cmByte = fRead.read(1) - self.assertEqual(cmByte, b'\x08') # deflate + self.assertEqual(cmByte, b'\x08') # deflate try: expectedname = os.path.basename(self.filename).encode( @@ -361,13 +364,14 @@ def test_metadata(self): self.assertEqual(flagsByte, expectedflags) mtimeBytes = fRead.read(4) - self.assertEqual(mtimeBytes, struct.pack(' 2**32, 'requires 64bit platform') + @unittest.skipUnless(sys.maxsize > 2 ** 32, 'requires 64bit platform') @bigmemtest(size=_4G + 100, memuse=4) def test_64bit_compress(self, size): data = b'x' * size @@ -286,7 +290,8 @@ def test_pair(self): co = zlib.compressobj() x1 = co.compress(data) x2 = 
co.flush() - self.assertRaises(zlib.error, co.flush) # second flush should not work + self.assertRaises(zlib.error, + co.flush) # second flush should not work self.assertEqual(x1 + x2, datazip) for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): dco = zlib.decompressobj() @@ -338,9 +343,8 @@ def test_compressincremental(self): co = zlib.compressobj() bufs = [] for i in range(0, len(data), 256): - bufs.append(co.compress(data[i:i+256])) + bufs.append(co.compress(data[i:i + 256])) bufs.append(co.flush()) - combuf = b''.join(bufs) dco = zlib.decompressobj() y1 = dco.decompress(b''.join(bufs)) @@ -354,7 +358,7 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): co = zlib.compressobj() bufs = [] for i in range(0, len(data), cx): - bufs.append(co.compress(data[i:i+cx])) + bufs.append(co.compress(data[i:i + cx])) bufs.append(co.flush()) combuf = b''.join(bufs) @@ -367,10 +371,10 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): dco = zlib.decompressobj() bufs = [] for i in range(0, len(combuf), dcx): - bufs.append(dco.decompress(combuf[i:i+dcx])) - self.assertEqual(b'', dco.unconsumed_tail, ######## + bufs.append(dco.decompress(combuf[i:i + dcx])) + self.assertEqual(b'', dco.unconsumed_tail, "(A) uct should be b'': not %d long" % - len(dco.unconsumed_tail)) + len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) if flush: bufs.append(dco.flush()) @@ -381,9 +385,9 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): bufs.append(chunk) else: break - self.assertEqual(b'', dco.unconsumed_tail, ######## + self.assertEqual(b'', dco.unconsumed_tail, "(B) uct should be b'': not %d long" % - len(dco.unconsumed_tail)) + len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) self.assertEqual(data, b''.join(bufs)) # Failure means: "decompressobj with init options failed" @@ -399,7 +403,7 @@ def test_decompimax(self, source=None, cx=256, dcx=64): co = zlib.compressobj() bufs = [] for i in range(0, 
len(data), cx): - bufs.append(co.compress(data[i:i+cx])) + bufs.append(co.compress(data[i:i + cx])) bufs.append(co.flush()) combuf = b''.join(bufs) self.assertEqual(data, zlib.decompress(combuf), @@ -409,10 +413,10 @@ def test_decompimax(self, source=None, cx=256, dcx=64): bufs = [] cb = combuf while cb: - #max_length = 1 + len(cb)//10 + # max_length = 1 + len(cb)//10 chunk = dco.decompress(cb, dcx) self.assertFalse(len(chunk) > dcx, - 'chunk too big (%d>%d)' % (len(chunk), dcx)) + 'chunk too big (%d>%d)' % (len(chunk), dcx)) bufs.append(chunk) cb = dco.unconsumed_tail bufs.append(dco.flush()) @@ -424,7 +428,7 @@ def test_decompressmaxlen(self, flush=False): co = zlib.compressobj() bufs = [] for i in range(0, len(data), 256): - bufs.append(co.compress(data[i:i+256])) + bufs.append(co.compress(data[i:i + 256])) bufs.append(co.flush()) combuf = b''.join(bufs) self.assertEqual(data, zlib.decompress(combuf), @@ -434,10 +438,11 @@ def test_decompressmaxlen(self, flush=False): bufs = [] cb = combuf while cb: - max_length = 1 + len(cb)//10 + max_length = 1 + len(cb) // 10 chunk = dco.decompress(cb, max_length) self.assertFalse(len(chunk) > max_length, - 'chunk too big (%d>%d)' % (len(chunk),max_length)) + 'chunk too big (%d>%d)' % ( + len(chunk), max_length)) bufs.append(chunk) cb = dco.unconsumed_tail if flush: @@ -446,7 +451,8 @@ def test_decompressmaxlen(self, flush=False): while chunk: chunk = dco.decompress(b'', max_length) self.assertFalse(len(chunk) > max_length, - 'chunk too big (%d>%d)' % (len(chunk),max_length)) + 'chunk too big (%d>%d)' % ( + len(chunk), max_length)) bufs.append(chunk) self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved') @@ -477,7 +483,7 @@ def test_maxlen_custom(self): def test_clear_unconsumed_tail(self): # Issue #12050: calling decompress() without providing max_length # should clear the unconsumed_tail attribute. 
- cdata = b"x\x9cKLJ\x06\x00\x02M\x01" # "abc" + cdata = b"x\x9cKLJ\x06\x00\x02M\x01" # "abc" dco = zlib.decompressobj() ddata = dco.decompress(cdata, 1) ddata += dco.decompress(dco.unconsumed_tail) @@ -501,16 +507,16 @@ def test_flushes(self): for sync in sync_opt: for level in range(10): try: - obj = zlib.compressobj( level ) - a = obj.compress( data[:3000] ) - b = obj.flush( sync ) - c = obj.compress( data[3000:] ) + obj = zlib.compressobj(level) + a = obj.compress(data[:3000]) + b = obj.flush(sync) + c = obj.compress(data[3000:]) d = obj.flush() - except: + except Exception: print("Error for flush mode={}, level={}" .format(sync, level)) raise - self.assertEqual(zlib.decompress(b''.join([a,b,c,d])), + self.assertEqual(zlib.decompress(b''.join([a, b, c, d])), data, ("Decompress failed: flush " "mode=%i, level=%i") % (sync, level)) del obj @@ -539,6 +545,8 @@ def test_odd_flush(self): # others might simply have a single RNG gen = random gen.seed(1) + if not hasattr(gen, "randbytes"): # Python 3.7 workaround. 
+ setattr(gen, "randbytes", lambda x: os.urandom(x)) data = gen.randbytes(17 * 1024) # compress, sync-flush, and decompress @@ -557,7 +565,7 @@ def test_empty_flush(self): co = zlib.compressobj(zlib.Z_BEST_COMPRESSION) self.assertTrue(co.flush()) # Returns a zlib header dco = zlib.decompressobj() - self.assertEqual(dco.flush(), b"") # Returns nothing + self.assertEqual(dco.flush(), b"") # Returns nothing def test_dictionary(self): h = HAMLET_SCENE @@ -636,11 +644,11 @@ def test_decompress_unused_data(self): if i < len(y): self.assertEqual(dco.unused_data, b'') if maxlen == 0: - data += dco.decompress(x[i : i + step]) + data += dco.decompress(x[i: i + step]) self.assertEqual(dco.unconsumed_tail, b'') else: data += dco.decompress( - dco.unconsumed_tail + x[i : i + step], maxlen) + dco.unconsumed_tail + x[i: i + step], maxlen) data += dco.flush() self.assertTrue(dco.eof) self.assertEqual(data, source) @@ -665,7 +673,7 @@ def test_flush_with_freed_input(self): dco = zlib.decompressobj() dco.decompress(data, 1) del data - data = zlib.compress(input2) + zlib.compress(input2) self.assertEqual(dco.flush(), input1[1:]) @bigmemtest(size=_4G, memuse=1) @@ -677,6 +685,11 @@ def test_flush_large_length(self, size): dco.decompress(data, 1) self.assertEqual(dco.flush(size), input[1:]) + # Skip this test for pypy. This is an extreme fringe use case. There are + # constants provided for the mode parameter, so it seems very unlikely + # custom ints will be used. 
+ @unittest.skipIf(sys.implementation.name == "pypy", + "PyPy does not handle __index__ properly") def test_flush_custom_length(self): input = HAMLET_SCENE * 10 data = zlib.compress(input, 1) @@ -705,8 +718,8 @@ def test_compresscopy(self): bufs1.append(c1.flush()) s1 = b''.join(bufs1) - self.assertEqual(zlib.decompress(s0),data0+data0) - self.assertEqual(zlib.decompress(s1),data0+data1) + self.assertEqual(zlib.decompress(s0), data0 + data0) + self.assertEqual(zlib.decompress(s1), data0 + data1) @requires_Compress_copy def test_badcompresscopy(self): @@ -740,8 +753,8 @@ def test_decompresscopy(self): bufs1.append(d1.decompress(comp[32:])) s1 = b''.join(bufs1) - self.assertEqual(s0,s1) - self.assertEqual(s0,data) + self.assertEqual(s0, s1) + self.assertEqual(s0, data) @requires_Decompress_copy def test_baddecompresscopy(self): @@ -769,16 +782,20 @@ def test_decompresspickle(self): @bigmemtest(size=_1G + 1024 * 1024, memuse=3) def test_big_compress_buffer(self, size): c = zlib.compressobj(1) - compress = lambda s: c.compress(s) + c.flush() + + def compress(data): + return c.compress(data) + c.flush() self.check_big_compress_buffer(size, compress) @bigmemtest(size=_1G + 1024 * 1024, memuse=2) def test_big_decompress_buffer(self, size): d = zlib.decompressobj() - decompress = lambda s: d.decompress(s) + d.flush() + + def decompress(data): + return d.decompress(data) + d.flush() self.check_big_decompress_buffer(size, decompress) - @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @unittest.skipUnless(sys.maxsize > 2 ** 32, 'requires 64bit platform') @bigmemtest(size=_4G + 100, memuse=4) def test_64bit_compress(self, size): data = b'x' * size @@ -791,7 +808,7 @@ def test_64bit_compress(self, size): finally: comp = uncomp = data = None - @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @unittest.skipUnless(sys.maxsize > 2 ** 32, 'requires 64bit platform') @bigmemtest(size=_4G + 100, memuse=3) def test_large_unused_data(self, 
size): data = b'abcdefghijklmnop' @@ -805,7 +822,7 @@ def test_large_unused_data(self, size): finally: unused = comp = do = None - @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @unittest.skipUnless(sys.maxsize > 2 ** 32, 'requires 64bit platform') @bigmemtest(size=_4G + 100, memuse=5) def test_large_unconsumed_tail(self, size): data = b'x' * size @@ -887,6 +904,7 @@ def test_wbits(self): ) self.assertEqual(expected, actual) + def choose_lines(source, number, seed=None, generator=random): """Return a list of number lines randomly chosen from the source""" if seed is not None: @@ -971,7 +989,8 @@ class ZlibDecompressorTest(unittest.TestCase): def test_Constructor(self): self.assertRaises(TypeError, zlib._ZlibDecompressor, "bla") self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, "bla") - self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, b"bla", "bla") + self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, b"bla", + "bla") def testDecompress(self): zlibd = zlib._ZlibDecompressor() @@ -984,7 +1003,7 @@ def testDecompressChunks10(self): text = b'' n = 0 while True: - str = self.DATA[n*10:(n+1)*10] + str = self.DATA[n * 10:(n + 1) * 10] if not str: break text += zlibd.decompress(str) @@ -994,22 +1013,21 @@ def testDecompressChunks10(self): def testDecompressUnusedData(self): zlibd = zlib._ZlibDecompressor() unused_data = b"this is unused data" - text = zlibd.decompress(self.DATA+unused_data) + text = zlibd.decompress(self.DATA + unused_data) self.assertEqual(text, self.TEXT) self.assertEqual(zlibd.unused_data, unused_data) def testEOFError(self): zlibd = zlib._ZlibDecompressor() - text = zlibd.decompress(self.DATA) + zlibd.decompress(self.DATA) self.assertRaises(EOFError, zlibd.decompress, b"anything") self.assertRaises(EOFError, zlibd.decompress, b"") - @support.skip_if_pgo_task @bigmemtest(size=_4G + 100, memuse=3.3) def testDecompress4G(self, size): # "Test zlib._ZlibDecompressor.decompress() with >4GiB input" blocksize = 
10 * 1024 * 1024 - block = random.randbytes(blocksize) + block = os.urandom(blocksize) try: data = block * (size // blocksize + 1) compressed = zlib.compress(data) @@ -1021,6 +1039,9 @@ def testDecompress4G(self, size): compressed = None decompressed = None + @unittest.skipIf(sys.implementation.name == "pypy", + reason="Pickling is not a requirement, and certainly " + "not a blocker for PyPy.") def testPickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.assertRaises(TypeError): @@ -1034,7 +1055,7 @@ def testDecompressorChunksMaxsize(self): # Feed some input len_ = len(self.BIG_DATA) - 64 out.append(zlibd.decompress(self.BIG_DATA[:len_], - max_length=max_length)) + max_length=max_length)) self.assertFalse(zlibd.needs_input) self.assertEqual(len(out[-1]), max_length) @@ -1045,7 +1066,7 @@ def testDecompressorChunksMaxsize(self): # Retrieve more data while providing more input out.append(zlibd.decompress(self.BIG_DATA[len_:], - max_length=max_length)) + max_length=max_length)) self.assertLessEqual(len(out[-1]), max_length) # Retrieve remaining uncompressed data @@ -1065,7 +1086,7 @@ def test_decompressor_inputbuf_1(self): # Create input buffer and fill it self.assertEqual(zlibd.decompress(self.DATA[:100], - max_length=0), b'') + max_length=0), b'') # Retrieve some results, freeing capacity at beginning # of input buffer @@ -1087,7 +1108,7 @@ def test_decompressor_inputbuf_2(self): # Create input buffer and empty it self.assertEqual(zlibd.decompress(self.DATA[:200], - max_length=0), b'') + max_length=0), b'') out.append(zlibd.decompress(b'')) # Fill buffer with new data diff --git a/tox.ini b/tox.ini index f1b3c5d..5085404 100644 --- a/tox.ini +++ b/tox.ini @@ -73,3 +73,6 @@ commands= deps= commands= python ./benchmark_scripts/benchmark.py --checksums + +[flake8] +max-line-length=88