Merge pull request python#30 from python-lz4/development

jonathanunderwood · web-flow · commit 05df110a2834 · 2017-05-13T13:14:05.000+01:00
Development branch fixes for 0.9.1 release
diff --git a/.travis.yml b/.travis.yml
@@ -6,5 +6,6 @@ python:
   - 3.3
   - 3.4
   - 3.5
+  - 3.6
 install:
 script: python setup.py test
diff --git a/appveyor.yml b/appveyor.yml
@@ -17,6 +17,7 @@ environment:
     - PYTHON: "C:\\Python34-x64"
       DISTUTILS_USE_SDK: "1"
     - PYTHON: "C:\\Python35-x64"
+    - PYTHON: "C:\\Python36-x64"
 
 matrix:
   # Immediately finish build once one of the jobs fails.
diff --git a/lz4/block/_block.c b/lz4/block/_block.c
@@ -121,11 +121,6 @@ compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)
       return NULL;
     }
 
-  if (source_size <= 0) {
-    PyErr_Format(PyExc_ValueError, "Input source data size invalid: %d bytes", source_size);
-    return NULL;
-  }
-
   if (!strncmp (mode, "default", sizeof ("default")))
     {
       comp = DEFAULT;
@@ -258,7 +253,7 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)
       source_size -= hdr_size;
     }
 
-  if (dest_size <= 0 || dest_size > PY_SSIZE_T_MAX)
+  if (dest_size < 0 || dest_size > PY_SSIZE_T_MAX)
     {
       PyErr_Format (PyExc_ValueError, "Invalid size in header: 0x%zu",
                     dest_size);
diff --git a/lz4/frame/__init__.py b/lz4/frame/__init__.py
@@ -35,9 +35,6 @@ class LZ4FrameCompressor(object):
             - lz4.frame.CONTENTCHECKSUM_DISABLED or 0: disables checksumming
             - lz4.frame.CONTENTCHECKSUM_ENABLED or 1: enables checksumming
             The default is CONTENTCHECKSUM_DISABLED.
-        content_size (bool): Specifies whether to include an optional 8-byte header
-            field that is the uncompressed size of data included within the frame.
-            Including the content-size header is optional, and is enabled by default.
         frame_type (int): Specifies whether user data can be injected between
             frames. Options:
             - lz4.frame.FRAMETYPE_FRAME or 0: disables user data injection
@@ -53,14 +50,12 @@ def __init__(self,
                  block_mode=BLOCKMODE_LINKED,
                  compression_level=COMPRESSIONLEVEL_MIN,
                  content_checksum=CONTENTCHECKSUM_DISABLED,
-                 content_size=True,
                  frame_type=FRAMETYPE_FRAME,
                  auto_flush=True):
         self.block_size = block_size
         self.block_mode = block_mode
         self.compression_level = compression_level
         self.content_checksum = content_checksum
-        self.content_size = content_size
         self.frame_type = frame_type
         self.auto_flush = auto_flush
         self._context = create_compression_context()
@@ -82,9 +77,10 @@ def compress_begin(self, source_size=0):
 
         Args:
             data (bytes): data to compress
-            source_size (int): Optionally specified the total size of the
+            source_size (int): Optionally specify the total size of the
                 uncompressed data. If specified, will be stored in the
-                compressed frame header for later use in decompression.
+                compressed frame header as an 8-byte field for later use
+                during decompression.
 
         Returns:
             bytes: frame header data
@@ -97,7 +93,6 @@ def compress_begin(self, source_size=0):
                                     frame_type=self.frame_type,
                                     compression_level=self.compression_level,
                                     content_checksum=self.content_checksum,
-                                    content_size=self.content_size,
                                     auto_flush=self.auto_flush,
                                     source_size=source_size)
 
diff --git a/lz4/frame/_frame.c b/lz4/frame/_frame.c
@@ -121,9 +121,9 @@ create_compression_context (PyObject * Py_UNUSED (self))
   return PyCapsule_New (context, capsule_name, destruct_compression_context);
 }
 
-/******************
- * compress_frame *
- ******************/
+/************
+ * compress *
+ ************/
 #define __COMPRESS_KWARGS_DOCSTRING \
   "    block_size (int): Sepcifies the maximum blocksize to use.\n"     \
   "        Options:\n\n"                                                \
@@ -152,25 +152,25 @@ create_compression_context (PyObject * Py_UNUSED (self))
   "        - lz4.frame.CONTENTCHECKSUM_DISABLED or 0: disables checksumming\n" \
   "        - lz4.frame.CONTENTCHECKSUM_ENABLED or 1: enables checksumming\n\n" \
   "        The default is CONTENTCHECKSUM_DISABLED.\n"                  \
-  "    content_size (bool): Specifies whether to include an optional 8-byte header\n" \
-  "        field that is the uncompressed size of data included within the frame.\n" \
-  "        Including the content-size header is optional, and is enabled by default.\n" \
   "    frame_type (int): Specifies whether user data can be injected between\n" \
   "        frames. Options:\n\n"                                        \
   "        - lz4.frame.FRAMETYPE_FRAME or 0: disables user data injection\n" \
   "        - lz4.frame.FRAMETYPE_SKIPPABLEFRAME or 1: enables user data injection\n\n" \
   "        The default is lz4.frame.FRAMETYPE_FRAME.\n"                 \
 
 PyDoc_STRVAR(compress__doc,
-             "compress(source, compression_level=0, block_size=0, content_checksum=0, content_size=1, block_mode=0, frame_type=0)\n\n" \
+             "compress(source, compression_level=0, block_size=0, content_checksum=0, block_mode=0, frame_type=0, content_size_header=1)\n\n" \
              "Accepts a string, and compresses the string in one go, returning the\n" \
              "compressed string as a string of bytes. The compressed string includes\n" \
              "a header and endmark and so is suitable for writing to a file.\n\n" \
              "Args:\n"                                                  \
              "    source (str): String to compress\n\n"                 \
              "Keyword Args:\n"                                          \
              __COMPRESS_KWARGS_DOCSTRING                                \
-             "\n"                                                       \
+             "    content_size_header (bool): Specifies whether to include an optional\n" \
+             "        8-byte header field that is the uncompressed size of data included\n" \
+             "        within the frame. Including the content-size header is optional\n" \
+             "        and is enabled by default.\n\n"                   \
              "Returns:\n"                                               \
              "    str: Compressed data as a string\n"
              );
@@ -192,9 +192,9 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
                             "compression_level",
                             "block_size",
                             "content_checksum",
-                            "content_size",
                             "block_mode",
                             "frame_type",
+                            "content_size_header",
                             NULL
                           };
 
@@ -205,11 +205,10 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
                                     &source, &source_size,
                                     &preferences.compressionLevel,
                                     &preferences.frameInfo.blockSizeID,
-                                    &preferences.
-                                    frameInfo.contentChecksumFlag,
-                                    &content_size_header,
+                                    &preferences.frameInfo.contentChecksumFlag,
                                     &preferences.frameInfo.blockMode,
-                                    &preferences.frameInfo.frameType))
+                                    &preferences.frameInfo.frameType,
+                                    &content_size_header))
     {
       return NULL;
     }
@@ -295,7 +294,7 @@ PyDoc_STRVAR(compress_begin__doc,
              "         When autoFlush is disabled, the LZ4 library may buffer data\n" \
              "         until a block is full\n\n"                       \
              "    source_size (int): This optionally specifies the uncompressed size\n" \
-             "        of the source content. This arument is optional, but can if specified\n" \
+             "        of the source content. This argument is optional, but if specified\n" \
              "        will be stored in the frame header for use during decompression.\n"
              "Returns:\n"                                               \
              "    str (str): Frame header.\n"
@@ -309,7 +308,6 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
 {
   PyObject *py_context = NULL;
   unsigned long source_size = 0;
-  int content_size_header = 1;
   LZ4F_preferences_t preferences;
   /* Only needs to be large enough for a header, which is 15 bytes.
    * Unfortunately, the lz4 library doesn't provide a #define for this.
@@ -322,7 +320,6 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
                             "compression_level",
                             "block_size",
                             "content_checksum",
-                            "content_size",
                             "block_mode",
                             "frame_type",
                             "auto_flush",
@@ -335,13 +332,12 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
      argument */
   preferences.autoFlush = 1;
 
-  if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiiii", kwlist,
+  if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiii", kwlist,
                                     &py_context,
                                     &source_size,
                                     &preferences.compressionLevel,
                                     &preferences.frameInfo.blockSizeID,
                                     &preferences.frameInfo.contentChecksumFlag,
-                                    &content_size_header,
                                     &preferences.frameInfo.blockMode,
                                     &preferences.frameInfo.frameType,
                                     &preferences.autoFlush
@@ -350,14 +346,7 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
       return NULL;
     }
 
-  if (content_size_header)
-    {
-      preferences.frameInfo.contentSize = source_size;
-    }
-  else
-    {
-      preferences.frameInfo.contentSize = 0;
-    }
+  preferences.frameInfo.contentSize = source_size;
 
   context =
     (struct compression_context *) PyCapsule_GetPointer (py_context, capsule_name);
@@ -798,16 +787,17 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds)
              to estimate the new size of the destination buffer. */
           char * destination_buffer_new;
           destination_size += 3 * result;
+          Py_BLOCK_THREADS
           destination_buffer_new = PyMem_Realloc(destination_buffer, destination_size);
           if (!destination_buffer_new)
             {
               LZ4F_freeDecompressionContext (context);
-              Py_BLOCK_THREADS
               PyErr_SetString (PyExc_RuntimeError,
                                "Failed to increase destination buffer size");
               PyMem_Free (destination_buffer);
               return NULL;
             }
+          Py_UNBLOCK_THREADS
           destination_buffer = destination_buffer_new;
         }
       /* Data still remaining to be decompressed, so increment the source and
diff --git a/py3c/py3c.h b/py3c/py3c.h
@@ -29,6 +29,6 @@ SOFTWARE.
 
 #include <py3c/comparison.h>
 #include <py3c/compat.h>
-
+#include <py3c/py3shims.h>
 
 #endif
diff --git a/py3c/py3c/compat.h b/py3c/py3c/compat.h
@@ -73,7 +73,7 @@
 #define PyStr_InternFromString PyString_InternFromString
 #define PyStr_Decode PyString_Decode
 
-static inline PyObject *PyStr_Concat(PyObject *left, PyObject *right) {
+static PyObject *PyStr_Concat(PyObject *left, PyObject *right) {
     PyObject *str = left;
     Py_INCREF(left);  // reference to old left will be stolen
     PyString_Concat(&str, right);
diff --git a/py3c/py3c/py3shims.h b/py3c/py3c/py3shims.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2016, Red Hat, Inc. and/or its affiliates
+ * Licensed under the MIT license; see py3c.h
+ */
+
+/*
+ * Shims for the PyMem_Raw* functions added in Python 3.4 (RawCalloc: 3.5)
+ *
+ * See https://docs.python.org/3/c-api/memory.html#raw-memory-interface
+ */
+
+#ifndef _PY3C_RAWMALLOC_H_
+#define _PY3C_RAWMALLOC_H_
+#include <Python.h>
+#include <stdlib.h>
+
+/* A zero size is bumped to 1 byte; note the size argument is evaluated twice. */
+#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 4)
+#define PyMem_RawMalloc(n) malloc((n) ? (n) : 1)
+#define PyMem_RawRealloc(p, n) realloc(p, (n) ? (n) : 1)
+#define PyMem_RawFree(p) free(p)
+#endif /* version < 3.4 */
+
+/* Same zero-to-one substitution, applied to both calloc arguments. */
+#if PY_MAJOR_VERSION < 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 5)
+#define PyMem_RawCalloc(n, s) calloc((n) ? (n) : 1, (s) ? (s) : 1)
+#endif /* version < 3.5 */
+
+
+#endif /* _PY3C_RAWMALLOC_H_ */
diff --git a/tests/bench.py b/tests/bench.py
@@ -1,23 +1,27 @@
 import uuid
 import timeit
-import lz4
-import snappy
+import lz4.block
 import os
 from timeit import Timer
+import sys
+import blosc
 
-DATA = open("../src/lz4.c", "rb").read()
-LZ4_DATA = lz4.compress(DATA)
-SNAPPY_DATA = snappy.compress(DATA)
-LOOPS = 200000
+# The sample file to benchmark against is the first command-line argument.
+with open(sys.argv[1], "rb") as f:
+    DATA = f.read()
+LZ4_DATA = lz4.block.compress(DATA)
+BLOSC_DATA = blosc.compress(DATA, cname='lz4', clevel=5, shuffle=True)
+LOOPS = 100
 
 print("Data Size:")
 print("  Input: %d" % len(DATA))
 print("  LZ4: %d (%.2f)" % (len(LZ4_DATA), len(LZ4_DATA) / float(len(DATA))))
-print("  Snappy: %d (%.2f)" % (len(SNAPPY_DATA), len(SNAPPY_DATA) / float(len(DATA))))
-print("  LZ4 / Snappy: %f" % (float(len(LZ4_DATA)) / float(len(SNAPPY_DATA))))
+print("  Blosc: %d (%.2f)" % (len(BLOSC_DATA), len(BLOSC_DATA) / float(len(DATA))))
+print("  LZ4 / Blosc: %f" % (float(len(LZ4_DATA)) / float(len(BLOSC_DATA))))
 
 print("Benchmark: %d calls" % LOOPS)
-print("  LZ4 Compression: %fs" % Timer("lz4.compress(DATA)", "from __main__ import DATA; import lz4").timeit(number=LOOPS))
-print("  Snappy Compression: %fs" % Timer("snappy.compress(DATA)", "from __main__ import DATA; import snappy").timeit(number=LOOPS))
-print("  LZ4 Decompression: %fs" % Timer("lz4.uncompress(LZ4_DATA)", "from __main__ import LZ4_DATA; import lz4").timeit(number=LOOPS))
-print("  Snappy Decompression : %fs" % Timer("snappy.uncompress(SNAPPY_DATA)", "from __main__ import SNAPPY_DATA; import snappy").timeit(number=LOOPS))
+# Each figure below is the mean time per call: total elapsed time divided by LOOPS.
+print("  LZ4 Compression: %fs" % (Timer("lz4.block.compress(DATA)", "from __main__ import DATA; import lz4.block").timeit(number=LOOPS)/LOOPS))
+print("  Blosc Compression: %fs" % (Timer("blosc.compress(DATA, cname='lz4', clevel=5, shuffle=True)", "from __main__ import DATA; import blosc").timeit(number=LOOPS)/LOOPS))
+print("  LZ4 Decompression: %fs" % (Timer("lz4.block.decompress(LZ4_DATA)", "from __main__ import LZ4_DATA; import lz4.block").timeit(number=LOOPS)/LOOPS))
+print("  Blosc Decompression : %fs" % (Timer("blosc.decompress(BLOSC_DATA)", "from __main__ import BLOSC_DATA; import blosc").timeit(number=LOOPS)/LOOPS))
diff --git a/tests/test_block.py b/tests/test_block.py
@@ -8,6 +8,10 @@
 
 class TestLZ4Block(unittest.TestCase):
 
+    def test_empty_string(self):
+      empty = b''  # a zero-length payload must round-trip unchanged
+      self.assertEqual(empty, lz4.block.decompress(lz4.block.compress(empty)))
+
     def test_random(self):
       DATA = os.urandom(128 * 1024)  # Read 128kb
       self.assertEqual(DATA, lz4.block.decompress(lz4.block.compress(DATA)))
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -138,7 +138,6 @@ def test_compress_begin_update_end_not_defaults(self):
             block_size=lz4frame.BLOCKSIZE_MAX256KB,
             block_mode=lz4frame.BLOCKMODE_LINKED,
             compression_level=lz4frame.COMPRESSIONLEVEL_MINHC,
-            content_size=False,
             auto_flush=1
         )
         chunk_size = 128 * 1024 # 128 kb, half of block size
@@ -265,6 +264,14 @@ def test_LZ4FrameCompressor_reset(self):
         decompressed = lz4frame.decompress(compressed)
         self.assertEqual(input_data, decompressed)
 
+    def test_compress_without_content_size(self):
+        """Compress with content_size_header=False, check contentSize is 0, then round-trip."""
+        payload = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
+        packed = lz4frame.compress(payload, content_size_header=False)
+        # The frame info is inspected before decompressing, as in the original ordering.
+        self.assertEqual(lz4frame.get_frame_info(packed)['contentSize'], 0)
+        self.assertEqual(payload, lz4frame.decompress(packed))
+
 class TestLZ4FrameModern(unittest.TestCase):
     def test_decompress_truncated(self):
         input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
@@ -306,14 +313,6 @@ def test_LZ4FrameCompressor_fails(self):
                 compressed += compressor.flush()
                 compressed = compressor.compress(input_data)
 
-    def test_compress_without_content_size(self):
-        input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
-        compressed = lz4frame.compress(input_data, content_size=False)
-        frame = lz4frame.get_frame_info(compressed)
-        self.assertEqual(frame['contentSize'], 0)
-        decompressed = lz4frame.decompress(compressed)
-        self.assertEqual(input_data, decompressed)
-
 
 if sys.version_info < (2, 7):
     # Poor-man unittest.TestCase.skip for Python 2.6

Original file line number	Diff line number	Diff line change
`@@ -121,11 +121,6 @@ compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)`
`121`	`121`	`return NULL;`
`122`	`122`	`}`
`123`	`123`
`124`		`- if (source_size <= 0) {`
`125`		`- PyErr_Format(PyExc_ValueError, "Input source data size invalid: %d bytes", source_size);`
`126`		`- return NULL;`
`127`		`- }`
`128`		`-`
`129`	`124`	`if (!strncmp (mode, "default", sizeof ("default")))`
`130`	`125`	`{`
`131`	`126`	`comp = DEFAULT;`
`@@ -258,7 +253,7 @@ decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs)`
`258`	`253`	`source_size -= hdr_size;`
`259`	`254`	`}`
`260`	`255`
`261`		`- if (dest_size <= 0 \|\| dest_size > PY_SSIZE_T_MAX)`
	`256`	`+ if (dest_size < 0 \|\| dest_size > PY_SSIZE_T_MAX)`
`262`	`257`	`{`
`263`	`258`	`PyErr_Format (PyExc_ValueError, "Invalid size in header: 0x%zu",`
`264`	`259`	`dest_size);`