Skip to content

Commit ad43096

Browse files
committed
gh-91349: Expose the crc32 function from the lzma library
1 parent e42bda9 commit ad43096

File tree

5 files changed

+143
-2
lines changed

5 files changed

+143
-2
lines changed

Doc/library/lzma.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,23 @@ Compressing and decompressing data in memory
311311
*preset* and *filters* arguments.
312312

313313

314+
.. function:: crc32(data, value=0)
315+
316+
.. index::
317+
single: Cyclic Redundancy Check
318+
single: checksum; Cyclic Redundancy Check
319+
320+
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The
321+
result is an unsigned 32-bit integer. If *value* is present, it is used
322+
as the starting value of the checksum; otherwise, a default value of 0
323+
is used. Passing in *value* allows computing a running checksum over the
324+
concatenation of several inputs. The algorithm is not cryptographically
325+
strong, and should not be used for authentication or digital signatures. Since
326+
the algorithm is designed for use as a checksum algorithm, it is not suitable
327+
for use as a general hash algorithm.
328+
329+
.. versionadded:: 3.14
330+
314331
.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)
315332

316333
Decompress *data* (a :class:`bytes` object), returning the uncompressed data

Lib/test/test_lzma.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import _compression
22
import array
3+
import binascii
34
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
45
import os
56
import pickle
@@ -8,7 +9,7 @@
89
from test import support
910
import unittest
1011

11-
from test.support import _4G, bigmemtest
12+
from test.support import _1G, _4G, bigmemtest
1213
from test.support.import_helper import import_module
1314
from test.support.os_helper import (
1415
TESTFN, unlink, FakePath
@@ -17,6 +18,44 @@
1718
lzma = import_module("lzma")
1819
from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile
1920

21+
class ChecksumTestCase(unittest.TestCase):
22+
# Tests for lzma.crc32(): start values, empty input, known checksums, and parity with binascii.crc32().
23+
def test_crc32start(self):
24+
self.assertEqual(lzma.crc32(b""), lzma.crc32(b"", 0))
25+
self.assertTrue(lzma.crc32(b"abc", 0xffffffff))
26+
27+
def test_crc32empty(self):
28+
self.assertEqual(lzma.crc32(b"", 0), 0)
29+
self.assertEqual(lzma.crc32(b"", 1), 1)
30+
self.assertEqual(lzma.crc32(b"", 432), 432)
31+
32+
def test_penguins(self):
33+
self.assertEqual(lzma.crc32(b"penguin", 0), 0x0e5c1a120)
34+
self.assertEqual(lzma.crc32(b"penguin", 1), 0x43b6aa94)
35+
self.assertEqual(lzma.crc32(b"penguin"), lzma.crc32(b"penguin", 0))
36+
37+
def test_crc32_unsigned(self):
38+
foo = b'abcdefghijklmnop'
39+
# 2486878355 is >= 2**31, so this only passes if the result is returned as an unsigned value
40+
self.assertEqual(lzma.crc32(foo), 2486878355)
41+
self.assertEqual(lzma.crc32(b'spam'), 1138425661)
42+
43+
def test_same_as_binascii_crc32(self):
44+
foo = b'abcdefghijklmnop'
45+
crc = 2486878355
46+
self.assertEqual(binascii.crc32(foo), crc)
47+
self.assertEqual(lzma.crc32(foo), crc)
48+
self.assertEqual(binascii.crc32(b'spam'), lzma.crc32(b'spam'))
49+
50+
51+
# GH-54485 - check that inputs >=4 GiB are handled correctly.
52+
class ChecksumBigBufferTestCase(unittest.TestCase):
53+
54+
@bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
55+
def test_big_buffer(self, size):
56+
# 'size' is not read here; the buffer is 4 * (_1G + 1) bytes, which matches the declared size assuming _4G == 4 * _1G -- TODO confirm
data = b"nyan" * (_1G + 1)
57+
self.assertEqual(lzma.crc32(data), 1044521549)
58+
2059

2160
class CompressorDecompressorTestCase(unittest.TestCase):
2261

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Expose the crc32 function from the lzma library.

Modules/_lzmamodule.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,10 +1599,40 @@ lzma_exec(PyObject *module)
15991599
return 0;
16001600
}
16011601

1602+
/*[clinic input]
1603+
_lzma.crc32 -> unsigned_int
1604+
1605+
data: Py_buffer
1606+
value: unsigned_int(bitwise=True) = 0
1607+
Starting value of the checksum.
1608+
/
1609+
1610+
Compute a CRC-32 checksum of data.
1611+
1612+
The returned checksum is an integer.
1613+
[clinic start generated code]*/
1614+
1615+
static unsigned int
1616+
_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
1617+
/*[clinic end generated code: output=fca7916d796faf8b input=bb623a169c14534f]*/
1618+
{
1619+
/* Only release the GIL for inputs larger than 5 KiB (1024*5 bytes): for
1620+
smaller buffers, releasing it is inefficient and may lower performance. */
1621+
if (data->len > 1024*5) {
1622+
Py_BEGIN_ALLOW_THREADS
1623+
value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value);
1624+
Py_END_ALLOW_THREADS
1625+
} else {
1626+
value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value);
1627+
}
1628+
return value;
1629+
}
1630+
16021631
static PyMethodDef lzma_methods[] = {
16031632
_LZMA_IS_CHECK_SUPPORTED_METHODDEF
16041633
_LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
16051634
_LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1635+
_LZMA_CRC32_METHODDEF
16061636
{NULL}
16071637
};
16081638

Modules/clinic/_lzmamodule.c.h

Lines changed: 55 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)