-
Notifications
You must be signed in to change notification settings - Fork 102
implement fletcher32 #412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
implement fletcher32 #412
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
1dc39fb
implement fletcher32
martindurant 4a7fd63
Update numcodecs/fletcher32.pyx
martindurant db2275e
Add docstring and erorr test
martindurant 4366b5b
Use HDF C impl
martindurant 8e01f63
Remove unused, add docstrings
martindurant cb0aa2f
to runtime and int test
martindurant 93cef03
to cython
martindurant dbbf2bc
Update numcodecs/fletcher32.pyx
martindurant 4825a1d
Add docs
martindurant 12eb7a3
Merge branch 'main' into fletch
martindurant File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# cython: language_level=3 | ||
# cython: overflowcheck=False | ||
# cython: cdivision=True | ||
import struct | ||
|
||
from numcodecs.abc import Codec | ||
from numcodecs.compat import ensure_contiguous_ndarray | ||
|
||
from libc.stdint cimport uint8_t, uint16_t, uint32_t | ||
|
||
|
||
cdef uint32_t _fletcher32(const uint8_t[::1] _data):
    """Return the Fletcher-32 checksum of a contiguous byte buffer.

    The buffer is consumed as a sequence of 16-bit words, each built from
    two consecutive bytes with the first byte in the high position. If the
    buffer has an odd number of bytes, the final byte is treated as the
    high byte of a last word whose low byte is zero.

    An empty buffer yields 0, the value of both running sums when there is
    nothing to add.
    """
    # converted from
    # https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109
    cdef:
        const uint8_t *data
        size_t nbytes = _data.shape[0]
        size_t words_left
        size_t block
        uint32_t sum1 = 0, sum2 = 0

    # Indexing element 0 of a zero-length memoryview is out of bounds, so
    # bail out before taking the address of the first byte.
    if nbytes == 0:
        return 0

    data = &_data[0]
    words_left = nbytes // 2  # number of complete 16-bit words

    while words_left:
        # Work in blocks of at most 360 words, folding the accumulators
        # back to 16 bits after each block so the 32-bit sums do not wrap.
        block = 360 if words_left > 360 else words_left
        words_left -= block
        while block:
            sum1 += <uint32_t>((<uint16_t>data[0]) << 8) | (<uint16_t>data[1])
            data += 2
            sum2 += sum1
            block -= 1
        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
        sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    # Odd number of bytes: the trailing byte forms the high half of a word
    if nbytes % 2:
        sum1 += <uint32_t>((<uint16_t>(data[0])) << 8)
        sum2 += sum1
        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
        sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    # Second fold: the previous fold may itself have carried into bit 16
    sum1 = (sum1 & 0xffff) + (sum1 >> 16)
    sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    return (sum2 << 16) | sum1
|
||
|
||
class Fletcher32(Codec):
    """The fletcher checksum with 16-bit words and 32-bit output

    This is the netCDF4/HDF5 implementation, which is not equivalent
    to the one in wikipedia
    https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.
    """

    codec_id = "fletcher32"

    def encode(self, buf):
        """Return buffer plus 4-byte fletcher checksum

        Parameters
        ----------
        buf : buffer-like
            Data to protect; it is viewed as a flat array of bytes.

        Returns
        -------
        bytes
            The bytes of ``buf`` followed by the checksum packed as a
            little-endian unsigned 32-bit integer.
        """
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        cdef const uint8_t[::1] b_ptr = buf
        val = _fletcher32(b_ptr)
        return buf.tobytes() + struct.pack("<I", val)

    def decode(self, buf, out=None):
        """Check fletcher checksum, and return buffer without it

        Parameters
        ----------
        buf : buffer-like
            Encoded data whose last four bytes are the little-endian
            checksum of everything before them.
        out : ndarray, optional
            If given, the data bytes are copied into it (through a
            ``uint8`` view) and it is returned.

        Returns
        -------
        ``out`` when it was supplied, otherwise a memoryview over the data
        portion of ``buf``.

        Raises
        ------
        ValueError
            If ``buf`` is too short to contain the four-byte checksum.
        RuntimeError
            If the stored checksum does not match the computed one.
        """
        b = ensure_contiguous_ndarray(buf).view('uint8')
        if b.nbytes < 4:
            raise ValueError(
                "Buffer is too short to contain a fletcher32 checksum: "
                f"expected at least 4 bytes, got {b.nbytes}."
            )
        cdef const uint8_t[::1] b_ptr = b[:-4]
        val = _fletcher32(b_ptr)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The fletcher32 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        # Compare against None explicitly: the truth value of a NumPy array
        # with more than one element is ambiguous and raises ValueError.
        if out is not None:
            out.view("uint8")[:] = b[:-4]
            return out
        return memoryview(b[:-4])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from numcodecs.fletcher32 import Fletcher32 | ||
|
||
|
||
@pytest.mark.parametrize("dtype", ["uint8", "int32", "float32"])
def test_with_data(dtype):
    """Encoding then decoding must give back the original values."""
    original = np.arange(100, dtype=dtype)
    codec = Fletcher32()
    encoded = codec.encode(original)
    restored = np.frombuffer(codec.decode(encoded), dtype=dtype)
    assert np.all(restored == original)
martindurant marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def test_error():
    """Decoding a payload whose data was altered must raise RuntimeError."""
    codec = Fletcher32()
    corrupted = bytearray(codec.encode(np.arange(100)))
    # Change the first data byte so the stored checksum no longer matches
    corrupted[0] += 1
    with pytest.raises(RuntimeError) as excinfo:
        codec.decode(corrupted)
    message = str(excinfo.value)
    assert "fletcher32 checksum" in message
|
||
|
||
def test_known():
    """Decode a fixed payload (80 data bytes + 4 checksum bytes)."""
    encoded = (
        b'w\x07\x00\x00\x00\x00\x00\x00\x85\xf6\xff\xff\xff\xff\xff\xff'
        b'i\x07\x00\x00\x00\x00\x00\x00\x94\xf6\xff\xff\xff\xff\xff\xff'
        b'\x88\t\x00\x00\x00\x00\x00\x00i\x03\x00\x00\x00\x00\x00\x00'
        b'\x93\xfd\xff\xff\xff\xff\xff\xff\xc3\xfc\xff\xff\xff\xff\xff\xff'
        b"'\x02\x00\x00\x00\x00\x00\x00\xba\xf7\xff\xff\xff\xff\xff\xff"
        b'\xfd%\x86d')
    expected = [
        1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118,
    ]
    payload = Fletcher32().decode(encoded)
    values = np.frombuffer(payload, dtype="<i8").tolist()
    assert values == expected
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.