Skip to content

Commit 4366b5b

Browse files
committed
Use HDF C impl
1 parent db2275e commit 4366b5b

File tree

3 files changed

+70
-40
lines changed

3 files changed

+70
-40
lines changed

numcodecs/_fletcher.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#include <stdint.h>
2+
#include <stddef.h>
3+
4+
// https://github.com/Unidata/netcdf-c/blob/8eb71290eb9360dcfd4955ba94759ba8d02c40a9/plugins/H5checksum.c
5+
6+
7+
/*
 * Compute a Fletcher-32 checksum over a byte buffer.
 *
 * The buffer is consumed two bytes at a time; each pair is combined into a
 * 16-bit word with the FIRST byte as the high byte, so the result does not
 * depend on the host's endianness. If _len is odd, the final byte is summed
 * as the high byte of one extra word whose low byte is zero.
 *
 * The running sums are folded with end-around carry
 * ((s & 0xffff) + (s >> 16)) rather than reduced with "% 0xffff", so a
 * half of the result can be 0xffff where a strict modulo would give 0.
 *
 * _data: pointer to the bytes to be summed (not dereferenced when _len is 0)
 * _len:  number of bytes available at _data
 *
 * Returns sum2 in the upper 16 bits and sum1 in the lower 16 bits.
 */
uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
{
    const uint8_t *data = (const uint8_t *)_data; /* Pointer to the data to be summed */
    size_t len = _len / 2;                        /* Length in 16-bit words */
    uint32_t sum1 = 0, sum2 = 0;

    /* Compute checksum for pairs of bytes */
    /* (the magic "360" value is the largest number of sums that can be
     *  performed without numeric overflow)
     */
    while (len) {
        size_t tlen = len > 360 ? 360 : len;
        len -= tlen;
        do {
            sum1 += (uint32_t)(((uint16_t)data[0]) << 8) | ((uint16_t)data[1]);
            data += 2;
            sum2 += sum1;
        } while (--tlen);
        /* Fold the carries back in before the next batch of words */
        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
    }

    /* Check for odd # of bytes */
    if (_len % 2) {
        /* Trailing byte becomes the high byte of a final word */
        sum1 += (uint32_t)(((uint16_t)*data) << 8);
        sum2 += sum1;
        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
    } /* end if */

    /* Second reduction step to reduce sums to 16 bits */
    sum1 = (sum1 & 0xffff) + (sum1 >> 16);
    sum2 = (sum2 & 0xffff) + (sum2 >> 16);

    return (sum2 << 16) | sum1;
} /* end H5_checksum_fletcher32() */

numcodecs/fletcher32.pyx

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,8 @@ from numcodecs.compat import ensure_contiguous_ndarray
1111

1212
from libc.stdint cimport uint8_t, uint16_t, uint32_t
1313

14-
cpdef uint32_t fletcher32(const uint16_t[::1] data):
15-
cdef:
16-
uint32_t sum1 = 0
17-
uint32_t sum2 = 0
18-
int index
19-
int size = data.shape[0]
20-
21-
for index in range(0, size):
22-
sum1 = (sum1 + data[index]) % 0xffff
23-
sum2 = (sum2 + sum1) % 0xffff
24-
25-
return (sum2 << 16) | sum1
26-
14+
cdef extern from "_fletcher.c":
15+
uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
2716

2817
class Fletcher32(Codec):
2918
"""The fletcher checksum with 16-bit words and 32-bit output
@@ -37,28 +26,20 @@ class Fletcher32(Codec):
3726
codec_id = "fletcher32"
3827

3928
def encode(self, buf):
40-
buf = ensure_contiguous_ndarray(buf).ravel()
41-
if len(buf) % 2:
42-
# rare, odd size of bytes data only
43-
arr = np.frombuffer(buf.tobytes() + b"\x00", dtype="uint16")
44-
val = fletcher32(arr)
45-
else:
46-
val = fletcher32(buf.view('uint16'))
29+
buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
30+
cdef const uint8_t[::1] b_ptr = buf
31+
val = H5_checksum_fletcher32(&b_ptr[0], buf.nbytes)
4732
return buf.tobytes() + struct.pack("<I", val)
4833

4934
def decode(self, buf, out=None):
5035
b = ensure_contiguous_ndarray(buf).view('uint8')
51-
if len(buf) % 2:
52-
# rare, odd size of bytes data only
53-
arr = np.frombuffer(b.tobytes() + b"\x00", dtype="uint16")
54-
val = fletcher32(arr)
55-
else:
56-
val = fletcher32(b[:-4].view('uint16'))
57-
found = b[-4:].view('uint32')[0]
36+
cdef const uint8_t[::1] b_ptr = b
37+
val = H5_checksum_fletcher32(&b_ptr[0], b.nbytes - 4)
38+
found = b[-4:].view("<u4")[0]
5839
if val != found:
5940
raise ValueError(
60-
f"The fletcher32 checksum of the data ({found}) did not"
61-
f" match the expected checksum ({val}).\n"
41+
f"The fletcher32 checksum of the data ({val}) did not"
42+
f" match the expected checksum ({found}).\n"
6243
"This could be a sign that the data has been corrupted."
6344
)
6445
if out:

numcodecs/tests/test_fletcher32.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,15 @@
11
import numpy as np
22
import pytest
33

4-
from numcodecs.fletcher32 import Fletcher32, fletcher32
5-
6-
7-
@pytest.mark.parametrize("inval,outval", [
8-
[b"abcdef", 1448095018],
9-
[b"abcdefgh", 3957429649]
10-
])
11-
def test_vectors(inval, outval):
12-
arr = np.array(list(inval), dtype="uint8").view('uint16')
13-
assert fletcher32(arr) == outval
4+
from numcodecs.fletcher32 import Fletcher32
145

156

167
@pytest.mark.parametrize(
178
"dtype",
189
["uint8", "int32", "float32"]
1910
)
2011
def test_with_data(dtype):
21-
data = np.empty(100, dtype=dtype)
12+
data = np.arange(100, dtype=dtype)
2213
f = Fletcher32()
2314
arr = np.frombuffer(f.decode(f.encode(data)), dtype=dtype)
2415
assert (arr == data).all()
@@ -34,3 +25,18 @@ def test_error():
3425
f.decode(enc2)
3526
assert "fletcher32 checksum" in str(e.value)
3627

28+
29+
def test_known():
    """Decode a fixed, pre-encoded payload and compare the recovered values.

    The payload is decoded with ``Fletcher32().decode`` and the result is
    read back as little-endian float64; the test expects exactly the ten
    reference values listed below (within ``np.allclose`` tolerances).
    """
    encoded = (
        b'\xf04\xfe\x1a\x03\xb2\xb1?^\x99j\xf3\xd6f\xef?\xbbm\x04n'
        b'\x9a\xdf\xeb?x\x9eIL\xdeW\xc8?A\xef\x88\xa8&\xad\xef?'
        b'\xf2\xc6a\x01a\xb8\xe8?#&\x96\xabY\xf2\xe7?\xe2Pw\xba\xd0w\xea?'
        b'\x80\xc5\xf8M@0\x9a?\x98H+\xb4\x03\xfa\xc6?\xb9P\x1e1'
    )
    expected = [
        0.0691225, 0.98130367, 0.87104532, 0.19018153, 0.9898866,
        0.77250719, 0.74833377, 0.8271259, 0.02557469, 0.17950484
    ]
    codec = Fletcher32()
    decoded = codec.decode(encoded)
    values = np.frombuffer(decoded, dtype="<f8")
    assert np.allclose(values, expected)

0 commit comments

Comments
 (0)