2
2
3
3
import hashlib
4
4
import itertools
5
+ import math
6
+ import random
5
7
import time
6
8
from collections .abc import Callable
7
9
from typing import Final
15
17
K2 : Final [int ] = 0b1100001010110010101011100011110100100111110101001110101101001111
16
18
MASK : Final [int ] = 0xFFFFFFFFFFFFFFFF
17
19
18
- HASHES = {
19
- "mmh3_32" : mmh3 .mmh3_32_digest ,
20
- "mmh3_128" : mmh3 .mmh3_x64_128_digest ,
21
- "xxh_32" : xxhash .xxh32_digest ,
22
- "xxh_64" : xxhash .xxh64_digest ,
23
- "xxh3_64" : xxhash .xxh3_64_digest ,
24
- "xxh3_128" : xxhash .xxh3_128_digest ,
25
- "md5" : lambda ba : hashlib .md5 (ba ).digest (),
26
- "sha1" : lambda ba : hashlib .sha1 (ba ).digest (),
27
- "pymmh3_32" : pymmh3 .hash ,
28
- "pymmh3_128" : pymmh3 .hash128 ,
29
- }
30
-
31
20
32
21
def init_buffer (ba : bytearray ) -> bytearray :
33
22
"""Initializes a byte array with a pattern.
@@ -50,8 +39,24 @@ def init_buffer(ba: bytearray) -> bytearray:
50
39
return ba
51
40
52
41
42
def generate_size(size: int, p: float) -> int:
    """Pick a pseudo-random buffer size near ``size``.

    Args:
        size: The nominal size of the buffer to hash.
        p: The fraction by which the size may vary (e.g. ``0.1`` for +/-10%).

    Returns:
        An integer drawn uniformly from ``[ceil(size*(1-p)), floor(size*(1+p))]``.
    """
    low = math.ceil(size * (1 - p))
    high = math.floor(size * (1 + p))
    return random.randint(low, high)
53
58
def perf_hash(loops: int, f: Callable, size: int) -> float:
    """Benchmark a hash function.

    Args:
        loops: The number of outer loops to run.
        f: The hash function to benchmark.
        size: The size of the buffer to hash.

    Returns:
        The time taken to hash the buffers in fractional seconds.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be greater than 0")

    repeats = itertools.repeat(None, loops)

    # Ten windows of the same pattern-filled buffer, shifted by 0..9 bytes
    # (presumably to vary data alignment — confirm with the original author).
    buf = init_buffer(bytearray(size + 9))
    b0, b1, b2, b3, b4, b5, b6, b7, b8, b9 = (
        bytes(buf[shift : size + shift]) for shift in range(10)
    )

    started = time.perf_counter()
    for _ in repeats:
        # Calls are unrolled to keep per-iteration Python loop overhead low.
        f(b0)
        f(b1)
        f(b2)
        f(b3)
        f(b4)
        f(b5)
        f(b6)
        f(b7)
        f(b8)
        f(b9)

    return time.perf_counter() - started
104
+
105
+
106
def perf_hash_random(loops: int, f: Callable, size: int) -> float:
    """Benchmark a hash function with varying data sizes.

    Args:
        loops: The number of outer loops to run.
        f: The hash function to benchmark.
        size: The size of the buffer to hash.

    Returns:
        The time taken to hash the buffers in fractional seconds.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be greater than 0")

    repeats = itertools.repeat(None, loops)
    random.seed(42)
    inner_loops = 10
    extra_size = 255

    buf = init_buffer(bytearray(size + extra_size))

    # Draw all offsets first, then all sizes, so the pseudo-random sequence
    # consumed from the seeded generator stays in a fixed order.
    offsets = [random.randint(0, extra_size) for _ in range(inner_loops)]
    sizes = [generate_size(size, 0.1) for _ in range(inner_loops)]

    b0, b1, b2, b3, b4, b5, b6, b7, b8, b9 = (
        bytes(buf[off : off + length]) for off, length in zip(offsets, sizes)
    )

    started = time.perf_counter()
    for _ in repeats:
        # Calls are unrolled to keep per-iteration Python loop overhead low.
        f(b0)
        f(b1)
        f(b2)
        f(b3)
        f(b4)
        f(b5)
        f(b6)
        f(b7)
        f(b8)
        f(b9)

    return time.perf_counter() - started
96
158
97
159
160
def perf_hash_latency(loops: int, f: Callable, size: int) -> float:
    """Benchmark a hash function with overhead costs with varying data sizes.

    Based on xxHash's ``benchLatency`` function.
    https://github.com/Cyan4973/xxHash/blob/dev/tests/bench/benchHash.c

    Args:
        loops: The number of outer loops to run.
        f: The hash function to benchmark.
        size: The size of the buffer to hash.

    Returns:
        The time taken to hash the buffers in fractional seconds.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    if size <= 0:
        raise ValueError("size must be greater than 0")

    repeats = itertools.repeat(None, loops)
    random.seed(42)

    offset = 0

    # Ten pre-drawn sizes within +/-10% of the nominal size.
    s0, s1, s2, s3, s4, s5, s6, s7, s8, s9 = (
        generate_size(size, 0.1) for _ in range(10)
    )

    # Backing store is large enough for the biggest size (size * 1.1) plus the
    # maximum offset (255); a memoryview keeps the slicing below copy-free.
    backing = bytearray(math.floor(size * 1.1) + 255)
    view = memoryview(bytes(init_buffer(backing)))

    started = time.perf_counter()
    for _ in repeats:
        # The first byte of each digest becomes the next slice offset, so
        # every call depends on the previous result — measuring latency
        # rather than throughput.
        offset = f(view[offset : offset + s0])[0]
        offset = f(view[offset : offset + s1])[0]
        offset = f(view[offset : offset + s2])[0]
        offset = f(view[offset : offset + s3])[0]
        offset = f(view[offset : offset + s4])[0]
        offset = f(view[offset : offset + s5])[0]
        offset = f(view[offset : offset + s6])[0]
        offset = f(view[offset : offset + s7])[0]
        offset = f(view[offset : offset + s8])[0]
        offset = f(view[offset : offset + s9])[0]

    return time.perf_counter() - started
212
+
213
+
98
214
def add_cmdline_args(cmd: list, args) -> None:
    """Forward parsed benchmark options to a pyperf worker command line.

    Args:
        cmd: The worker command line to extend in place.
        args: The parsed command line arguments.
    """
    cmd.extend(
        (
            "--test-hash",
            args.test_hash,
            "--test-type",
            args.test_type,
            "--test-buffer-size-max",
            str(args.test_buffer_size_max),
        )
    )
107
224
108
225
226
# Mapping from the ``--test-hash`` CLI choice to the hashing callable that is
# benchmarked.  Each callable takes a single bytes-like buffer; the hashlib
# entries are wrapped in lambdas so they too return a digest from one call.
HASHES = {
    "mmh3_32": mmh3.mmh3_32_digest,
    "mmh3_128": mmh3.mmh3_x64_128_digest,
    "xxh_32": xxhash.xxh32_digest,
    "xxh_64": xxhash.xxh64_digest,
    "xxh3_64": xxhash.xxh3_64_digest,
    "xxh3_128": xxhash.xxh3_128_digest,
    "md5": lambda ba: hashlib.md5(ba).digest(),
    "sha1": lambda ba: hashlib.sha1(ba).digest(),
    "pymmh3_32": pymmh3.hash,
    "pymmh3_128": pymmh3.hash128,
}
238
+
239
# Mapping from the ``--test-type`` CLI choice to the benchmark driver.  All
# drivers share the ``(loops, f, size) -> float`` signature expected by
# ``pyperf``'s ``bench_time_func``.
BENCHMARKING_TYPES = {
    "naive": perf_hash,
    "random": perf_hash_random,
    "latency": perf_hash_latency,
}
244
+
245
+
109
246
if __name__ == "__main__" :
110
247
runner = pyperf .Runner (add_cmdline_args = add_cmdline_args )
111
248
@@ -117,6 +254,14 @@ def add_cmdline_args(cmd: list, args) -> None:
117
254
choices = HASHES .keys (),
118
255
)
119
256
257
+ runner .argparser .add_argument (
258
+ "--test-type" ,
259
+ type = str ,
260
+ help = "Type of benchmarking to perform" ,
261
+ choices = BENCHMARKING_TYPES .keys (),
262
+ default = "random" ,
263
+ )
264
+
120
265
runner .argparser .add_argument (
121
266
"--test-buffer-size-max" ,
122
267
type = int ,
@@ -130,7 +275,7 @@ def add_cmdline_args(cmd: list, args) -> None:
130
275
while fib1 <= process_args .test_buffer_size_max :
131
276
runner .bench_time_func (
132
277
f"{ fib1 } bytes" ,
133
- perf_hash ,
278
+ BENCHMARKING_TYPES [ process_args . test_type ] ,
134
279
HASHES [process_args .test_hash ],
135
280
fib1 ,
136
281
inner_loops = 10 ,
0 commit comments