From 6daf2845cb06a7e99736b0f6fd06cb76a63c72da Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 22:42:23 +0100 Subject: [PATCH 1/8] blake2 refactor --- crates/precompile/src/blake2.rs | 94 +++++++++++++-------------------- 1 file changed, 36 insertions(+), 58 deletions(-) diff --git a/crates/precompile/src/blake2.rs b/crates/precompile/src/blake2.rs index 3996b894a7..bc6caed4ec 100644 --- a/crates/precompile/src/blake2.rs +++ b/crates/precompile/src/blake2.rs @@ -16,44 +16,43 @@ pub fn run(input: &[u8], gas_limit: u64) -> PrecompileResult { return Err(PrecompileError::Blake2WrongLength); } - // Rounds 4 bytes + // Parse number of rounds (4 bytes) let rounds = u32::from_be_bytes(input[..4].try_into().unwrap()) as usize; let gas_used = rounds as u64 * F_ROUND; if gas_used > gas_limit { return Err(PrecompileError::OutOfGas); } + // Parse final block flag let f = match input[212] { - 1 => true, 0 => false, + 1 => true, _ => return Err(PrecompileError::Blake2WrongFinalIndicatorFlag), }; + // Parse state vector h (8 × u64) let mut h = [0u64; 8]; - //let mut m = [0u64; 16]; - - let t; - // Optimized parsing using ptr::read_unaligned for potentially better performance - - let m; - unsafe { - let ptr = input.as_ptr(); - - // Read h values - for (i, item) in h.iter_mut().enumerate() { - *item = u64::from_le_bytes(core::ptr::read_unaligned( - ptr.add(4 + i * 8) as *const [u8; 8] - )); - } - - m = input[68..68 + 16 * size_of::()].try_into().unwrap(); - - t = [ - u64::from_le_bytes(core::ptr::read_unaligned(ptr.add(196) as *const [u8; 8])), - u64::from_le_bytes(core::ptr::read_unaligned(ptr.add(204) as *const [u8; 8])), - ]; - } - algo::compress(rounds, &mut h, m, t, f); + input[4..68] + .chunks_exact(8) + .enumerate() + .for_each(|(i, chunk)| { + h[i] = u64::from_le_bytes(chunk.try_into().unwrap()); + }); + + // Parse message block m (16 × u64) + let mut m = [0u64; 16]; + input[68..196] + .chunks_exact(8) + .enumerate() + .for_each(|(i, chunk)| { + m[i] = u64::from_le_bytes(chunk.try_into().unwrap()); + }); + + // Parse offset counters + let t_0 = u64::from_le_bytes(input[196..204].try_into().unwrap()); + let t_1 = u64::from_le_bytes(input[204..212].try_into().unwrap()); + + algo::compress(rounds, &mut h, m, [t_0, t_1], f); let mut out = [0u8; 64]; for (i, h) in (0..64).step_by(8).zip(h.iter()) { @@ -94,22 +93,16 @@ pub mod algo { #[inline(always)] #[allow(clippy::many_single_char_names)] /// G function: - pub fn g(v: &mut [u64], a: usize, b: usize, c: usize, d: usize, x: u64, y: u64) { - v[a] = v[a].wrapping_add(v[b]); - v[a] = v[a].wrapping_add(x); - v[d] ^= v[a]; - v[d] = v[d].rotate_right(32); + fn g(v: &mut [u64], a: usize, b: usize, c: usize, d: usize, x: u64, y: u64) { + v[a] = v[a].wrapping_add(v[b]).wrapping_add(x); + v[d] = (v[d] ^ v[a]).rotate_right(32); v[c] = v[c].wrapping_add(v[d]); - v[b] ^= v[c]; - v[b] = v[b].rotate_right(24); + v[b] = (v[b] ^ v[c]).rotate_right(24); - v[a] = v[a].wrapping_add(v[b]); - v[a] = v[a].wrapping_add(y); - v[d] ^= v[a]; - v[d] = v[d].rotate_right(16); + v[a] = v[a].wrapping_add(v[b]).wrapping_add(y); + v[d] = (v[d] ^ v[a]).rotate_right(16); v[c] = v[c].wrapping_add(v[d]); - v[b] ^= v[c]; - v[b] = v[b].rotate_right(63); + v[b] = (v[b] ^ v[c]).rotate_right(63); } /// Compression function F takes as an argument the state vector "h", @@ -119,15 +112,7 @@ pub mod algo { /// returns a new state vector. The number of rounds, "r", is 12 for /// BLAKE2b and 10 for BLAKE2s. Rounds are numbered from 0 to r - 1. #[allow(clippy::many_single_char_names)] - pub fn compress( - rounds: usize, - h: &mut [u64; 8], - m_slice: &[u8; 16 * size_of::()], - t: [u64; 2], - f: bool, - ) { - assert!(m_slice.len() == 16 * size_of::()); - + pub fn compress(rounds: usize, h: &mut [u64; 8], m: [u64; 16], t: [u64; 2], f: bool) { #[cfg(all(target_feature = "avx2", feature = "std"))] { // only if it is compiled with avx2 flag and it is std, we can use avx2. @@ -136,7 +121,7 @@ pub mod algo { unsafe { super::avx2::compress_block( rounds, - m_slice, + &m, h, ((t[1] as u128) << 64) | (t[0] as u128), if f { !0 } else { 0 }, @@ -149,14 +134,6 @@ pub mod algo { // if avx2 is not available, use the fallback portable implementation - // Read m values - let mut m = [0u64; 16]; - for (i, item) in m.iter_mut().enumerate() { - *item = u64::from_le_bytes(unsafe { - core::ptr::read_unaligned(m_slice.as_ptr().add(i * 8) as *const [u8; 8]) - }); - } - let mut v = [0u64; 16]; v[..h.len()].copy_from_slice(h); // First half from state. v[h.len()..].copy_from_slice(&IV); // Second half from IV. @@ -224,7 +201,7 @@ mod avx2 { #[inline(always)] pub(crate) unsafe fn compress_block( mut rounds: usize, - block: &[u8; BLOCKBYTES], + block: &[Word; 16], words: &mut [Word; 8], count: Count, last_block: Word, @@ -238,6 +215,7 @@ mod avx2 { let flags = set4(count_low(count), count_high(count), last_block, last_node); let mut d = xor(loadu(iv_high), flags); + let block: &[u8; BLOCKBYTES] = std::mem::transmute(block); let msg_chunks = array_refs!(block, 16, 16, 16, 16, 16, 16, 16, 16); let m0 = _mm256_broadcastsi128_si256(loadu_128(msg_chunks.0)); let m1 = _mm256_broadcastsi128_si256(loadu_128(msg_chunks.1)); From bf3f86a53b66b60c3b89c6a736d8865247a53a8c Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 22:57:29 +0100 Subject: [PATCH 2/8] blake2 bench --- crates/precompile/bench/blake2.rs | 62 +++++++++++++++++++++++++++++++ crates/precompile/bench/main.rs | 4 ++ 2 files changed, 66 insertions(+) create mode 100644 crates/precompile/bench/blake2.rs diff --git a/crates/precompile/bench/blake2.rs b/crates/precompile/bench/blake2.rs new file mode 100644 index 0000000000..b5710b72da --- /dev/null +++ b/crates/precompile/bench/blake2.rs @@ -0,0 +1,62 @@ +use criterion::{black_box, BenchmarkGroup}; +use primitives::hex; +use revm_precompile::blake2; + +pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTime>) { + // Test vectors from the blake2 test + let inputs = [ + hex!("0000040048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b616162636465666768696a6b6c6d6e6f700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), + hex!("0000020048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), + hex!("0000004048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), + ]; + + // Benchmark with 4 rounds (most common case) + group.bench_function("blake2/4_rounds", |b| { + let input = &inputs[0]; // 4 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark with 2 rounds + group.bench_function("blake2/2_rounds", |b| { + let input = &inputs[1]; // 2 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark with 64 rounds (stress test) + group.bench_function("blake2/64_rounds", |b| { + let input = &inputs[2]; // 64 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark just the compression function with random round counts + group.bench_function("blake2/compress_12_rounds", |b| { + let h = [ + 0x6a09e667f3bcc908u64, + 0xbb67ae8584caa73bu64, + 0x3c6ef372fe94f82bu64, + 0xa54ff53a5f1d36f1u64, + 0x510e527fade682d1u64, + 0x9b05688c2b3e6c1fu64, + 0x1f83d9abfb41bd6bu64, + 0x5be0cd19137e2179u64, + ]; + let m = [0u64; 16]; + let t = [0u64, 0u64]; + b.iter(|| { + let mut h_copy = h; + blake2::algo::compress( + black_box(12), + &mut h_copy, + black_box(m), + black_box(t), + black_box(false), + ); + }); + }); +} diff --git a/crates/precompile/bench/main.rs b/crates/precompile/bench/main.rs index cd3b479136..153dc40ad1 100644 --- a/crates/precompile/bench/main.rs +++ b/crates/precompile/bench/main.rs @@ -1,6 +1,7 @@ #![allow(missing_docs)] //! Benchmarks for the crypto precompiles +pub mod blake2; pub mod ecrecover; pub mod eip1962; pub mod eip2537; @@ -31,6 +32,9 @@ pub fn benchmark_crypto_precompiles(c: &mut Criterion) { // Run KZG point evaluation benchmarks eip4844::add_benches(&mut group); + + // Run Blake2 benchmarks + blake2::add_benches(&mut group); } criterion_group! { From bec55f370b23c4db9a08d81a3af5f452708e6996 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 23:23:13 +0100 Subject: [PATCH 3/8] refactor benchmarks --- crates/precompile/bench/blake2.rs | 58 +++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/crates/precompile/bench/blake2.rs b/crates/precompile/bench/blake2.rs index b5710b72da..33c01eef5c 100644 --- a/crates/precompile/bench/blake2.rs +++ b/crates/precompile/bench/blake2.rs @@ -5,22 +5,26 @@ use revm_precompile::blake2; pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTime>) { // Test vectors from the blake2 test let inputs = [ - hex!("0000040048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b616162636465666768696a6b6c6d6e6f700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), - hex!("0000020048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), - hex!("0000004048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), + hex!("0000000248c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 2 rounds + hex!("0000000448c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b616162636465666768696a6b6c6d6e6f700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 4 rounds + hex!("0000004048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 64 rounds + hex!("0000000a48c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 10 rounds (Blake2s standard) + hex!("0000000c48c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 12 rounds (Blake2b standard) + hex!("0000020048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 512 rounds + hex!("0000040048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 1024 rounds ]; - // Benchmark with 4 rounds (most common case) - group.bench_function("blake2/4_rounds", |b| { - let input = &inputs[0]; // 4 rounds + // Benchmark with 2 rounds + group.bench_function("blake2/2_rounds", |b| { + let input = &inputs[0]; // 2 rounds b.iter(|| { let _ = blake2::run(black_box(input), u64::MAX); }); }); - // Benchmark with 2 rounds - group.bench_function("blake2/2_rounds", |b| { - let input = &inputs[1]; // 2 rounds + // Benchmark with 4 rounds + group.bench_function("blake2/4_rounds", |b| { + let input = &inputs[1]; // 4 rounds b.iter(|| { let _ = blake2::run(black_box(input), u64::MAX); }); @@ -34,7 +38,39 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi }); }); - // Benchmark just the compression function with random round counts + // Benchmark with 10 rounds (Blake2s standard) + group.bench_function("blake2/10_rounds", |b| { + let input = &inputs[3]; // 10 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark with 12 rounds (Blake2b standard) + group.bench_function("blake2/12_rounds", |b| { + let input = &inputs[4]; // 12 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark with 512 rounds (stress test) + group.bench_function("blake2/512_rounds", |b| { + let input = &inputs[5]; // 512 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark with 1024 rounds (extreme stress test) + group.bench_function("blake2/1024_rounds", |b| { + let input = &inputs[6]; // 1024 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark just the compression function with different round counts group.bench_function("blake2/compress_12_rounds", |b| { let h = [ 0x6a09e667f3bcc908u64, @@ -59,4 +95,4 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi ); }); }); -} +} \ No newline at end of file From a2d4c13cb0dc2d9f81e383331c40f453c2ae4e3a Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 23:25:11 +0100 Subject: [PATCH 4/8] fmt --- crates/precompile/bench/blake2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/precompile/bench/blake2.rs b/crates/precompile/bench/blake2.rs index 33c01eef5c..583ad3affd 100644 --- a/crates/precompile/bench/blake2.rs +++ b/crates/precompile/bench/blake2.rs @@ -95,4 +95,4 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi ); }); }); -} \ No newline at end of file +} From 3a47170a5109437a87e41a933c1eae4825dc4879 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 23:38:31 +0100 Subject: [PATCH 5/8] add 100K and 200K --- crates/precompile/bench/blake2.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/crates/precompile/bench/blake2.rs b/crates/precompile/bench/blake2.rs index 583ad3affd..7e566dbf6e 100644 --- a/crates/precompile/bench/blake2.rs +++ b/crates/precompile/bench/blake2.rs @@ -12,6 +12,8 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi hex!("0000000c48c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 12 rounds (Blake2b standard) hex!("0000020048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 512 rounds hex!("0000040048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 1024 rounds + hex!("000186a048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 100000 rounds (100K) + hex!("00030d4048c9bdf267e6096a3ba7ca8485ae67bb2bf894fe72f36e3cf1361d5f3af54fa5d182e6ad7f520e511f6c3e2b8c68059b6bbd41fbabd9831f79217e1319cde05b61626300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000300000000000000000000000000000001"), // 200000 rounds (200K) ]; // Benchmark with 2 rounds @@ -30,7 +32,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi }); }); - // Benchmark with 64 rounds (stress test) + // Benchmark with 64 rounds group.bench_function("blake2/64_rounds", |b| { let input = &inputs[2]; // 64 rounds b.iter(|| { @@ -54,7 +56,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi }); }); - // Benchmark with 512 rounds (stress test) + // Benchmark with 512 rounds group.bench_function("blake2/512_rounds", |b| { let input = &inputs[5]; // 512 rounds b.iter(|| { @@ -62,7 +64,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi }); }); - // Benchmark with 1024 rounds (extreme stress test) + // Benchmark with 1024 rounds group.bench_function("blake2/1024_rounds", |b| { let input = &inputs[6]; // 1024 rounds b.iter(|| { @@ -70,6 +72,22 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi }); }); + // Benchmark with 100K rounds + group.bench_function("blake2/100K_rounds", |b| { + let input = &inputs[7]; // 100000 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + + // Benchmark with 200K rounds + group.bench_function("blake2/200K_rounds", |b| { + let input = &inputs[8]; // 200000 rounds + b.iter(|| { + let _ = blake2::run(black_box(input), u64::MAX); + }); + }); + // Benchmark just the compression function with different round counts group.bench_function("blake2/compress_12_rounds", |b| { let h = [ From fae3ba95bb46894a64808e9a41e4d729bc5deeb9 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 23:48:12 +0100 Subject: [PATCH 6/8] blackbox --- crates/precompile/bench/blake2.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/precompile/bench/blake2.rs b/crates/precompile/bench/blake2.rs index 7e566dbf6e..a4ec094d8f 100644 --- a/crates/precompile/bench/blake2.rs +++ b/crates/precompile/bench/blake2.rs @@ -20,7 +20,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/2_rounds", |b| { let input = &inputs[0]; // 2 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -28,7 +28,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/4_rounds", |b| { let input = &inputs[1]; // 4 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -36,7 +36,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/64_rounds", |b| { let input = &inputs[2]; // 64 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -44,7 +44,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/10_rounds", |b| { let input = &inputs[3]; // 10 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -52,7 +52,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/12_rounds", |b| { let input = &inputs[4]; // 12 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -60,7 +60,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/512_rounds", |b| { let input = &inputs[5]; // 512 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -68,7 +68,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/1024_rounds", |b| { let input = &inputs[6]; // 1024 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -76,7 +76,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/100K_rounds", |b| { let input = &inputs[7]; // 100000 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); @@ -84,7 +84,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/200K_rounds", |b| { let input = &inputs[8]; // 200000 rounds b.iter(|| { - let _ = blake2::run(black_box(input), u64::MAX); + black_box(blake2::run(black_box(input), u64::MAX)); }); }); From cddba02adee803bc580a32479ec0a084ea31a5ae Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 23:50:23 +0100 Subject: [PATCH 7/8] unwrap --- crates/precompile/bench/blake2.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/precompile/bench/blake2.rs b/crates/precompile/bench/blake2.rs index a4ec094d8f..053552fad4 100644 --- a/crates/precompile/bench/blake2.rs +++ b/crates/precompile/bench/blake2.rs @@ -20,7 +20,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/2_rounds", |b| { let input = &inputs[0]; // 2 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -28,7 +28,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/4_rounds", |b| { let input = &inputs[1]; // 4 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -36,7 +36,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/64_rounds", |b| { let input = &inputs[2]; // 64 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -44,7 +44,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/10_rounds", |b| { let input = &inputs[3]; // 10 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -52,7 +52,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/12_rounds", |b| { let input = &inputs[4]; // 12 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -60,7 +60,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/512_rounds", |b| { let input = &inputs[5]; // 512 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -68,7 +68,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/1024_rounds", |b| { let input = &inputs[6]; // 1024 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -76,7 +76,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/100K_rounds", |b| { let input = &inputs[7]; // 100000 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); @@ -84,7 +84,7 @@ pub fn add_benches(group: &mut BenchmarkGroup<'_, criterion::measurement::WallTi group.bench_function("blake2/200K_rounds", |b| { let input = &inputs[8]; // 200000 rounds b.iter(|| { - black_box(blake2::run(black_box(input), u64::MAX)); + black_box(blake2::run(black_box(input), u64::MAX).unwrap()); }); }); From bb4f22e591fb12b46a6905f07a879b58204106c0 Mon Sep 17 00:00:00 2001 From: Kevaundray Wedderburn Date: Fri, 18 Jul 2025 23:59:07 +0100 Subject: [PATCH 8/8] refactor g --- crates/precompile/src/blake2.rs | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/crates/precompile/src/blake2.rs b/crates/precompile/src/blake2.rs index bc6caed4ec..6c446152a9 100644 --- a/crates/precompile/src/blake2.rs +++ b/crates/precompile/src/blake2.rs @@ -93,16 +93,26 @@ pub mod algo { #[inline(always)] #[allow(clippy::many_single_char_names)] /// G function: - fn g(v: &mut [u64], a: usize, b: usize, c: usize, d: usize, x: u64, y: u64) { - v[a] = v[a].wrapping_add(v[b]).wrapping_add(x); - v[d] = (v[d] ^ v[a]).rotate_right(32); - v[c] = v[c].wrapping_add(v[d]); - v[b] = (v[b] ^ v[c]).rotate_right(24); - - v[a] = v[a].wrapping_add(v[b]).wrapping_add(y); - v[d] = (v[d] ^ v[a]).rotate_right(16); - v[c] = v[c].wrapping_add(v[d]); - v[b] = (v[b] ^ v[c]).rotate_right(63); + fn g(v: &mut [u64; 16], a: usize, b: usize, c: usize, d: usize, x: u64, y: u64) { + let mut va = v[a]; + let mut vb = v[b]; + let mut vc = v[c]; + let mut vd = v[d]; + + va = va.wrapping_add(vb).wrapping_add(x); + vd = (vd ^ va).rotate_right(32); + vc = vc.wrapping_add(vd); + vb = (vb ^ vc).rotate_right(24); + + va = va.wrapping_add(vb).wrapping_add(y); + vd = (vd ^ va).rotate_right(16); + vc = vc.wrapping_add(vd); + vb = (vb ^ vc).rotate_right(63); + + v[a] = va; + v[b] = vb; + v[c] = vc; + v[d] = vd; } /// Compression function F takes as an argument the state vector "h",