// NOTE(review): SOURCE arrived as a whitespace-collapsed git unified diff in which
// every generic type argument was stripped by extraction (e.g. "ReadOnlySpan" for
// "ReadOnlySpan<byte>", "Unsafe.As>" for "Unsafe.As<byte, Vector128<byte>>").
// Below is the reconstructed POST-PATCH content of the three touched files, merged
// into one compilation unit (block-scoped namespaces) with all stripped type
// arguments restored. Logic, constants, and runtime strings are unchanged from the
// patch. Members only partially visible as diff context (e.g. ToPooledListRef)
// are intentionally omitted — they exist unchanged in the real files.

// SPDX-FileCopyrightText: 2024 Demerzel Solutions Limited
// SPDX-License-Identifier: LGPL-3.0-only

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using BenchmarkDotNet.Attributes;
using FluentAssertions;
using Nethermind.Core.Extensions;
using NUnit.Framework;
using x64 = System.Runtime.Intrinsics.X86;
using Arm = System.Runtime.Intrinsics.Arm;

namespace Nethermind.Benchmarks.Core
{
    /// <summary>
    /// Benchmarks the public FastHash entry point against the two internal
    /// implementations (AES-based and CRC-based) across common payload sizes.
    /// </summary>
    [ShortRunJob]
    [DisassemblyDiagnoser]
    [MemoryDiagnoser]
    public class FastHashBenchmarks
    {
        private byte[] _data = null!;

        [Params(16, 20, 32, 64, 128, 256, 512, 1024)]
        public int Size;

        [GlobalSetup]
        public void Setup()
        {
            _data = new byte[Size];
            Random.Shared.NextBytes(_data);
        }

        [Benchmark(Baseline = true)]
        public int FastHash()
        {
            return ((ReadOnlySpan<byte>)_data).FastHash();
        }

        [Benchmark]
        public int FastHashAes()
        {
            // Call the x64 AES path directly, bypassing the hardware dispatch in FastHash.
            ref byte start = ref MemoryMarshal.GetReference<byte>(_data);
            return SpanExtensions.FastHashAesX64(ref start, _data.Length, SpanExtensions.ComputeSeed(_data.Length));
        }

        [Benchmark]
        public int FastHashCrc()
        {
            // Call the CRC fallback directly, bypassing the hardware dispatch in FastHash.
            ref byte start = ref MemoryMarshal.GetReference<byte>(_data);
            return SpanExtensions.FastHashCrc(ref start, _data.Length, SpanExtensions.ComputeSeed(_data.Length));
        }
    }
}

namespace Nethermind.Core.Test
{
    // NOTE(review): only the test methods added by the patch are reconstructed here;
    // the real BytesTests class contains many more members.
    public class BytesTests
    {
        [Test]
        public void FastHash_EmptyInput_ReturnsZero()
        {
            // Contract choice: empty input hashes to 0.
            ReadOnlySpan<byte> empty = ReadOnlySpan<byte>.Empty;
            empty.FastHash().Should().Be(0);
        }

        [Test]
        public void FastHash_SameInput_ReturnsSameHash()
        {
            byte[] input = new byte[100];
            TestContext.CurrentContext.Random.NextBytes(input);

            int hash1 = ((ReadOnlySpan<byte>)input).FastHash();
            int hash2 = ((ReadOnlySpan<byte>)input).FastHash();

            hash1.Should().Be(hash2);
        }

        [Test]
        public void FastHash_DifferentInput_ReturnsDifferentHash()
        {
            byte[] input1 = new byte[100];
            byte[] input2 = new byte[100];
            TestContext.CurrentContext.Random.NextBytes(input1);
            Array.Copy(input1, input2, input1.Length);
            input2[50] ^= 0xFF; // Flip bits at position 50

            int hash1 = ((ReadOnlySpan<byte>)input1).FastHash();
            int hash2 = ((ReadOnlySpan<byte>)input2).FastHash();

            hash1.Should().NotBe(hash2);
        }

        // Test cases for the fold-back bug fix: remaining in [49-63] after 64-byte initial load
        // For len=113 to 127, remaining = len-64 = 49 to 63, which requires the last64 fold-back
        [TestCase(113)] // remaining=49, boundary case for last64
        [TestCase(120)] // remaining=56, middle of the gap range
        [TestCase(127)] // remaining=63, upper boundary
        [TestCase(65)]  // remaining=1, lower boundary for >64 path
        [TestCase(80)]  // remaining=16
        [TestCase(96)]  // remaining=32
        [TestCase(112)] // remaining=48, boundary where last64 is NOT needed
        public void FastHash_AllBytesAreHashed_FoldBackCoverage(int length)
        {
            byte[] input = new byte[length];
            TestContext.CurrentContext.Random.NextBytes(input);

            int originalHash = ((ReadOnlySpan<byte>)input).FastHash();

            // Verify that changing any byte changes the hash
            // This catches the gap bug where bytes[64-71] weren't being hashed
            for (int i = 0; i < length; i++)
            {
                byte[] modified = (byte[])input.Clone();
                modified[i] ^= 0xFF;

                int modifiedHash = ((ReadOnlySpan<byte>)modified).FastHash();
                modifiedHash.Should().NotBe(originalHash, $"Changing byte at index {i} should change the hash for length {length}");
            }
        }

        // Specifically test the gap range that was buggy: bytes[64-71] for len=120
        [Test]
        public void FastHash_GapBytesAreHashed_Len120()
        {
            byte[] input = new byte[120];
            TestContext.CurrentContext.Random.NextBytes(input);

            int originalHash = ((ReadOnlySpan<byte>)input).FastHash();

            // The bug was that bytes[64-71] weren't hashed for len=120
            // Test each byte in the gap
            for (int i = 64; i < 72; i++)
            {
                byte[] modified = (byte[])input.Clone();
                modified[i] ^= 0xFF;

                int modifiedHash = ((ReadOnlySpan<byte>)modified).FastHash();
                modifiedHash.Should().NotBe(originalHash, $"Changing byte at index {i} (in gap range) should change the hash");
            }
        }

        // Test medium-large case (33-64 bytes) with overlap to verify it works
        [TestCase(50)] // Tests overlap in medium-large path
        public void FastHash_MediumLarge_AllBytesContribute(int length)
        {
            byte[] input = new byte[length];
            TestContext.CurrentContext.Random.NextBytes(input);

            int originalHash = ((ReadOnlySpan<byte>)input).FastHash();

            // Test ALL bytes to verify overlap handling works
            for (int i = 0; i < length; i++)
            {
                byte[] modified = (byte[])input.Clone();
                modified[i] ^= 0xFF;

                int modifiedHash = ((ReadOnlySpan<byte>)modified).FastHash();
                modifiedHash.Should().NotBe(originalHash, $"Changing byte at index {i} should change the hash for length {length}");
            }
        }

        [TestCase(1)]
        [TestCase(7)]
        [TestCase(8)]
        [TestCase(15)]
        [TestCase(16)]
        [TestCase(31)]
        [TestCase(32)]
        [TestCase(33)]
        [TestCase(64)]
        [TestCase(128)]
        [TestCase(256)]
        [TestCase(500)]
        public void FastHash_VariousLengths_AllBytesContribute(int length)
        {
            byte[] input = new byte[length];
            TestContext.CurrentContext.Random.NextBytes(input);

            int originalHash = ((ReadOnlySpan<byte>)input).FastHash();

            // Test first, middle, and last bytes to ensure all contribute
            int[] indicesToTest = [0, length / 2, length - 1];
            foreach (int i in indicesToTest)
            {
                byte[] modified = (byte[])input.Clone();
                modified[i] ^= 0xFF;

                int modifiedHash = ((ReadOnlySpan<byte>)modified).FastHash();
                modifiedHash.Should().NotBe(originalHash, $"Changing byte at index {i} should change the hash for length {length}");
            }
        }
    }
}

namespace Nethermind.Core.Extensions
{
    // NOTE(review): only the members touched by the patch are reconstructed here;
    // the real SpanExtensions class contains many more members (ToHexString overloads, etc.).
    public static class SpanExtensions
    {
        // Per-process random seed so attackers cannot trivially engineer same-bucket
        // keys; randomized hashing protects the performance of the network as a whole.
        private static readonly uint s_instanceRandom = (uint)System.Security.Cryptography.RandomNumberGenerator.GetInt32(int.MinValue, int.MaxValue);

        // Seed derivation shared by FastHash and by benchmarks calling the internal
        // implementations directly; mixing in length decorrelates same-prefix inputs.
        internal static uint ComputeSeed(int len) => s_instanceRandom + (uint)len;

        public static string ToHexString(this in Memory<byte> memory, bool withZeroX = false)
        {
            return ToHexString(memory.Span, withZeroX, false, false);
        }

        /// <summary>
        /// Fast, hardware-accelerated, non-cryptographic hash of a byte span.
        /// Dispatches to an AES-intrinsic implementation (x64 or ARM) for inputs of
        /// 16+ bytes when supported, otherwise falls back to a CRC32C-based hash.
        /// Empty input hashes to 0. Seeded per-process, so values are NOT stable
        /// across runs — suitable for in-memory hash tables only.
        /// </summary>
        [SkipLocalsInit]
        public static int FastHash(this ReadOnlySpan<byte> input)
        {
            int len = input.Length;
            if (len == 0) return 0;

            ref byte start = ref MemoryMarshal.GetReference(input);
            uint seed = s_instanceRandom + (uint)len;

            // AES paths read whole 16-byte vectors, so they require len >= 16.
            if (len >= 16)
            {
                if (x64.Aes.IsSupported) return FastHashAesX64(ref start, len, seed);
                if (Arm.Aes.IsSupported) return FastHashAesArm(ref start, len, seed);
            }

            return FastHashCrc(ref start, len, seed);
        }

        /// <summary>
        /// AES-NI implementation for x64; requires len &gt;= 16 (reads 16-byte vectors,
        /// using overlapping loads from the end for non-multiple-of-16 lengths).
        /// </summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        [SkipLocalsInit]
        internal static int FastHashAesX64(ref byte start, int len, uint seed)
        {
            Vector128<byte> seedVec = Vector128.CreateScalar(seed).AsByte();
            Vector128<byte> acc0 = Unsafe.As<byte, Vector128<byte>>(ref start) ^ seedVec;

            if (len > 64)
            {
                // Four independent lanes hide AESENC latency and increase ILP.
                Vector128<byte> acc1 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, 16)) ^ seedVec;
                Vector128<byte> acc2 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, 32)) ^ seedVec;
                Vector128<byte> acc3 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, 48)) ^ seedVec;

                ref byte p = ref Unsafe.Add(ref start, 64);
                int remaining = len - 64;

                while (remaining >= 64)
                {
                    acc0 = x64.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref p), acc0);
                    acc1 = x64.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref p, 16)), acc1);
                    acc2 = x64.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref p, 32)), acc2);
                    acc3 = x64.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref p, 48)), acc3);

                    p = ref Unsafe.Add(ref p, 64);
                    remaining -= 64;
                }

                // Fold 4 lanes: 3 XOR + 1 AES (minimal serial latency)
                acc0 ^= acc1;
                acc2 ^= acc3;
                acc0 ^= acc2;
                acc0 = x64.Aes.Encrypt(seedVec, acc0);

                // Drain remaining 0-63 bytes
                while (remaining >= 16)
                {
                    acc0 = x64.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref p), acc0);
                    p = ref Unsafe.Add(ref p, 16);
                    remaining -= 16;
                }

                // Remaining 1-15 bytes: use CRC to avoid overlap with drain blocks
                if (remaining > 0)
                {
                    uint crc = seed;
                    if (remaining >= 8)
                    {
                        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<ulong>(ref p));
                        p = ref Unsafe.Add(ref p, 8);
                        remaining -= 8;
                    }
                    if ((remaining & 4) != 0)
                    {
                        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<uint>(ref p));
                        p = ref Unsafe.Add(ref p, 4);
                    }
                    if ((remaining & 2) != 0)
                    {
                        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<ushort>(ref p));
                        p = ref Unsafe.Add(ref p, 2);
                    }
                    if ((remaining & 1) != 0)
                    {
                        crc = BitOperations.Crc32C(crc, p);
                    }
                    acc0 = x64.Aes.Encrypt(Vector128.CreateScalar(crc).AsByte(), acc0);
                }
            }
            else if (len > 32)
            {
                // 33-64 bytes: single lane; the final overlapping load from len-16
                // guarantees every byte is covered without over-reading.
                ref byte p = ref Unsafe.Add(ref start, 16);
                int remaining = len - 16;

                while (remaining > 16)
                {
                    acc0 = x64.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref p), acc0);
                    p = ref Unsafe.Add(ref p, 16);
                    remaining -= 16;
                }

                Vector128<byte> last = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, len - 16));
                acc0 = x64.Aes.Encrypt(last, acc0);
            }
            else
            {
                // 16-32 bytes: one overlapping load from the end covers the tail.
                Vector128<byte> data = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, len - 16));
                acc0 = x64.Aes.Encrypt(data, acc0);
            }

            // Compress 128 -> 64 -> 32 bits.
            ulong compressed = acc0.AsUInt64().GetElement(0) ^ acc0.AsUInt64().GetElement(1);
            return (int)(uint)(compressed ^ (compressed >> 32));
        }

        /// <summary>
        /// ARMv8 AES implementation; mirrors <see cref="FastHashAesX64"/>. ARM's AESE
        /// does not include MixColumns, so it is applied explicitly for diffusion.
        /// Requires len &gt;= 16.
        /// </summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        [SkipLocalsInit]
        internal static int FastHashAesArm(ref byte start, int len, uint seed)
        {
            Vector128<byte> seedVec = Vector128.CreateScalar(seed).AsByte();
            Vector128<byte> acc0 = Unsafe.As<byte, Vector128<byte>>(ref start) ^ seedVec;

            if (len > 64)
            {
                Vector128<byte> acc1 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, 16)) ^ seedVec;
                Vector128<byte> acc2 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, 32)) ^ seedVec;
                Vector128<byte> acc3 = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, 48)) ^ seedVec;

                ref byte p = ref Unsafe.Add(ref start, 64);
                int remaining = len - 64;

                while (remaining >= 64)
                {
                    acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref p), acc0));
                    acc1 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref p, 16)), acc1));
                    acc2 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref p, 32)), acc2));
                    acc3 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref p, 48)), acc3));

                    p = ref Unsafe.Add(ref p, 64);
                    remaining -= 64;
                }

                acc0 ^= acc1;
                acc2 ^= acc3;
                acc0 ^= acc2;
                acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(seedVec, acc0));

                while (remaining >= 16)
                {
                    acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref p), acc0));
                    p = ref Unsafe.Add(ref p, 16);
                    remaining -= 16;
                }

                if (remaining > 0)
                {
                    uint crc = seed;
                    if (remaining >= 8)
                    {
                        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<ulong>(ref p));
                        p = ref Unsafe.Add(ref p, 8);
                        remaining -= 8;
                    }
                    if ((remaining & 4) != 0)
                    {
                        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<uint>(ref p));
                        p = ref Unsafe.Add(ref p, 4);
                    }
                    if ((remaining & 2) != 0)
                    {
                        crc = BitOperations.Crc32C(crc, Unsafe.ReadUnaligned<ushort>(ref p));
                        p = ref Unsafe.Add(ref p, 2);
                    }
                    if ((remaining & 1) != 0)
                    {
                        crc = BitOperations.Crc32C(crc, p);
                    }
                    acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Vector128.CreateScalar(crc).AsByte(), acc0));
                }
            }
            else if (len > 32)
            {
                ref byte p = ref Unsafe.Add(ref start, 16);
                int remaining = len - 16;

                while (remaining > 16)
                {
                    acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(Unsafe.As<byte, Vector128<byte>>(ref p), acc0));
                    p = ref Unsafe.Add(ref p, 16);
                    remaining -= 16;
                }

                Vector128<byte> last = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, len - 16));
                acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(last, acc0));
            }
            else
            {
                Vector128<byte> data = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref start, len - 16));
                acc0 = Arm.Aes.MixColumns(Arm.Aes.Encrypt(data, acc0));
            }

            ulong compressed = acc0.AsUInt64().GetElement(0) ^ acc0.AsUInt64().GetElement(1);
            return (int)(uint)(compressed ^ (compressed >> 32));
        }

        /// <summary>
        /// CRC32C-based fallback for any length &gt;= 1 (and the only path for len &lt; 16).
        /// Four CRC lanes hide instruction latency for larger inputs; a final
        /// xor-mul-xor mix breaks CRC's GF(2) linearity for better avalanche.
        /// </summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        [SkipLocalsInit]
        internal static int FastHashCrc(ref byte start, int len, uint seed)
        {
            uint hash;
            if (len < 16)
            {
                if (len >= 8)
                {
                    // Two (possibly overlapping) 8-byte loads cover 8-15 bytes exactly.
                    ulong lo = Unsafe.ReadUnaligned<ulong>(ref start);
                    ulong hi = Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref start, len - 8));
                    uint h0 = BitOperations.Crc32C(seed, lo);
                    uint h1 = BitOperations.Crc32C(seed ^ 0x9E3779B9u, hi);
                    hash = h0 + BitOperations.RotateLeft(h1, 11);
                }
                else
                {
                    // 1-7 bytes: strict in-order tail, no over-read.
                    hash = CrcTailOrdered(seed, ref start, len);
                }
            }
            else
            {
                // Lane seeds use golden-ratio/finalizer constants for separation.
                uint h0 = seed;
                uint h1 = seed ^ 0x9E3779B9u;
                uint h2 = seed ^ 0x85EBCA6Bu;
                uint h3 = seed ^ 0xC2B2AE35u;

                ref byte q = ref start;
                int aligned = len & ~7; // round down to multiple of 8
                int remaining = aligned;

                while (remaining >= 64)
                {
                    h0 = BitOperations.Crc32C(h0, Unsafe.ReadUnaligned<ulong>(ref q));
                    h1 = BitOperations.Crc32C(h1, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 8)));
                    h2 = BitOperations.Crc32C(h2, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 16)));
                    h3 = BitOperations.Crc32C(h3, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 24)));

                    h0 = BitOperations.Crc32C(h0, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 32)));
                    h1 = BitOperations.Crc32C(h1, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 40)));
                    h2 = BitOperations.Crc32C(h2, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 48)));
                    h3 = BitOperations.Crc32C(h3, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 56)));

                    q = ref Unsafe.Add(ref q, 64);
                    remaining -= 64;
                }

                if (remaining >= 32)
                {
                    h0 = BitOperations.Crc32C(h0, Unsafe.ReadUnaligned<ulong>(ref q));
                    h1 = BitOperations.Crc32C(h1, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 8)));
                    h2 = BitOperations.Crc32C(h2, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 16)));
                    h3 = BitOperations.Crc32C(h3, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 24)));

                    q = ref Unsafe.Add(ref q, 32);
                    remaining -= 32;
                }

                // remaining is a multiple of 8 here: 0, 8, 16, or 24.
                if (remaining >= 8) h0 = BitOperations.Crc32C(h0, Unsafe.ReadUnaligned<ulong>(ref q));
                if (remaining >= 16) h1 = BitOperations.Crc32C(h1, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 8)));
                if (remaining >= 24) h2 = BitOperations.Crc32C(h2, Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref q, 16)));

                // Fold lanes; rotates + adds (carries) break CRC linearity.
                h2 = BitOperations.RotateLeft(h2, 17) + BitOperations.RotateLeft(h3, 23);
                h0 += BitOperations.RotateLeft(h1, 11);
                hash = h2 + h0;

                int tailBytes = len - aligned;
                if (tailBytes != 0)
                {
                    ref byte tailRef = ref Unsafe.Add(ref start, aligned);
                    hash = CrcTailOrdered(hash, ref tailRef, tailBytes);
                }
            }

            // Final mix: xor-fold, odd-constant multiply, xor-fold for avalanche.
            hash ^= hash >> 16;
            hash *= 0x9E3779B1u;
            hash ^= hash >> 16;
            return (int)hash;

            // length is 1..7: process 4-2-1 bytes in natural order, no over-read.
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            static uint CrcTailOrdered(uint hash, ref byte p, int length)
            {
                if ((length & 4) != 0)
                {
                    hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned<uint>(ref p));
                    p = ref Unsafe.Add(ref p, 4);
                }
                if ((length & 2) != 0)
                {
                    hash = BitOperations.Crc32C(hash, Unsafe.ReadUnaligned<ushort>(ref p));
                    p = ref Unsafe.Add(ref p, 2);
                }
                if ((length & 1) != 0)
                {
                    hash = BitOperations.Crc32C(hash, p);
                }
                return hash;
            }
        }
    }
}