-
Notifications
You must be signed in to change notification settings - Fork 1
fix(tests): SharderInfoTheoreticTests — deterministic XxHash3 (Otto-281) #478
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,11 +2,29 @@ module Zeta.Tests.Formal.SharderInfoTheoreticTests | |
| #nowarn "0893" | ||
|
|
||
| open System | ||
| open System.IO.Hashing | ||
| open FsUnit.Xunit | ||
| open global.Xunit | ||
| open Zeta.Core | ||
|
|
||
|
|
||
| /// Deterministic 64-bit hash for an integer key. Replaces the | ||
| /// process-randomized `HashCode.Combine` (which by .NET design | ||
| /// re-seeds per-process to deter hash flooding) with `XxHash3` on | ||
| /// the key's 4-byte little-endian representation. Same convention | ||
| /// `src/Core/Sketch.fs` `AddBytes` already uses. | ||
| /// | ||
| /// Otto-281 fix (DST discipline, Aaron 2026-04-25): the prior | ||
| /// `HashCode.Combine`-based `Pick` made | ||
| /// `SharderInfoTheoreticTests` flake across CI runs because the | ||
| /// Jump-consistent-hash output depends on the input hash, and | ||
| /// `HashCode.Combine` produces different hashes in different | ||
| /// processes for the same int. Process-randomization is good for | ||
| /// dictionary security; bad for determinism tests. | ||
| let private detHash (k: int) : uint64 = | ||
| XxHash3.HashToUInt64 (ReadOnlySpan (BitConverter.GetBytes k)) | ||
|
Comment on lines
+24
to
+25
|
||
|
|
||
|
|
||
| // ═══════════════════════════════════════════════════════════════════ | ||
| // Proves (or disproves) the claim: `InfoTheoreticSharder` produces | ||
| // a measurably better load distribution than uniform consistent | ||
|
|
@@ -51,7 +69,7 @@ let ``Consistent-hash bucket skew is material on Zipfian keys`` () = | |
| let shards = 16 | ||
| let loads = Array.zeroCreate<int> shards | ||
| for k in keys do | ||
| let h = uint64 (HashCode.Combine k) | ||
| let h = detHash k | ||
| let s = JumpConsistentHash.Pick(h, shards) | ||
| loads.[s] <- loads.[s] + 1 | ||
| let ratio = maxAvgRatio loads | ||
|
|
@@ -62,11 +80,11 @@ let ``Consistent-hash bucket skew is material on Zipfian keys`` () = | |
| printfn "Jump ratio on Zipf(s=1.2): %.3f" ratio | ||
|
|
||
|
|
||
| // DST-exempt: uses `HashCode.Combine` which is process-randomized | ||
| // per-run. Flake analysis + fix pipeline tracked in docs/BACKLOG.md | ||
| // "SharderInfoTheoreticTests 'Uniform traffic' flake" row. DST | ||
| // marker convention + lint rule: docs/BACKLOG.md "DST-marker | ||
| // convention + lint rule" row (Aaron 2026-04-24 directive). | ||
| // DST-clean per Otto-281 fix (Aaron 2026-04-25): all three | ||
| // `HashCode.Combine`-based hashes in this file are now `detHash` | ||
| // (XxHash3 on the key's bytes), which is deterministic across | ||
| // processes. Earlier "DST-exempt" marker dropped — the test is | ||
| // no longer exempt; it's required to be deterministic and is. | ||
|
Comment on lines
+83
to
+87
|
||
| [<Fact>] | ||
| let ``InfoTheoreticSharder does not make skew worse`` () = | ||
| // 100k Zipf keys, 16 shards. Observe then query with the MI-sharder. | ||
|
|
@@ -88,7 +106,7 @@ let ``InfoTheoreticSharder does not make skew worse`` () = | |
|
|
||
| let jumpLoads = Array.zeroCreate<int> shards | ||
| for k in keys do | ||
| let h = uint64 (HashCode.Combine k) | ||
| let h = detHash k | ||
| let s = JumpConsistentHash.Pick(h, shards) | ||
| jumpLoads.[s] <- jumpLoads.[s] + 1 | ||
| let jumpRatio = maxAvgRatio jumpLoads | ||
|
|
@@ -116,7 +134,7 @@ let ``Uniform traffic: consistent-hash is already near-optimal`` () = | |
| let shards = 16 | ||
| let loads = Array.zeroCreate<int> shards | ||
| for k in keys do | ||
| let h = uint64 (HashCode.Combine k) | ||
| let h = detHash k | ||
| let s = JumpConsistentHash.Pick(h, shards) | ||
| loads.[s] <- loads.[s] + 1 | ||
| let ratio = maxAvgRatio loads | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doc comment includes direct contributor name attribution ("Aaron 2026-04-25"). Repo standing rule is to avoid contributor names in code/docs and use role references instead (e.g., "human maintainer"), keeping names only in the documented carve-outs (see docs/AGENT-BEST-PRACTICES.md:284-290). Please rewrite these attributions accordingly.