diff --git a/src/Core/Graph.fs b/src/Core/Graph.fs
index 12227054..0c7b66e9 100644
--- a/src/Core/Graph.fs
+++ b/src/Core/Graph.fs
@@ -526,3 +526,75 @@ module Graph =
             let modularityShift = qAttacked - qBaseline
             Some (alpha * spectralGrowth + beta * modularityShift)
         | _ -> None
+
+    /// **Robust-z-score variant of coordinationRiskScore.**
+    ///
+    /// Upgrades the MVP composite from raw linear differences
+    /// (per PR #328) to robust standardized scores per Amara
+    /// 17th-ferry correction #4 (robust statistics for
+    /// adversarial data).
+    ///
+    /// Formula:
+    /// ```
+    /// risk = alpha * Z(λ₁_attacked; baselineLambdas)
+    ///      + beta * Z(Q_attacked; baselineQs)
+    /// ```
+    /// where `Z(x; baseline) = (x - median(baseline)) /
+    /// (1.4826 * MAD(baseline))`.
+    ///
+    /// Caller provides `baselineLambdas` + `baselineQs` —
+    /// sequences of metric values computed across many
+    /// known-null baseline samples. Both sequences are
+    /// materialized once, up front, so callers may pass
+    /// arrays, lists, or lazy `seq` values without
+    /// re-enumeration cost. The distributions calibrate
+    /// thresholds from data rather than hard-coding them.
+    ///
+    /// Returns `None` when either baseline is empty (checked
+    /// before any eigenvalue / label-propagation work), when
+    /// `largestEigenvalue` or `modularityScore` returns `None`
+    /// for `attacked`, or when either robust z-score is
+    /// undefined.
+    ///
+    /// Future expansion: the full 6-term CoordinationRiskScore
+    /// from Amara's 17th ferry adds Sync_S + Exclusivity_S +
+    /// Influence_S terms. This MVP covers λ₁ + Q — the two
+    /// signals with shipped primitives. Additional terms land
+    /// as their primitives mature.
+    ///
+    /// Provenance: external AI collaborator's 17th
+    /// courier ferry Part 2 correction #4 (robust
+    /// z-scores for adversarial data) plus the corrected
+    /// composite-score formula. Eighteenth graduation
+    /// under the Otto-105 cadence.
+    let coordinationRiskScoreRobust
+        (alpha: double)
+        (beta: double)
+        (eigenTol: double)
+        (eigenIter: int)
+        (lpIter: int)
+        (baselineLambdas: double seq)
+        (baselineQs: double seq)
+        (attacked: Graph<'N>)
+        : double option =
+        // Materialize both baselines before touching the graph: an
+        // empty baseline makes the score undefined (`robustZScore`
+        // returns `None`), so the eigenvalue and label-propagation
+        // passes can be skipped entirely in that case.
+        let lambdas = Seq.toArray baselineLambdas
+        let qs = Seq.toArray baselineQs
+        if Array.isEmpty lambdas || Array.isEmpty qs then
+            None
+        else
+            match largestEigenvalue eigenTol eigenIter attacked with
+            | None -> None
+            | Some lambdaAttacked ->
+                let partition = labelPropagation lpIter attacked
+                match modularityScore partition attacked with
+                | None -> None
+                | Some qAttacked ->
+                    match RobustStats.robustZScore lambdas lambdaAttacked,
+                          RobustStats.robustZScore qs qAttacked with
+                    | Some zLambda, Some zQ ->
+                        Some (alpha * zLambda + beta * zQ)
+                    | _ -> None
diff --git a/src/Core/RobustStats.fs b/src/Core/RobustStats.fs
index 48480820..910289c9 100644
--- a/src/Core/RobustStats.fs
+++ b/src/Core/RobustStats.fs
@@ -100,3 +100,48 @@ module RobustStats =
             let threshold = 3.0 * max d MadFloor
             let kept = arr |> Array.filter (fun x -> abs (x - m) <= threshold)
             median kept
+
+    /// **Robust z-score.** Given a `baseline` distribution
+    /// and a `measurement`, return
+    /// `(measurement - median(baseline)) / (1.4826 * MAD(baseline))`.
+    /// The 1.4826 constant scales MAD to be consistent with
+    /// the standard deviation of a normal distribution (so
+    /// robust z-scores are directly comparable to ordinary
+    /// z-scores when the baseline actually IS normal).
+    ///
+    /// Returns `None` when the baseline is empty. When
+    /// MAD collapses to zero (every baseline value
+    /// identical), `MadFloor` is substituted so the
+    /// function returns `Some` finite value rather than
+    /// `None` or infinity — the floor reflects "scale is
+    /// below epsilon" rather than "scale is undefined."
+    /// Per Copilot review thread 59VhYb: the earlier doc
+    /// contradicted the implementation by claiming None
+    /// on MAD=0; the implementation is the contract.
+    ///
+    /// Why robust z-scores for adversarial data: ordinary
+    /// z-scores assume Gaussian baseline; an attacker can
+    /// poison a ~normal distribution by adding a few outliers
+    /// that inflate the standard deviation, making subsequent
+    /// real attacks look "within one sigma" and evade
+    /// detection. Median+MAD survives ~50% adversarial
+    /// outliers.
+    ///
+    /// Provenance: Amara 17th-ferry Part 2 correction #4
+    /// (robust statistics for adversarial data in
+    /// CoordinationRiskScore composition).
+    let robustZScore (baseline: double seq) (measurement: double) : double option =
+        // Materialize the baseline once. `median` + `mad`
+        // both need to walk the sequence; re-enumerating
+        // `double seq` costs O(n) twice AND can yield
+        // inconsistent results if the seq is lazy/non-
+        // repeatable (Copilot review thread 59VhYq).
+        let baselineArr = Seq.toArray baseline
+        median baselineArr
+        |> Option.bind (fun med ->
+            mad baselineArr
+            |> Option.map (fun spread ->
+                // Floor the spread so an all-identical baseline
+                // (MAD = 0) cannot cause a division by zero.
+                let scale = 1.4826 * max spread MadFloor
+                (measurement - med) / scale))
diff --git a/tests/Tests.FSharp/Algebra/Graph.Tests.fs b/tests/Tests.FSharp/Algebra/Graph.Tests.fs
index bbc1e214..918011f0 100644
--- a/tests/Tests.FSharp/Algebra/Graph.Tests.fs
+++ b/tests/Tests.FSharp/Algebra/Graph.Tests.fs
@@ -381,3 +381,81 @@ let ``coordinationRiskScore is near zero when attacked == baseline`` () =
         Graph.coordinationRiskScore 0.5 0.5 1e-9 500 50 g g
         |> Option.defaultValue nan
     abs score |> should (be lessThan) 0.2
+
+
+// ─── coordinationRiskScoreRobust + RobustStats.robustZScore ─────────
+// NOTE(review): the test attributes arrived as a bare `[]` in the text
+// under review; `[<Fact>]` (xUnit) is assumed here — confirm.
+
+[<Fact>]
+let ``robustZScore returns None on empty baseline`` () =
+    RobustStats.robustZScore [] 1.0 |> should equal (None: double option)
+
+[<Fact>]
+let ``robustZScore of measurement equal to baseline median is 0`` () =
+    // Baseline [1,2,3,4,5]; median = 3; measurement 3 → z = 0
+    let z = RobustStats.robustZScore [1.0; 2.0; 3.0; 4.0; 5.0] 3.0 |> Option.defaultValue 999.0
+    abs z |> should (be lessThan) 1e-9
+
+[<Fact>]
+let ``robustZScore scales MAD by 1.4826 for Gaussian consistency`` () =
+    // Baseline [1,2,3,4,5]; median=3; MAD=1; scale = 1.4826.
+    // Measurement 4: z = (4-3)/1.4826 ≈ 0.6745.
+    let z = RobustStats.robustZScore [1.0; 2.0; 3.0; 4.0; 5.0] 4.0 |> Option.defaultValue 0.0
+    abs (z - 1.0 / 1.4826) |> should (be lessThan) 1e-9
+
+[<Fact>]
+let ``robustZScore stays finite when baseline MAD collapses to zero`` () =
+    // Every baseline value identical → MAD = 0, so MadFloor is
+    // substituted and the score must be a finite positive number.
+    let z = RobustStats.robustZScore [2.0; 2.0; 2.0] 3.0 |> Option.defaultValue nan
+    (System.Double.IsNaN z || System.Double.IsInfinity z) |> should equal false
+    z |> should (be greaterThan) 0.0
+
+[<Fact>]
+let ``coordinationRiskScoreRobust fires strongly on cartel-injected graph`` () =
+    // Gather baseline samples: 5 sparse graphs with varying
+    // small lambdas and modularities. Build each as a slightly
+    // perturbed 5-node random graph.
+    let rng = System.Random(42)
+    let baselineGraphs =
+        [| for _ in 1 .. 5 ->
+            [ for _ in 1 .. 5 do
+                let s = rng.Next(5)
+                let t = rng.Next(5)
+                if s <> t then yield (s, t, 1L) ]
+            |> Graph.fromEdgeSeq |]
+    let baselineLambdas =
+        baselineGraphs
+        |> Array.choose (fun g -> Graph.largestEigenvalue 1e-9 200 g)
+    let baselineQs =
+        baselineGraphs
+        |> Array.choose (fun g ->
+            let p = Graph.labelPropagation 30 g
+            Graph.modularityScore p g)
+    // Now build the attacked graph with K4 cartel.
+    let cartelEdges = [
+        for s in [6; 7; 8; 9] do
+            for t in [6; 7; 8; 9] do
+                if s <> t then yield (s, t, 10L)
+    ]
+    let attacked =
+        [ yield! [(0, 1, 1L); (1, 2, 1L); (3, 4, 1L)]
+          yield! cartelEdges ]
+        |> Graph.fromEdgeSeq
+    let risk =
+        Graph.coordinationRiskScoreRobust
+            0.5 0.5 1e-9 500 50
+            baselineLambdas baselineQs attacked
+        |> Option.defaultValue 0.0
+    // Robust score: we expect a clear positive signal when
+    // lambda and/or Q jumps substantially beyond the baseline
+    // MAD. With K4 injected, lambda_attacked is much larger
+    // than any baseline value.
+    risk |> should (be greaterThan) 1.0
+
+[<Fact>]
+let ``coordinationRiskScoreRobust returns None when baselines empty`` () =
+    let g = Graph.fromEdgeSeq [ (1, 2, 1L); (2, 1, 1L) ]
+    Graph.coordinationRiskScoreRobust 0.5 0.5 1e-9 200 30 [||] [||] g
+    |> should equal (None: double option)