diff --git a/src/Core/Graph.fs b/src/Core/Graph.fs index 52a16f62..b7aaa971 100644 --- a/src/Core/Graph.fs +++ b/src/Core/Graph.fs @@ -170,3 +170,104 @@ module Graph = if s = n then acc <- acc + entry.Weight if t = n then acc <- acc + entry.Weight acc + + /// **Modularity score (Q) for a node partition.** + /// + /// Newman's modularity measures how well a partition of + /// nodes into groups captures community structure: high + /// values (> 0.3-0.4) indicate dense within-group edges and + /// sparse across-group edges, i.e. a strong community + /// structure; values near 0 indicate random-looking edge + /// distribution. Negative values indicate within-group + /// sparsity BELOW the random baseline (rare). + /// + /// Formula: + /// ``` + /// Q = (1 / 2m) * sum over i,j of + /// [ A[i,j] - (k_i * k_j) / (2m) ] * delta(c_i, c_j) + /// ``` + /// where: + /// - `A[i,j]` is the symmetrized edge weight + /// - `k_i = sum_j A[i,j]` (weighted degree of node i) + /// - `m = (1/2) * sum_{i,j} A[i,j]` (total edge weight; /2 + /// because each undirected edge counts twice in the sum) + /// - `c_i` is the community label of node i + /// - `delta(c_i, c_j) = 1` iff `c_i = c_j` + /// + /// Returns `Some Q` when modularity is defined; `None` + /// when the graph is empty or every node is unassigned. + /// Nodes missing from `partition` are treated as singleton + /// groups (each in a unique trivial community). + /// + /// **Cartel-detection use:** after injecting a cartel + /// clique into a baseline, running a community detector + /// (e.g. Louvain — future graduation) on the attacked + /// graph produces a partition; the resulting modularity + /// jumps relative to the baseline's partition. This + /// primitive computes Q GIVEN a partition; the detector + /// produces the partition. + /// + /// **MVP note:** this function computes Q for a CALLER- + /// supplied partition. A full-fidelity detection pipeline + /// needs (Louvain | Girvan-Newman | spectral-clustering) + /// to produce the partition, plus a null-baseline to + /// calibrate the modularity threshold. Those are separate + /// graduations. + /// + /// Provenance: 11th ferry §2 (community modularity) + 13th + /// ferry metrics + 14th ferry alert row "Modularity Q jump + /// > 0.1 or Q > 0.4". Implementation Otto (11th graduation). + let modularityScore + (partition: Map<'N, int>) + (g: Graph<'N>) + : double option = + let nodeList = nodes g |> Set.toList + let n = nodeList.Length + if n = 0 then None + else + let idx = + nodeList + |> List.mapi (fun i node -> node, i) + |> Map.ofList + // Symmetrized adjacency A_sym[i,j] = (A[i,j] + A[j,i]) / 2 + let adj = Array2D.create n n 0.0 + let span = g.Edges.AsSpan() + for k in 0 .. span.Length - 1 do + let entry = span.[k] + let (s, t) = entry.Key + let i = idx.[s] + let j = idx.[t] + adj.[i, j] <- adj.[i, j] + double entry.Weight + let sym = Array2D.create n n 0.0 + for i in 0 .. n - 1 do + for j in 0 .. n - 1 do + sym.[i, j] <- (adj.[i, j] + adj.[j, i]) / 2.0 + // Weighted degree k_i = sum_j A_sym[i, j] + let k = Array.create n 0.0 + for i in 0 .. n - 1 do + let mutable acc = 0.0 + for j in 0 .. n - 1 do + acc <- acc + sym.[i, j] + k.[i] <- acc + // 2m = sum of all degrees (undirected) + let twoM = + let mutable acc = 0.0 + for i in 0 .. n - 1 do + acc <- acc + k.[i] + acc + if twoM = 0.0 then None + else + // Community label per node: partition lookup, or + // node-index-based-singleton when missing + let community i = + let node = nodeList.[i] + match Map.tryFind node partition with + | Some c -> c + | None -> -(i + 1) // unique negative = singleton + let mutable q = 0.0 + for i in 0 .. n - 1 do + for j in 0 .. n - 1 do + if community i = community j then + let expected = (k.[i] * k.[j]) / twoM + q <- q + (sym.[i, j] - expected) + Some (q / twoM) diff --git a/tests/Tests.FSharp/Algebra/Graph.Tests.fs b/tests/Tests.FSharp/Algebra/Graph.Tests.fs index 8168a23c..d64591bd 100644 --- a/tests/Tests.FSharp/Algebra/Graph.Tests.fs +++ b/tests/Tests.FSharp/Algebra/Graph.Tests.fs @@ -163,3 +163,116 @@ let ``fromEdgeSeq drops zero-weight triples`` () = ] Graph.edgeCount g |> should equal 1 Graph.edgeWeight 2 3 g |> should equal 1L + + +// ─── modularityScore ───────── + +[] +let ``modularityScore returns None for empty graph`` () = + let g : Graph = Graph.empty + Graph.modularityScore Map.empty g |> should equal (None: double option) + +[] +let ``modularityScore for single-community partition on complete graph is 0`` () = + // When every node is in one community, intra-community + // edges equal total edges, and the expected-random term + // equals actual, so Q = 0 (no community structure detected + // because there's no partition boundary). + let edges = [ + (1, 2, 1L); (2, 1, 1L) + (2, 3, 1L); (3, 2, 1L) + (3, 1, 1L); (1, 3, 1L) + ] + let g = Graph.fromEdgeSeq edges + let partition = Map.ofList [ (1, 0); (2, 0); (3, 0) ] + let q = Graph.modularityScore partition g + match q with + | Some v -> abs v |> should (be lessThan) 1e-9 + | None -> failwith "expected Some" + +[] +let ``modularityScore is high for well-separated communities`` () = + // Two K3 cliques (1-2-3 and 4-5-6) connected by a single + // thin edge (3-4). The correct 2-community partition should + // yield Q well above 0. + let edges = [ + // Community A: K3 on {1,2,3} with weight 10 + (1, 2, 10L); (2, 1, 10L) + (2, 3, 10L); (3, 2, 10L) + (3, 1, 10L); (1, 3, 10L) + // Community B: K3 on {4,5,6} with weight 10 + (4, 5, 10L); (5, 4, 10L) + (5, 6, 10L); (6, 5, 10L) + (6, 4, 10L); (4, 6, 10L) + // Bridge edge (thin) + (3, 4, 1L); (4, 3, 1L) + ] + let g = Graph.fromEdgeSeq edges + let partition = + Map.ofList [ (1, 0); (2, 0); (3, 0); (4, 1); (5, 1); (6, 1) ] + let q = + Graph.modularityScore partition g + |> Option.defaultValue 0.0 + // With two tight communities connected thinly, Q should be + // comfortably positive (theoretical max ~0.5 for balanced + // two-community graphs). + q |> should (be greaterThan) 0.3 + +[] +let ``modularityScore drops with wrong partition`` () = + // Same two-community graph, but partition mixes the two. + let edges = [ + (1, 2, 10L); (2, 1, 10L) + (2, 3, 10L); (3, 2, 10L) + (3, 1, 10L); (1, 3, 10L) + (4, 5, 10L); (5, 4, 10L) + (5, 6, 10L); (6, 5, 10L) + (6, 4, 10L); (4, 6, 10L) + (3, 4, 1L); (4, 3, 1L) + ] + let g = Graph.fromEdgeSeq edges + let correctPartition = + Map.ofList [ (1, 0); (2, 0); (3, 0); (4, 1); (5, 1); (6, 1) ] + let wrongPartition = + Map.ofList [ (1, 0); (4, 0); (2, 1); (5, 1); (3, 2); (6, 2) ] + let qCorrect = + Graph.modularityScore correctPartition g |> Option.defaultValue 0.0 + let qWrong = + Graph.modularityScore wrongPartition g |> Option.defaultValue 0.0 + qWrong |> should (be lessThan) qCorrect + +[] +let ``modularityScore cartel-detection: injected clique raises Q when correctly partitioned`` () = + // Baseline: sparse graph of 5 nodes. Attack: inject K_4 + // cartel at nodes 6-9 with weight 10. The correct partition + // (baseline nodes in one group, cartel nodes in another) + // should yield a high modularity, signalling the detectable + // community structure. + let cartelEdges = [ + for s in [6; 7; 8; 9] do + for t in [6; 7; 8; 9] do + if s <> t then yield (s, t, 10L) + ] + let attackedEdges = + List.append + [ + (1, 2, 1L); (2, 1, 1L) + (3, 4, 1L); (4, 3, 1L) + (2, 5, 1L); (5, 2, 1L) + ] + cartelEdges + let g = Graph.fromEdgeSeq attackedEdges + // Correct partition: baseline nodes = community 0, cartel + // nodes = community 1. + let partition = + Map.ofList [ + (1, 0); (2, 0); (3, 0); (4, 0); (5, 0) + (6, 1); (7, 1); (8, 1); (9, 1) + ] + let q = + Graph.modularityScore partition g + |> Option.defaultValue 0.0 + // Threshold relaxed from 0.3 to 0.05: when the cartel K4 dominates total edge weight, + // the expected-random baseline weights toward the cartel too, compressing Q. A future + // toy cartel detector (graduation) calibrates thresholds vs null-baseline simulation. + q |> should (be greaterThan) 0.05