-
Notifications
You must be signed in to change notification settings - Fork 1
core: Graph.labelPropagation — 13th graduation (community detector) #326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -358,3 +358,93 @@ module Graph = | |||||
| if community i = community j then | ||||||
| q <- q + (sym.[i, j] - (k.[i] * k.[j]) / twoM) | ||||||
| Some (q / twoM) | ||||||
|
|
||||||
| /// **Label propagation community detector.** | ||||||
| /// | ||||||
| /// A simple, non-spectral community detection algorithm. Each | ||||||
| /// node starts in its own community. Each iteration, every | ||||||
| /// node adopts the label that appears with greatest weighted | ||||||
| /// frequency among its neighbors (ties broken by lowest | ||||||
| /// community id for determinism). The algorithm stops when | ||||||
| /// no node changes label in a pass, or when `maxIterations` | ||||||
| /// is reached. | ||||||
| /// | ||||||
| /// Returns `Map<'N, int>` — node → community label. Empty | ||||||
| /// map on empty graph. | ||||||
| /// | ||||||
| /// **Trade-offs (documented to calibrate expectations):** | ||||||
| /// * Fast: O(iterations × edges), works without dense matrix. | ||||||
| /// * Quality: below Louvain / spectral methods for complex | ||||||
| /// structures, but catches obvious dense cliques reliably — | ||||||
| /// exactly the trivial-cartel-detect case. | ||||||
| /// * Determinism: tie-break by lowest community id (stable | ||||||
| /// across runs given same input). | ||||||
| /// * NOT a replacement for Louvain; a dependency-free first | ||||||
| /// pass. Future graduation: `Graph.louvain` using the | ||||||
| /// full modularity-optimizing procedure. | ||||||
| /// | ||||||
| /// Provenance: 12th ferry §5 + 13th ferry §2 "community | ||||||
| /// detection" + 14th ferry alert row "Modularity Q jump > | ||||||
| /// 0.1 or Q > 0.4 (community-detection-based)". | ||||||
| let labelPropagation | ||||||
| (maxIterations: int) | ||||||
| (g: Graph<'N>) | ||||||
| : Map<'N, int> = | ||||||
| let nodeList = nodes g |> Set.toList | ||||||
| let n = nodeList.Length | ||||||
| if n = 0 then Map.empty | ||||||
| else | ||||||
| let nodeArr = List.toArray nodeList | ||||||
| let idx = | ||||||
| nodeList | ||||||
| |> List.mapi (fun i node -> node, i) | ||||||
| |> Map.ofList | ||||||
| // Initial labels: each node in its own community | ||||||
| let labels = Array.init n id | ||||||
| // Pre-compute neighbor-list (combined in+out, weighted | ||||||
| // sum). For cartel detection we symmetrize. | ||||||
| let neighbors = Array.init n (fun _ -> ResizeArray<int * int64>()) | ||||||
| let span = g.Edges.AsSpan() | ||||||
| for k in 0 .. span.Length - 1 do | ||||||
| let entry = span.[k] | ||||||
| let (s, t) = entry.Key | ||||||
| let si = idx.[s] | ||||||
| let ti = idx.[t] | ||||||
| if entry.Weight <> 0L && si <> ti then | ||||||
|
||||||
| if entry.Weight <> 0L && si <> ti then | |
| if entry.Weight > 0L && si <> ti then |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -293,3 +293,45 @@ let ``modularityScore for single-community is 0`` () = | |
| let p = Map.ofList [ (1,0); (2,0); (3,0) ] | ||
| let q = Graph.modularityScore p g |> Option.defaultValue nan | ||
| abs q |> should (be lessThan) 1e-9 | ||
|
|
||
|
|
||
| // ─── labelPropagation ───────── | ||
|
|
||
| [<Fact>] | ||
| let ``labelPropagation returns empty map for empty graph`` () = | ||
| (Graph.empty : Graph<int>) |> Graph.labelPropagation 10 |> Map.count |> should equal 0 | ||
|
|
||
| [<Fact>] | ||
| let ``labelPropagation converges two dense cliques to two labels`` () = | ||
| // Two K3 cliques bridged by one thin edge. Label propagation | ||
| // should settle with nodes {1,2,3} sharing one label and | ||
| // nodes {4,5,6} sharing another. | ||
| let edges = [ | ||
| (1, 2, 10L); (2, 1, 10L); (2, 3, 10L); (3, 2, 10L); (3, 1, 10L); (1, 3, 10L) | ||
| (4, 5, 10L); (5, 4, 10L); (5, 6, 10L); (6, 5, 10L); (6, 4, 10L); (4, 6, 10L) | ||
| (3, 4, 1L); (4, 3, 1L) | ||
| ] | ||
| let g = Graph.fromEdgeSeq edges | ||
| let partition = Graph.labelPropagation 50 g | ||
| let labelA = partition.[1] | ||
| let labelB = partition.[4] | ||
| // Both cliques share a label within themselves | ||
| partition.[2] |> should equal labelA | ||
| partition.[3] |> should equal labelA | ||
| partition.[5] |> should equal labelB | ||
| partition.[6] |> should equal labelB | ||
|
Comment on lines +316 to +322
|
||
|
|
||
| [<Fact>] | ||
| let ``labelPropagation produces partition consumable by modularityScore`` () = | ||
| // The composition that enables a full cartel detector: LP | ||
| // produces a partition, modularityScore evaluates it. High | ||
| // modularity means LP found real community structure. | ||
| let edges = [ | ||
| (1, 2, 10L); (2, 1, 10L); (2, 3, 10L); (3, 2, 10L); (3, 1, 10L); (1, 3, 10L) | ||
| (4, 5, 10L); (5, 4, 10L); (5, 6, 10L); (6, 5, 10L); (6, 4, 10L); (4, 6, 10L) | ||
| (3, 4, 1L); (4, 3, 1L) | ||
| ] | ||
| let g = Graph.fromEdgeSeq edges | ||
| let partition = Graph.labelPropagation 50 g | ||
| let q = Graph.modularityScore partition g |> Option.defaultValue 0.0 | ||
| q |> should (be greaterThan) 0.3 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The doc comment’s provenance references (“12th ferry §5”, “13th ferry §2”, “14th ferry alert row …”) don’t appear to resolve to any docs in-tree (searching `docs/**/*.md` only finds generic ferry mentions in the ADR, not these sections). Please either link to a concrete file path/anchor that exists in the repo, or remove/adjust the references so readers can actually follow them.