abhigyanpatwari · magyargergo · May 12, 2026 · May 10, 2026
diff --git a/gitnexus/src/core/incremental/subgraph-extract.ts b/gitnexus/src/core/incremental/subgraph-extract.ts
@@ -17,6 +17,35 @@
  * The resulting subgraph is what gets passed to `loadGraphToLbug` after
  * the orchestrator has deleted the corresponding DB rows. Hydrated
  * unchanged-file rows are never touched in the DB.
+ *
+ * # Cross-file edge consistency (Finding 1)
+ *
+ * `extractChangedSubgraph` intentionally does NOT expand the set it is
+ * given — expansion is the orchestrator's job, so the SAME expanded set
+ * can be fed to both `deleteNodesForFile` and this function (asymmetry
+ * between the delete set and the write set silently corrupts the DB).
+ * `computeEffectiveWriteSet` below performs the boundary-crossing 1-hop
+ * walk; the orchestrator composes it with its importer-BFS expansion and
+ * passes the result here.
+ *
+ * Why the 1-hop walk is needed: consider a barrel re-export change —
+ * file C (a barrel) shifts `export { foo } from './b'` to
+ * `export { foo } from './d'`. After scope resolution, file A's CALLS
+ * edge to `foo` resolves to D instead of B, even though A's content is
+ * byte-for-byte identical:
+ *
+ *   - Old A→B edge survives in DB (neither A nor B is changed → not deleted)
+ *   - New A→D edge is missing (neither A nor D in writable set → skipped)
+ *
+ * Pulling the unchanged-side file of every writable-boundary-crossing
+ * edge into the write set fixes both halves: the orchestrator's
+ * `DETACH DELETE` cleans up the stale unchanged-side rows, and the new
+ * cross-file edges land because at least one endpoint is now writable.
+ *
+ * Limitation (documented): if a file X *stopped* importing from a
+ * changed file C, X has no edge to C in the new graph, so this 1-hop
+ * walk doesn't catch it. The orchestrator's importer-BFS (which reads
+ * IMPORTS from the pre-pipeline DB) covers that case instead.
  */
 
 import type { GraphNode, GraphRelationship } from 'gitnexus-shared';
@@ -25,6 +54,19 @@ import type { KnowledgeGraph } from '../graph/types.js';
 
 const isGraphWide = (label: string): boolean => label === 'Community' || label === 'Process';
 
+/**
+ * Build a Map<nodeId, filePath> for every File-bound node in the graph.
+ * Graph-wide nodes (Community/Process) have no filePath and are filtered.
+ */
+const indexNodeFilePaths = (fullGraph: KnowledgeGraph): Map<string, string> => {
+  const idx = new Map<string, string>();
+  fullGraph.forEachNode((n: GraphNode) => {
+    const fp = n.properties?.filePath as string | undefined;
+    if (fp) idx.set(n.id, fp);
+  });
+  return idx;
+};
+
 export const extractChangedSubgraph = (
   fullGraph: KnowledgeGraph,
   toWriteSet: ReadonlySet<string>,
@@ -49,3 +91,33 @@ export const extractChangedSubgraph = (
 
   return sub;
 };
+
+/**
+ * Public — derive the EFFECTIVE write-set: `toWriteSet` expanded by one
+ * hop along every edge in the new graph that crosses the writable
+ * boundary (one endpoint in a writable file, the other in an unchanged
+ * file). The unchanged-side file is pulled in so its stale rows are
+ * deleted + rewritten in lockstep with the changed side.
+ *
+ * Single pass over the edge list. Does NOT mutate `toWriteSet`. The
+ * orchestrator MUST feed the returned set to both `deleteNodesForFile`
+ * and `extractChangedSubgraph` — feeding the unexpanded set to either
+ * one leaves stale rows or PK-conflicts at COPY time.
+ */
+export const computeEffectiveWriteSet = (
+  fullGraph: KnowledgeGraph,
+  toWriteSet: ReadonlySet<string>,
+): Set<string> => {
+  const nodeFilePaths = indexNodeFilePaths(fullGraph);
+  const expanded = new Set<string>(toWriteSet);
+  fullGraph.forEachRelationship((r: GraphRelationship) => {
+    const sourcePath = nodeFilePaths.get(r.sourceId);
+    const targetPath = nodeFilePaths.get(r.targetId);
+    if (!sourcePath || !targetPath) return; // skip edges to graph-wide nodes
+    const sourceWritable = toWriteSet.has(sourcePath);
+    const targetWritable = toWriteSet.has(targetPath);
+    if (sourceWritable && !targetWritable) expanded.add(targetPath);
+    else if (targetWritable && !sourceWritable) expanded.add(sourcePath);
+  });
+  return expanded;
+};
diff --git a/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts b/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts
@@ -163,14 +163,20 @@ export async function runChunkedParseAndResolve(
     );
   }
 
-  // We previously sorted parseableScanned alphabetically here for stable
-  // chunk membership across runs (so the parse cache wouldn't miss when
-  // filesystem-scan order varied). Removed because it surfaced a
-  // pre-existing order-dependency in Ruby cross-file resolution
-  // (`user.address.save → Address#save` resolution depends on file
-  // processing order — a separate bug to fix). Filesystem order on most
-  // platforms is stable enough in practice that the cache still hits the
-  // common case; runs where it doesn't simply pay a cold-parse cost.
+  // Sort parseableScanned alphabetically for stable chunk membership
+  // across runs (Finding 4). Without this, filesystem-scan order can
+  // shift between runs (notably on macOS APFS where directory entry
+  // order can change after modifications) — different files in the
+  // same chunk → different chunk hash → cache miss even when no file
+  // content changed. The cache also becomes platform-specific: a
+  // Linux-built cache misses on macOS for the same repo.
+  //
+  // Note: this re-introduces a pre-existing order-dependency in Ruby
+  // cross-file resolution (`user.address.save → Address#save` resolves
+  // differently depending on file processing order). That bug is
+  // independent — the sort surfaces it but doesn't cause it. Tracking
+  // separately rather than letting the parse cache pay the cost.
+  parseableScanned.sort((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0));
 
   const totalParseable = parseableScanned.length;
 

diff --git a/gitnexus/src/core/run-analyze.ts b/gitnexus/src/core/run-analyze.ts
@@ -36,7 +36,7 @@ import {
   INCREMENTAL_SCHEMA_VERSION,
 } from '../storage/repo-manager.js';
 import { computeFileHashes, diffFileHashes } from '../storage/file-hash.js';
-import { extractChangedSubgraph } from './incremental/subgraph-extract.js';
+import { extractChangedSubgraph, computeEffectiveWriteSet } from './incremental/subgraph-extract.js';
 import { shadowCandidatesFor } from './incremental/shadow-candidates.js';
 import { loadParseCache, saveParseCache, pruneCache } from '../storage/parse-cache.js';
 import {
@@ -523,12 +523,26 @@ export async function runFullAnalysis(
         );
       }
 
-      // 1. Delete rows for files we're about to rewrite + deleted files.
-      //    Deduped: deleted entries may already appear in writableFiles via
-      //    BFS expansion (queryImporters can return a now-deleted path),
-      //    which would otherwise call deleteNodesForFile twice for the
-      //    same file (Bugbot LOW finding on PR #1479).
-      const filesToDelete = [...new Set([...writableFiles, ...hashDiff.deleted])];
+      // 1. Compute the EFFECTIVE write-set (Finding 1). Two layers,
+      //    composed:
+      //      (a) `writableFiles` — toWrite ∪ transitive importers of
+      //          changed/deleted files (the bounded BFS above, reading
+      //          IMPORTS from the pre-pipeline DB).
+      //      (b) `computeEffectiveWriteSet` — walks the NEW graph's
+      //          edges and pulls in any unchanged-side file that sits
+      //          on a writable-boundary-crossing edge (catches refined
+      //          cross-file CALLS edges that the pre-run DB couldn't
+      //          predict, e.g. a barrel re-export shifting `foo` from
+      //          B to D).
+      //    The composed set is the input to BOTH deleteNodesForFile
+      //    and extractChangedSubgraph — asymmetry between the two would
+      //    leave stale rows or PK-conflict at COPY time.
+      const effectiveWriteSet = computeEffectiveWriteSet(pipelineResult.graph, writableFiles);
+      // Deduped: deleted entries may already appear via importer-BFS
+      // expansion (queryImporters can return a now-deleted path), which
+      // would otherwise call deleteNodesForFile twice for the same file
+      // (Bugbot LOW finding on PR #1479).
+      const filesToDelete = [...new Set([...effectiveWriteSet, ...hashDiff.deleted])];
       for (let i = 0; i < filesToDelete.length; i++) {
         const f = filesToDelete[i];
         try {
@@ -546,8 +560,10 @@ export async function runFullAnalysis(
       await deleteAllCommunitiesAndProcesses();
 
       // 3. Extract the changed subgraph from the FULL ctx.graph and write
-      //    only that. Unchanged-file rows in the DB stay untouched.
-      const subgraph = extractChangedSubgraph(pipelineResult.graph, writableFiles);
+      //    only that. Unchanged-file rows in the DB stay untouched. Pass
+      //    the SAME effectiveWriteSet so the subgraph and the deletes
+      //    cover identical files (asymmetry would silently corrupt).
+      const subgraph = extractChangedSubgraph(pipelineResult.graph, effectiveWriteSet);
       await loadGraphToLbug(subgraph, pipelineResult.repoPath, storagePath, (msg) => {
         lbugMsgCount++;
         const pct = Math.min(84, 65 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 19));