diff --git a/gitnexus/src/core/group/extractors/manifest-extractor.ts b/gitnexus/src/core/group/extractors/manifest-extractor.ts index 4c0d737b79..83f5cab5ec 100644 --- a/gitnexus/src/core/group/extractors/manifest-extractor.ts +++ b/gitnexus/src/core/group/extractors/manifest-extractor.ts @@ -79,17 +79,50 @@ export class ManifestExtractor { links: GroupManifestLink[], dbExecutors?: Map, ): Promise { - const contracts: StoredContract[] = []; - const crossLinks: CrossLink[] = []; + // Resolve all (repo, link) pairs in parallel. The previous sequential + // await-per-link produced 2N round-trips; parallel resolution uses the + // per-repo executor pool directly and scales linearly with manifest size. + // + // Memoization: a manifest can list the same contract multiple times + // (e.g. a consumer and provider declaration, or cross-referenced groups). + // Key on (repo, type, contract) — the canonical input to the Cypher + // query — so duplicate links resolve to one DB hit. + type ResolvedSymbol = { filePath: string; name: string; uid: string } | null; + const resolveCache = new Map>(); + const resolveOnce = (repo: string, link: GroupManifestLink): Promise => { + const key = `${repo}\u0000${link.type}\u0000${link.contract}`; + let pending = resolveCache.get(key); + if (!pending) { + pending = this.resolveSymbol(repo, link, dbExecutors); + resolveCache.set(key, pending); + } + return pending; + }; - for (const link of links) { - const contractId = this.buildContractId(link.type, link.contract); + const perLink = await Promise.all( + links.map(async (link) => { + const contractId = this.buildContractId(link.type, link.contract); + const providerRepo = link.role === 'provider' ? link.from : link.to; + const consumerRepo = link.role === 'provider' ? 
/**
 * Dedupe cross-links that point from the same consumer endpoint to the same
 * provider endpoint for the same contract. Preserves first-seen order so the
 * caller controls precedence (e.g., pass manifest links first).
 *
 * Two links are considered the same when all of `from.repo`,
 * `from.symbolUid`, `to.repo`, `to.symbolUid`, `type` and `contractId` are
 * equal. Other fields are ignored, so the first-seen link's remaining fields
 * are the ones that survive.
 *
 * @param links - Cross-links in precedence order. The array is not mutated.
 * @returns A new array holding the first occurrence of each distinct link.
 */
function dedupeCrossLinks(links: CrossLink[]): CrossLink[] {
  const seen = new Set<string>();
  const out: CrossLink[] = [];
  for (const link of links) {
    // Encode the identity tuple as JSON rather than joining with '::' / '|'.
    // Those delimiters can occur inside the fields themselves (symbol uids and
    // contract ids are '::'-separated), so a plain join lets two different
    // links serialize to the same key and silently drops one of them.
    const key = JSON.stringify([
      link.from.repo,
      link.from.symbolUid,
      link.to.repo,
      link.to.symbolUid,
      link.type,
      link.contractId,
    ]);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(link);
  }
  return out;
}
+ const knownRepos = new Set(Object.keys(config.repos)); + for (const link of config.links) { + const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r)); + if (dangling.length > 0) { + console.warn( + `[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`, + ); + } + } + + const manifestEx = new ManifestExtractor(); + const manifestResult = await manifestEx.extractFromManifest(config.links, dbExecutors); + autoContracts.push(...manifestResult.contracts); + manifestCrossLinks = manifestResult.crossLinks; + if (opts?.verbose) { + console.log( + ` manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`, + ); + } + } + const { matched, unmatched } = runExactMatch(autoContracts); - const crossLinks: CrossLink[] = matched; + + // Dedupe cross-links. Manifest contracts participate in runExactMatch, so a + // manifest-declared link can also emit a matchType:'exact' CrossLink with the + // same endpoints. Prefer the manifest version — it reflects operator intent + // and carries matchType:'manifest' which downstream consumers may rely on. 
+ const crossLinks = dedupeCrossLinks([...manifestCrossLinks, ...matched]); const allContracts: StoredContract[] = autoContracts; const registry: ContractRegistry = { diff --git a/gitnexus/test/unit/group/manifest-extractor.test.ts b/gitnexus/test/unit/group/manifest-extractor.test.ts index 42ed86b848..59725dc861 100644 --- a/gitnexus/test/unit/group/manifest-extractor.test.ts +++ b/gitnexus/test/unit/group/manifest-extractor.test.ts @@ -583,4 +583,34 @@ describe('ManifestExtractor', () => { expect(result.contracts).toHaveLength(0); expect(result.crossLinks).toHaveLength(0); }); + + it('memoizes repeated (repo, type, contract) resolutions so each tuple hits the DB once', async () => { + const calls: Array<{ repo: string; cypher: string }> = []; + const execFor = (repo: string) => async (cypher: string) => { + calls.push({ repo, cypher }); + return [{ uid: `uid::${repo}`, name: 'handler', filePath: 'src/h.ts' }]; + }; + + const dbExecutors = new Map Promise[]>>([ + ['svc/a', execFor('svc/a')], + ['svc/b', execFor('svc/b')], + ]); + + // Two links declare the same (repo, type, contract) triple on each side, + // so naive sequential resolution would run 4 queries; memoization collapses + // to 2 (one per distinct repo tuple). + const link: GroupManifestLink = { + from: 'svc/b', + to: 'svc/a', + type: 'http', + contract: 'GET::/api/orders', + role: 'consumer', + }; + + await extractor.extractFromManifest([link, { ...link }], dbExecutors); + + // One resolution per distinct (repo, type, contract) — not per (link × side). 
+ expect(calls).toHaveLength(2); + expect(new Set(calls.map((c) => c.repo))).toEqual(new Set(['svc/a', 'svc/b'])); + }); }); diff --git a/gitnexus/test/unit/group/sync.test.ts b/gitnexus/test/unit/group/sync.test.ts index 50c9093b93..5aa586c255 100644 --- a/gitnexus/test/unit/group/sync.test.ts +++ b/gitnexus/test/unit/group/sync.test.ts @@ -3,7 +3,12 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; import * as os from 'node:os'; import { syncGroup, stableRepoPoolId } from '../../../src/core/group/sync.js'; -import type { GroupConfig, StoredContract, RepoHandle } from '../../../src/core/group/types.js'; +import type { + GroupConfig, + StoredContract, + RepoHandle, + GroupManifestLink, +} from '../../../src/core/group/types.js'; import type { RegistryEntry } from '../../../src/storage/repo-manager.js'; describe('syncGroup', () => { @@ -202,6 +207,110 @@ describe('syncGroup', () => { } }); + it('manifest links in config.links produce cross-links with matchType manifest', async () => { + const links: GroupManifestLink[] = [ + { + from: 'app/consumer', + to: 'app/provider', + type: 'http', + contract: 'GET::/api/orders', + role: 'consumer', + }, + ]; + + const config: GroupConfig = { + version: 1, + name: 'test', + description: '', + repos: { 'app/consumer': 'consumer-repo', 'app/provider': 'provider-repo' }, + links, + packages: {}, + detect: { + http: true, + grpc: false, + topics: false, + shared_libs: false, + embedding_fallback: false, + }, + matching: { bm25_threshold: 0.7, embedding_threshold: 0.65, max_candidates_per_step: 3 }, + }; + + const result = await syncGroup(config, { + extractorOverride: async () => [], + skipWrite: true, + }); + + // ManifestExtractor should inject 2 contracts (provider + consumer) and 1 cross-link + expect(result.contracts).toHaveLength(2); + const manifestLinks = result.crossLinks.filter((cl) => cl.matchType === 'manifest'); + expect(manifestLinks).toHaveLength(1); + 
expect(manifestLinks[0].contractId).toBe('http::GET::/api/orders'); + expect(manifestLinks[0].from.repo).toBe('app/consumer'); + expect(manifestLinks[0].to.repo).toBe('app/provider'); + expect(manifestLinks[0].confidence).toBe(1.0); + + // With no DB executors available, UIDs fall back to the deterministic + // synthetic form `manifest::::`. + expect(manifestLinks[0].from.symbolUid).toBe('manifest::app/consumer::http::GET::/api/orders'); + expect(manifestLinks[0].to.symbolUid).toBe('manifest::app/provider::http::GET::/api/orders'); + + // Manifest contracts also participate in runExactMatch; we must not emit a + // duplicate matchType:'exact' cross-link for the same endpoint pair. + const exactForSameContract = result.crossLinks.filter( + (cl) => cl.matchType === 'exact' && cl.contractId === 'http::GET::/api/orders', + ); + expect(exactForSameContract).toHaveLength(0); + expect(result.crossLinks).toHaveLength(1); + }); + + it('manifest links referencing unknown repos still produce cross-links via synthetic UIDs', async () => { + const links: GroupManifestLink[] = [ + { + from: 'app/known', + to: 'app/dangling', // not present in config.repos + type: 'http', + contract: 'POST::/api/missing', + role: 'consumer', + }, + ]; + + const config: GroupConfig = { + version: 1, + name: 'test', + description: '', + repos: { 'app/known': 'known-repo' }, + links, + packages: {}, + detect: { + http: true, + grpc: false, + topics: false, + shared_libs: false, + embedding_fallback: false, + }, + matching: { bm25_threshold: 0.7, embedding_threshold: 0.65, max_candidates_per_step: 3 }, + }; + + const warnings: string[] = []; + const origWarn = console.warn; + console.warn = (msg: string) => warnings.push(String(msg)); + try { + const result = await syncGroup(config, { + extractorOverride: async () => [], + skipWrite: true, + }); + + expect(result.crossLinks).toHaveLength(1); + expect(result.crossLinks[0].matchType).toBe('manifest'); + expect(result.crossLinks[0].to.symbolUid).toBe( + 
'manifest::app/dangling::http::POST::/api/missing', + ); + expect(warnings.some((w) => w.includes('app/dangling'))).toBe(true); + } finally { + console.warn = origWarn; + } + }); + it('writes registry to groupDir when skipWrite is false', async () => { const tmpDir = path.join(os.tmpdir(), `gitnexus-sync-write-${Date.now()}`); fs.mkdirSync(tmpDir, { recursive: true });