Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions gitnexus/src/core/group/extractors/manifest-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,50 @@ export class ManifestExtractor {
links: GroupManifestLink[],
dbExecutors?: Map<string, CypherExecutor>,
): Promise<ManifestExtractResult> {
const contracts: StoredContract[] = [];
const crossLinks: CrossLink[] = [];
// Resolve all (repo, link) pairs in parallel. The previous sequential
// await-per-link produced 2N round-trips; parallel resolution uses the
// per-repo executor pool directly and scales linearly with manifest size.
//
// Memoization: a manifest can list the same contract multiple times
// (e.g. a consumer and provider declaration, or cross-referenced groups).
// Key on (repo, type, contract) — the canonical input to the Cypher
// query — so duplicate links resolve to one DB hit.
type ResolvedSymbol = { filePath: string; name: string; uid: string } | null;
const resolveCache = new Map<string, Promise<ResolvedSymbol>>();
const resolveOnce = (repo: string, link: GroupManifestLink): Promise<ResolvedSymbol> => {
const key = `${repo}\u0000${link.type}\u0000${link.contract}`;
let pending = resolveCache.get(key);
if (!pending) {
pending = this.resolveSymbol(repo, link, dbExecutors);
resolveCache.set(key, pending);
}
return pending;
};

for (const link of links) {
const contractId = this.buildContractId(link.type, link.contract);
const perLink = await Promise.all(
links.map(async (link) => {
const contractId = this.buildContractId(link.type, link.contract);
const providerRepo = link.role === 'provider' ? link.from : link.to;
const consumerRepo = link.role === 'provider' ? link.to : link.from;
const [providerSymbol, consumerSymbol] = await Promise.all([
resolveOnce(providerRepo, link),
resolveOnce(consumerRepo, link),
]);
return { link, contractId, providerRepo, consumerRepo, providerSymbol, consumerSymbol };
}),
);

const providerRepo = link.role === 'provider' ? link.from : link.to;
const consumerRepo = link.role === 'provider' ? link.to : link.from;
const contracts: StoredContract[] = [];
const crossLinks: CrossLink[] = [];

const providerSymbol = await this.resolveSymbol(providerRepo, link, dbExecutors);
const consumerSymbol = await this.resolveSymbol(consumerRepo, link, dbExecutors);
for (const {
link,
contractId,
providerRepo,
consumerRepo,
providerSymbol,
consumerSymbol,
} of perLink) {
const providerRef = providerSymbol || { filePath: '', name: link.contract };
const consumerRef = consumerSymbol || { filePath: '', name: link.contract };
// When the resolver finds a real graph symbol we keep its uid, otherwise
Expand Down
57 changes: 56 additions & 1 deletion gitnexus/src/core/group/sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import type { GroupConfig, RepoHandle, RepoSnapshot, StoredContract, CrossLink }
import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
import { GrpcExtractor } from './extractors/grpc-extractor.js';
import { TopicExtractor } from './extractors/topic-extractor.js';
import { ManifestExtractor } from './extractors/manifest-extractor.js';
import { runExactMatch } from './matching.js';
import { detectServiceBoundaries, assignService } from './service-boundary-detector.js';
import type { CypherExecutor } from './contract-extractor.js';
Expand Down Expand Up @@ -60,10 +61,28 @@ function defaultResolveHandle(allEntries: RegistryEntry[]) {
};
}

/**
 * Remove duplicate cross-links, keeping the first occurrence of each.
 *
 * Two links are considered duplicates when they agree on all of: the consumer
 * endpoint (`from.repo`, `from.symbolUid`), the provider endpoint (`to.repo`,
 * `to.symbolUid`), `type`, and `contractId`. First-seen order is preserved so
 * the caller controls precedence (e.g., pass manifest links first).
 *
 * @param links - Cross-links in precedence order; the array is not mutated.
 * @returns A new array containing the first link seen for each distinct identity.
 */
function dedupeCrossLinks(links: CrossLink[]): CrossLink[] {
  const seen = new Set<string>();
  const out: CrossLink[] = [];
  for (const link of links) {
    // Encode the identity tuple as JSON instead of joining with "::" / "|".
    // Symbol UIDs and contract ids themselves contain "::", so a plain join
    // lets distinct tuples (repo "a" + uid "b::c" vs repo "a::b" + uid "c")
    // collapse onto the same key and silently drop a real link.
    const key = JSON.stringify([
      link.from.repo,
      link.from.symbolUid,
      link.to.repo,
      link.to.symbolUid,
      link.type,
      link.contractId,
    ]);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(link);
  }
  return out;
}

export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promise<SyncResult> {
const missingRepos: string[] = [];
const repoSnapshots: Record<string, RepoSnapshot> = {};
let autoContracts: StoredContract[] = [];
let manifestCrossLinks: CrossLink[] = [];
let dbExecutors: Map<string, CypherExecutor> | undefined;

const eo = opts?.extractorOverride;
Expand Down Expand Up @@ -158,8 +177,44 @@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis
}
}

// Process manifest links declared in group.yaml.
// ManifestExtractor is fully implemented but was never wired into this
// pipeline — config.links were parsed and validated but silently dropped.
// Placed after the DB try/finally: resolveSymbol falls back to synthetic
// UIDs when dbExecutors is undefined or a pool is closed, so cross-links
// are always generated regardless of whether real DB executors are available.
if (config.links.length > 0) {
// Warn about dangling links that reference repos not declared in config.repos.
// They still generate cross-links via synthetic UIDs (determinism is preserved),
// but such a reference is most likely a typo or a missing repos entry, so surface it to the operator.
const knownRepos = new Set(Object.keys(config.repos));
for (const link of config.links) {
const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r));
if (dangling.length > 0) {
console.warn(
`[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`,
);
}
}

const manifestEx = new ManifestExtractor();
const manifestResult = await manifestEx.extractFromManifest(config.links, dbExecutors);
autoContracts.push(...manifestResult.contracts);
manifestCrossLinks = manifestResult.crossLinks;
if (opts?.verbose) {
console.log(
` manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`,
);
}
}

const { matched, unmatched } = runExactMatch(autoContracts);
const crossLinks: CrossLink[] = matched;

// Dedupe cross-links. Manifest contracts participate in runExactMatch, so a
// manifest-declared link can also emit a matchType:'exact' CrossLink with the
// same endpoints. Prefer the manifest version — it reflects operator intent
// and carries matchType:'manifest' which downstream consumers may rely on.
const crossLinks = dedupeCrossLinks([...manifestCrossLinks, ...matched]);
const allContracts: StoredContract[] = autoContracts;

const registry: ContractRegistry = {
Expand Down
30 changes: 30 additions & 0 deletions gitnexus/test/unit/group/manifest-extractor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -583,4 +583,34 @@ describe('ManifestExtractor', () => {
expect(result.contracts).toHaveLength(0);
expect(result.crossLinks).toHaveLength(0);
});

// Verifies that extractFromManifest issues one executor call per distinct
// (repo, type, contract) tuple, even when the same link is declared twice.
it('memoizes repeated (repo, type, contract) resolutions so each tuple hits the DB once', async () => {
// Every executor invocation is recorded here so the test can count DB hits.
const calls: Array<{ repo: string; cypher: string }> = [];
// Builds a fake executor for one repo: logs the call, then returns a single
// symbol row whose uid encodes the repo it came from.
const execFor = (repo: string) => async (cypher: string) => {
calls.push({ repo, cypher });
return [{ uid: `uid::${repo}`, name: 'handler', filePath: 'src/h.ts' }];
};

// One fake executor per repo referenced by the link below.
const dbExecutors = new Map<string, (c: string) => Promise<Record<string, unknown>[]>>([
['svc/a', execFor('svc/a')],
['svc/b', execFor('svc/b')],
]);

// Two links declare the same (repo, type, contract) triple on each side,
// so naive sequential resolution would run 4 queries; memoization collapses
// to 2 (one per distinct repo tuple).
const link: GroupManifestLink = {
from: 'svc/b',
to: 'svc/a',
type: 'http',
contract: 'GET::/api/orders',
role: 'consumer',
};

// The second entry is a shallow copy: a different object with identical fields.
await extractor.extractFromManifest([link, { ...link }], dbExecutors);

// One resolution per distinct (repo, type, contract) — not per (link × side).
expect(calls).toHaveLength(2);
// Compare as sets: the order in which the two repos are queried is not asserted.
expect(new Set(calls.map((c) => c.repo))).toEqual(new Set(['svc/a', 'svc/b']));
});
});
111 changes: 110 additions & 1 deletion gitnexus/test/unit/group/sync.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@ import * as fs from 'node:fs';
import * as path from 'node:path';
import * as os from 'node:os';
import { syncGroup, stableRepoPoolId } from '../../../src/core/group/sync.js';
import type { GroupConfig, StoredContract, RepoHandle } from '../../../src/core/group/types.js';
import type {
GroupConfig,
StoredContract,
RepoHandle,
GroupManifestLink,
} from '../../../src/core/group/types.js';
import type { RegistryEntry } from '../../../src/storage/repo-manager.js';

describe('syncGroup', () => {
Expand Down Expand Up @@ -202,6 +207,110 @@ describe('syncGroup', () => {
}
});

// Checks that a link declared in config.links reaches syncGroup's result as a
// single matchType:'manifest' cross-link with synthetic UIDs, and that no
// duplicate matchType:'exact' link is emitted for the same contract.
it('manifest links in config.links produce cross-links with matchType manifest', async () => {
// A single consumer-side declaration: app/consumer calls app/provider.
const links: GroupManifestLink[] = [
{
from: 'app/consumer',
to: 'app/provider',
type: 'http',
contract: 'GET::/api/orders',
role: 'consumer',
},
];

// Both repos referenced by the link are declared in config.repos.
const config: GroupConfig = {
version: 1,
name: 'test',
description: '',
repos: { 'app/consumer': 'consumer-repo', 'app/provider': 'provider-repo' },
links,
packages: {},
detect: {
http: true,
grpc: false,
topics: false,
shared_libs: false,
embedding_fallback: false,
},
matching: { bm25_threshold: 0.7, embedding_threshold: 0.65, max_candidates_per_step: 3 },
};

// The override yields no contracts, so (presumably — confirm against syncGroup)
// everything in the result originates from the manifest links. skipWrite is set
// so the test does not need a group directory.
const result = await syncGroup(config, {
extractorOverride: async () => [],
skipWrite: true,
});

// ManifestExtractor should inject 2 contracts (provider + consumer) and 1 cross-link
expect(result.contracts).toHaveLength(2);
const manifestLinks = result.crossLinks.filter((cl) => cl.matchType === 'manifest');
expect(manifestLinks).toHaveLength(1);
expect(manifestLinks[0].contractId).toBe('http::GET::/api/orders');
expect(manifestLinks[0].from.repo).toBe('app/consumer');
expect(manifestLinks[0].to.repo).toBe('app/provider');
expect(manifestLinks[0].confidence).toBe(1.0);

// With no DB executors available, UIDs fall back to the deterministic
// synthetic form `manifest::<repo>::<contractId>`.
expect(manifestLinks[0].from.symbolUid).toBe('manifest::app/consumer::http::GET::/api/orders');
expect(manifestLinks[0].to.symbolUid).toBe('manifest::app/provider::http::GET::/api/orders');

// Manifest contracts also participate in runExactMatch; we must not emit a
// duplicate matchType:'exact' cross-link for the same endpoint pair.
const exactForSameContract = result.crossLinks.filter(
(cl) => cl.matchType === 'exact' && cl.contractId === 'http::GET::/api/orders',
);
expect(exactForSameContract).toHaveLength(0);
// Exactly one cross-link in total: the manifest one.
expect(result.crossLinks).toHaveLength(1);
});

// Checks that a link pointing at a repo missing from config.repos still yields
// a manifest cross-link with a synthetic UID, and that a warning naming the
// missing repo is logged.
it('manifest links referencing unknown repos still produce cross-links via synthetic UIDs', async () => {
const links: GroupManifestLink[] = [
{
from: 'app/known',
to: 'app/dangling', // not present in config.repos
type: 'http',
contract: 'POST::/api/missing',
role: 'consumer',
},
];

// Only 'app/known' is declared; 'app/dangling' is deliberately omitted.
const config: GroupConfig = {
version: 1,
name: 'test',
description: '',
repos: { 'app/known': 'known-repo' },
links,
packages: {},
detect: {
http: true,
grpc: false,
topics: false,
shared_libs: false,
embedding_fallback: false,
},
matching: { bm25_threshold: 0.7, embedding_threshold: 0.65, max_candidates_per_step: 3 },
};

// Capture console.warn by temporarily replacing it. Only the first argument
// of each call is recorded, stringified.
const warnings: string[] = [];
const origWarn = console.warn;
console.warn = (msg: string) => warnings.push(String(msg));
try {
const result = await syncGroup(config, {
extractorOverride: async () => [],
skipWrite: true,
});

expect(result.crossLinks).toHaveLength(1);
expect(result.crossLinks[0].matchType).toBe('manifest');
expect(result.crossLinks[0].to.symbolUid).toBe(
'manifest::app/dangling::http::POST::/api/missing',
);
// At least one captured warning must mention the undeclared repo by name.
expect(warnings.some((w) => w.includes('app/dangling'))).toBe(true);
} finally {
// Restore the real console.warn even if an assertion above throws.
console.warn = origWarn;
}
});

it('writes registry to groupDir when skipWrite is false', async () => {
const tmpDir = path.join(os.tmpdir(), `gitnexus-sync-write-${Date.now()}`);
fs.mkdirSync(tmpDir, { recursive: true });
Expand Down
Loading