Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
588 changes: 588 additions & 0 deletions gitnexus/src/core/group/bridge-db.ts

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions gitnexus/src/core/group/bridge-schema.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
* Bridge LadybugDB schema for cross-repo Contract Registry.
* Separate from per-repo schema in lbug/schema.ts.
*/

/**
 * Version of the bridge.lbug schema below. `openBridgeDbReadOnly` compares
 * this against `meta.json`'s version field and returns `null` on mismatch,
 * which trips the caller into either the JSON fallback path or a fresh
 * `group sync` that rebuilds `bridge.lbug` from scratch.
 *
 * Migration contract for contributors bumping this constant:
 *   1. Bump the number (e.g. `1` → `2`).
 *   2. Update the DDL below to match the new schema.
 *   3. DO NOT attempt an online migration in this file — the version gate
 *      is intentionally a "discard and re-sync" strategy for V1. An old
 *      bridge.lbug whose version doesn't match is treated as opaque and
 *      rebuilt by the next `group sync`.
 *   4. If online migration becomes necessary (e.g. when groups accumulate
 *      large amounts of embedding data), add a migration path as a
 *      separate `bridge-migrations.ts` module rather than bloating this
 *      file — keep schema and migration concerns separate.
 */
export const BRIDGE_SCHEMA_VERSION = 1;

/**
 * DDL statement that creates the `Contract` node table, with `id` as its
 * primary key.
 *
 * `id`, `contractId`, `type`, `role` and `repo` carry no default. The other
 * columns default to an empty string, except `confidence` (`0.0`) and `meta`
 * (`'{}'`).
 *
 * NOTE(review): `meta` is stored as a STRING defaulting to `'{}'`, so it
 * presumably holds a JSON-encoded object — confirm against the code that
 * writes rows to this table.
 */
export const CONTRACT_SCHEMA = `
CREATE NODE TABLE Contract (
id STRING,
contractId STRING,
type STRING,
role STRING,
repo STRING,
service STRING DEFAULT '',
symbolUid STRING DEFAULT '',
filePath STRING DEFAULT '',
symbolName STRING DEFAULT '',
confidence DOUBLE DEFAULT 0.0,
meta STRING DEFAULT '{}',
PRIMARY KEY (id)
)`;

/**
 * DDL statement that creates the `RepoSnapshot` node table, with `id` as its
 * primary key. `indexedAt` and `lastCommit` are strings that default to the
 * empty string.
 *
 * NOTE(review): `id` presumably identifies a repository and `indexedAt` a
 * timestamp — confirm against the code that writes rows to this table.
 */
export const REPO_SNAPSHOT_SCHEMA = `
CREATE NODE TABLE RepoSnapshot (
id STRING,
indexedAt STRING DEFAULT '',
lastCommit STRING DEFAULT '',
PRIMARY KEY (id)
)`;

/**
 * DDL statement that creates the `ContractLink` relationship table, which
 * connects one `Contract` node to another. Each relationship carries
 * `matchType`, `confidence`, `contractId`, `fromRepo` and `toRepo`; none of
 * these properties declares a default.
 */
export const CONTRACT_LINK_SCHEMA = `
CREATE REL TABLE ContractLink (
FROM Contract TO Contract,
matchType STRING,
confidence DOUBLE,
contractId STRING,
fromRepo STRING,
toRepo STRING
)`;

/**
 * Every bridge DDL statement, listed as the two node tables followed by the
 * relationship table.
 *
 * NOTE(review): `ContractLink` references `Contract`, so the node tables
 * presumably have to be created first — confirm before reordering.
 */
export const BRIDGE_SCHEMA_QUERIES = [CONTRACT_SCHEMA, REPO_SNAPSHOT_SCHEMA, CONTRACT_LINK_SCHEMA];
136 changes: 123 additions & 13 deletions gitnexus/src/core/group/matching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ export interface MatchResult {
unmatched: StoredContract[];
}

/** Outcome of the gRPC wildcard matching pass (`runWildcardMatch`). */
export interface WildcardMatchResult {
/** Cross-links produced for wildcard consumers that found at least one provider. */
matched: CrossLink[];
/** The input contracts minus the wildcard consumers that were linked. */
remaining: StoredContract[];
}

/**
 * Tells whether a contract id is a gRPC service-level wildcard: it must
 * start with the `grpc::` scheme and end with the `/*` method placeholder.
 */
function isGrpcWildcard(cid: string): boolean {
  if (!cid.startsWith('grpc::')) return false;
  return cid.endsWith('/*');
}

export function normalizeContractId(id: string): string {
const colonIdx = id.indexOf('::');
if (colonIdx === -1) return id;
Expand All @@ -24,19 +33,35 @@ export function normalizeContractId(id: string): string {
return id;
}
case 'grpc': {
// Canonical form: `grpc::<lowercased-package-or-service>[/<method>]`.
//
// The package/service segment is lowercased because gRPC package
// names are effectively case-insensitive across language bindings
// (`auth.AuthService`, `auth.authservice`, `AUTH.AUTHSERVICE` all
// describe the same wire protocol service). The RPC method segment
// is preserved as-is because the HTTP/2 path used on the wire is
// case-sensitive per the gRPC spec (`/Service/MethodName`), and
// method names in generated clients match the proto source exactly.
//
// A package-only id (no slash) and a package/method id are treated
// as DISTINCT canonical forms: `grpc::userservice` does not match
// `grpc::userservice/Login`. That's by design — callers that want
// service-level manifest matching against method-level providers
// should use the gRPC wildcard form `grpc::UserService/*` which is
// handled by runWildcardMatch below.
const slashIdx = rest.indexOf('/');
if (slashIdx > 0) {
const pkg = rest.substring(0, slashIdx).toLowerCase();
const method = rest.substring(slashIdx);
return `grpc::${pkg}${method}`;
}
if (slashIdx === 0) {
// Malformed "package/method" with leading slash — do not lowercase the whole string
// (method segment is case-sensitive per spec).
// Malformed "/method" with leading slash — keep as-is so two
// equally malformed ids can still match each other.
return `grpc::${rest}`;
}
// No slash: spec is ambiguous (package-only vs full service.method). MVP: lowercase
// the whole token; differs from pkg/method split above where RPC method keeps case.
// No slash: package/service only. Lowercase to match the package
// segment produced by the pkg/method branch above.
return `grpc::${rest.toLowerCase()}`;
}
case 'topic':
Expand Down Expand Up @@ -66,27 +91,36 @@ function findMatchingKeys(contractId: string, index: Map<string, StoredContract[
return [];
}

export function runExactMatch(contracts: StoredContract[]): MatchResult {
export function buildProviderIndex(contracts: StoredContract[]): Map<string, StoredContract[]> {
const providers = contracts.filter((c) => c.role === 'provider');
const consumers = contracts.filter((c) => c.role === 'consumer');

const providerIndex = new Map<string, StoredContract[]>();
const index = new Map<string, StoredContract[]>();
for (const p of providers) {
const key = normalizeContractId(p.contractId);
const list = providerIndex.get(key) || [];
const list = index.get(key) || [];
list.push(p);
providerIndex.set(key, list);
index.set(key, list);
}
return index;
}

export function runExactMatch(
contracts: StoredContract[],
providerIndex?: Map<string, StoredContract[]>,
): MatchResult {
const index = providerIndex ?? buildProviderIndex(contracts);

// Skip gRPC wildcard consumers — they go to wildcard pass only
const consumers = contracts.filter((c) => c.role === 'consumer' && !isGrpcWildcard(c.contractId));

const matched: CrossLink[] = [];
const matchedConsumerIds = new Set<string>();
const matchedProviderIds = new Set<string>();

for (const consumer of consumers) {
const matchingKeys = findMatchingKeys(consumer.contractId, providerIndex);
const matchingKeys = findMatchingKeys(consumer.contractId, index);
if (matchingKeys.length === 0) continue;

const allMatchingProviders = matchingKeys.flatMap((k) => providerIndex.get(k) || []);
const allMatchingProviders = matchingKeys.flatMap((k) => index.get(k) || []);
for (const provider of allMatchingProviders) {
if (provider.repo === consumer.repo) {
if (!provider.service || !consumer.service || provider.service === consumer.service) {
Expand Down Expand Up @@ -118,10 +152,86 @@ export function runExactMatch(contracts: StoredContract[]): MatchResult {
}
}

const unmatched = contracts.filter((c) => {
// normalUnmatched: contracts that weren't matched in exact pass
const normalUnmatched = contracts.filter((c) => {
if (isGrpcWildcard(c.contractId)) return false; // excluded from exact, handled separately
const id = `${c.repo}::${c.contractId}`;
return c.role === 'provider' ? !matchedProviderIds.has(id) : !matchedConsumerIds.has(id);
});

// Re-add gRPC wildcard contracts — they were never in exact matching
const grpcWildcards = contracts.filter((c) => isGrpcWildcard(c.contractId));
const unmatched = [...normalUnmatched, ...grpcWildcards];

return { matched, unmatched };
}

/**
 * Wildcard pass: links gRPC wildcard consumers (`grpc::<service>/*`) to
 * every method-level gRPC provider of the same service.
 *
 * A provider key qualifies when it starts with `grpc::`, is not itself a
 * wildcard, and contains a `/` after the scheme. Its service segment matches
 * the consumer's when the two are equal, or — only when the consumer's
 * service has no dot — when the provider's service ends with `.<service>`.
 * Providers in the consumer's own repo are skipped unless both sides carry
 * different, non-empty `service` tags.
 *
 * @param unmatched contracts to scan; only consumers with a wildcard id take part
 * @param providerIndex providers grouped by normalized contract id
 * @returns the links created, plus `unmatched` without the wildcard
 *   consumers that received at least one link
 */
export function runWildcardMatch(
  unmatched: StoredContract[],
  providerIndex: Map<string, StoredContract[]>,
): WildcardMatchResult {
  const scheme = 'grpc::';
  const identityOf = (c: StoredContract): string => `${c.repo}::${c.contractId}`;
  const isWildcardConsumer = (c: StoredContract): boolean =>
    c.role === 'consumer' && isGrpcWildcard(c.contractId);

  const links: CrossLink[] = [];
  const linkedConsumers = new Set<string>();

  for (const consumer of unmatched) {
    if (!isWildcardConsumer(consumer)) continue;

    // Drop the scheme in front and the two-character wildcard suffix:
    // "grpc::com.example.userservice/*" -> "com.example.userservice"
    const canonicalId = normalizeContractId(consumer.contractId);
    const wantedService = canonicalId.slice(canonicalId.indexOf('::') + 2, -2);
    const wantedIsBare = !wantedService.includes('.');

    for (const [key, candidates] of providerIndex) {
      // Only method-level gRPC provider ids are eligible targets.
      const isMethodLevelGrpc = key.startsWith(scheme) && !key.endsWith('/*');
      if (!isMethodLevelGrpc) continue;

      const slashAt = key.indexOf('/', scheme.length);
      if (slashAt < 0) continue;
      const offeredService = key.slice(scheme.length, slashAt);

      // Exact service match, or package-less consumer matching by suffix.
      const serviceMatches =
        offeredService === wantedService ||
        (wantedIsBare && offeredService.endsWith('.' + wantedService));
      if (!serviceMatches) continue;

      for (const provider of candidates) {
        // Same repo counts as a self-link unless the two sides are
        // attributed to different, known services.
        const sameRepo = provider.repo === consumer.repo;
        const servicesIndistinct =
          !provider.service || !consumer.service || provider.service === consumer.service;
        if (sameRepo && servicesIndistinct) continue;

        links.push({
          from: {
            repo: consumer.repo,
            service: consumer.service,
            symbolUid: consumer.symbolUid,
            symbolRef: consumer.symbolRef,
          },
          to: {
            repo: provider.repo,
            service: provider.service,
            symbolUid: provider.symbolUid,
            symbolRef: provider.symbolRef,
          },
          type: consumer.type,
          contractId: consumer.contractId, // the consumer's wildcard id
          matchType: 'wildcard',
          confidence: Math.min(provider.confidence, consumer.confidence),
        });
        linkedConsumers.add(identityOf(consumer));
      }
    }
  }

  const remaining = unmatched.filter(
    (c) => !(isWildcardConsumer(c) && linkedConsumers.has(identityOf(c))),
  );

  return { matched: links, remaining };
}
124 changes: 124 additions & 0 deletions gitnexus/src/core/group/normalization.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import type { CrossLink, CrossLinkEndpoint, StoredContract } from './types.js';

/**
 * Builds the dedupe identity of a contract: repo, contract id, role and the
 * symbol's file path, joined with NUL separators.
 */
function contractKey(contract: StoredContract): string {
  const { repo, contractId, role } = contract;
  const parts = [repo, contractId, role, contract.symbolRef.filePath];
  return parts.join('\0');
}

/**
 * Builds the identity of a cross-link endpoint: repo, service (a missing
 * service counts as the empty string), symbol file path and symbol name,
 * joined with NUL separators.
 */
function endpointKey(endpoint: CrossLinkEndpoint): string {
  const { filePath, name } = endpoint.symbolRef;
  const service = endpoint.service ?? '';
  return [endpoint.repo, service, filePath, name].join('\0');
}

/**
 * Scores how much information a contract carries, so that merging two
 * contracts with the same dedupe key can prefer the richer record.
 *
 * The score is the SUM of every signal that is present:
 *   +3  `symbolUid` is set
 *   +2  `symbolRef.filePath` is set
 *   +2  `symbolRef.name` is set and differs from `contractId`
 *   +2  `symbolName` is set and differs from `contractId`
 *   +1  `service` is set
 *   +1  `meta.source` is anything other than `'manifest'`
 *
 * The weights only express a priority ordering; the absolute values carry
 * no meaning beyond comparison between two contracts.
 */
function contractRichness(contract: StoredContract): number {
  const { contractId, symbolRef } = contract;
  const signals: Array<[boolean, number]> = [
    [Boolean(contract.symbolUid), 3],
    [Boolean(symbolRef.filePath), 2],
    [Boolean(symbolRef.name) && symbolRef.name !== contractId, 2],
    [Boolean(contract.symbolName) && contract.symbolName !== contractId, 2],
    [Boolean(contract.service), 1],
    [contract.meta.source !== 'manifest', 1],
  ];
  return signals.reduce((total, [present, weight]) => (present ? total + weight : total), 0);
}

/**
 * Merges two contracts that collided on the same dedupe key.
 *
 * The record with the strictly higher `contractRichness` becomes the
 * primary (ties keep `existing`); its fields win, and the secondary only
 * fills in what the primary leaves empty. `confidence` is the maximum of
 * the two, and `meta` is a shallow merge with the primary's keys on top.
 *
 * @param existing the contract already held for this key
 * @param incoming the newly seen contract with the same key
 * @returns a new merged contract; neither input is mutated
 */
function mergeContracts(existing: StoredContract, incoming: StoredContract): StoredContract {
  const incomingWins = contractRichness(incoming) > contractRichness(existing);
  const primary = incomingWins ? incoming : existing;
  const secondary = incomingWins ? existing : incoming;
  const symbolRefName = primary.symbolRef.name || secondary.symbolRef.name;
  return {
    ...secondary,
    ...primary,
    symbolUid: primary.symbolUid || secondary.symbolUid,
    symbolRef: {
      filePath: primary.symbolRef.filePath || secondary.symbolRef.filePath,
      name: symbolRefName,
    },
    symbolName: primary.symbolName || secondary.symbolName || symbolRefName,
    confidence: Math.max(existing.confidence, incoming.confidence),
    // An empty-string service carries no attribution (contractRichness also
    // scores it as absent), so it must not shadow a real tag on the
    // secondary. When neither side has a non-empty tag, the result is the
    // same value `primary.service ?? secondary.service` would give.
    service: primary.service || (secondary.service ?? primary.service),
    meta: { ...secondary.meta, ...primary.meta },
  };
}

/**
 * Combines two endpoint records, preferring `existing` and using `incoming`
 * only to fill gaps: `service` when it is null/undefined on `existing`, and
 * `symbolUid`, `symbolRef.filePath` and `symbolRef.name` when they are falsy
 * on `existing`. `repo` always comes from `existing`.
 */
function mergeEndpoints(
  existing: CrossLinkEndpoint,
  incoming: CrossLinkEndpoint,
): CrossLinkEndpoint {
  const preferredRef = existing.symbolRef;
  const fallbackRef = incoming.symbolRef;

  const service = existing.service ?? incoming.service;
  const symbolUid = existing.symbolUid || incoming.symbolUid;
  const filePath = preferredRef.filePath || fallbackRef.filePath;
  const name = preferredRef.name || fallbackRef.name;

  return { repo: existing.repo, service, symbolUid, symbolRef: { filePath, name } };
}

/**
 * Builds the dedupe identity of a cross-link: its type, contract id, match
 * type and the identity keys of both endpoints, joined with NUL separators.
 */
function crossLinkKey(link: CrossLink): string {
  const { type, contractId, matchType, from, to } = link;
  return [type, contractId, matchType, endpointKey(from), endpointKey(to)].join('\0');
}

/**
 * Collapses contracts that share the same dedupe key into a single merged
 * record. The output keeps the order in which each key was first seen.
 *
 * @param items contracts to deduplicate
 * @returns one contract per distinct key
 */
export function dedupeContracts(items: StoredContract[]): StoredContract[] {
  const byKey = items.reduce((acc, contract) => {
    const key = contractKey(contract);
    const previous = acc.get(key);
    return acc.set(key, previous === undefined ? contract : mergeContracts(previous, contract));
  }, new Map<string, StoredContract>());
  return Array.from(byKey.values());
}

/**
 * Collapses cross-links that share the same dedupe key. When two links
 * collide, the one with the strictly higher confidence supplies the base
 * fields (ties keep the earlier link), the merged confidence is the maximum
 * of the two, and each endpoint is gap-filled from the other link.
 *
 * @param items cross-links to deduplicate
 * @returns one link per distinct key, in first-seen order
 */
export function dedupeCrossLinks(items: CrossLink[]): CrossLink[] {
  const byKey = new Map<string, CrossLink>();
  items.forEach((link) => {
    const key = crossLinkKey(link);
    const seen = byKey.get(key);
    if (seen === undefined) {
      byKey.set(key, link);
      return;
    }
    const [primary, secondary] = link.confidence > seen.confidence ? [link, seen] : [seen, link];
    byKey.set(key, {
      ...primary,
      confidence: Math.max(seen.confidence, link.confidence),
      from: mergeEndpoints(primary.from, secondary.from),
      to: mergeEndpoints(primary.to, secondary.to),
    });
  });
  return Array.from(byKey.values());
}
16 changes: 15 additions & 1 deletion gitnexus/src/core/group/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export type ContractType = 'http' | 'grpc' | 'topic' | 'lib' | 'custom';
export type MatchType = 'exact' | 'manifest' | 'bm25' | 'embedding';
export type MatchType = 'exact' | 'manifest' | 'wildcard' | 'bm25' | 'embedding';
export type ContractRole = 'provider' | 'consumer';

export interface GroupConfig {
Expand Down Expand Up @@ -131,3 +131,17 @@ export interface OutOfScopeLink {
contractId: string;
confidence: number;
}

/**
 * Opaque handle to an open bridge LadybugDB.
 *
 * `_db` and `_conn` are typed `unknown` so that consumers of this type
 * cannot call into the underlying driver objects without an explicit cast.
 */
export interface BridgeHandle {
/** Internal database object — do not access directly. */
readonly _db: unknown;
/** Internal connection object — do not access directly. */
readonly _conn: unknown;
/** NOTE(review): presumably the directory of the group this bridge belongs to — confirm against the code that opens the handle. */
readonly groupDir: string;
}

/**
 * Metadata describing a bridge database.
 *
 * NOTE(review): presumably the shape of the `meta.json` file whose version
 * field is checked against `BRIDGE_SCHEMA_VERSION` — confirm against the
 * reader/writer in bridge-db.ts.
 */
export interface BridgeMeta {
/** Schema version number recorded for the bridge. */
version: number;
/** NOTE(review): presumably a timestamp string for when the bridge was generated — format not visible here. */
generatedAt: string;
/** NOTE(review): presumably names of repos that could not be included — confirm against the writer. */
missingRepos: string[];
}
Loading
Loading