diff --git a/gitnexus/src/core/group/extractors/http-route-extractor.ts b/gitnexus/src/core/group/extractors/http-route-extractor.ts index 898a22c38f..54aeb91505 100644 --- a/gitnexus/src/core/group/extractors/http-route-extractor.ts +++ b/gitnexus/src/core/group/extractors/http-route-extractor.ts @@ -18,12 +18,14 @@ import { getPluginForFile, HTTP_SCAN_GLOB, type HttpDetection } from './http-pat * the preferred path because the graph has richer symbol metadata * (real uids, class/method structure, etc.). * - * 2. **Source-scan fallback (Strategy B)** — parse files directly with - * the per-language plugin registry in `./http-patterns/`. Used when - * the graph has no routes/fetches for this repo (e.g. a repo that - * hasn't been indexed yet, or whose indexer doesn't know the - * framework). Each plugin owns its tree-sitter grammar and query - * sources — this orchestrator imports NO grammars or query strings. + * 2. **Source-scan supplement (Strategy B)** — parse files directly with + * the per-language plugin registry in `./http-patterns/`. Used to + * fill gaps when graph extraction only covers part of a polyglot repo + * (e.g. Java graph routes plus Go source-scan routes). Graph entries + * remain authoritative for duplicate contract IDs because they carry + * richer symbol metadata. Each plugin owns its tree-sitter grammar + * and query sources — this orchestrator imports NO grammars or query + * strings. * * Adding a new language for Strategy B is a one-file edit in * `http-patterns/index.ts`: register a new `HttpLanguagePlugin` and @@ -194,17 +196,19 @@ export class HttpRouteExtractor implements ContractExtractor { const graphProviders = dbExecutor != null ? await this.extractProvidersGraph(dbExecutor, getDetections) : []; - const providers = - graphProviders.length > 0 - ? 
graphProviders - : this.extractProvidersSourceScan(await getScannedFiles(), getDetections); + // Source scan always runs to capture routes in languages/files not covered + // by graph edges; the glob and per-file parse results are cached above. + const providers = this.mergeGraphAndSourceContracts( + graphProviders, + this.extractProvidersSourceScan(await getScannedFiles(), getDetections), + ); const graphConsumers = dbExecutor != null ? await this.extractConsumersGraph(dbExecutor, getDetections) : []; - const consumers = - graphConsumers.length > 0 - ? graphConsumers - : this.extractConsumersSourceScan(await getScannedFiles(), getDetections); + const consumers = this.mergeGraphAndSourceContracts( + graphConsumers, + this.extractConsumersSourceScan(await getScannedFiles(), getDetections), + ); return [...providers, ...consumers]; } @@ -473,4 +477,18 @@ export class HttpRouteExtractor implements ContractExtractor { } return out; } + + /** Merges graph-derived and source-scan contracts into a new array. Every graph contract is kept, in order, ahead of any source-scan contract. A source-scan contract is appended only when its contractId has not been seen yet, so the first source-scan entry per contractId wins and later ones are dropped. Duplicate contractIds inside graphContracts are not collapsed. Neither input array is mutated. @param graphContracts contracts from graph extraction; these take precedence on contractId collisions. @param sourceContracts contracts from the source scan, used to fill gaps. @returns graph contracts followed by the source-scan contracts whose contractId was not already present. */ private mergeGraphAndSourceContracts( + graphContracts: ExtractedContract[], + sourceContracts: ExtractedContract[], + ): ExtractedContract[] { + const seenContractIds = new Set(graphContracts.map((c) => c.contractId)); + const out = [...graphContracts]; + for (const contract of sourceContracts) { + if (seenContractIds.has(contract.contractId)) continue; + seenContractIds.add(contract.contractId); + out.push(contract); + } + return out; + } } diff --git a/gitnexus/test/unit/group/http-route-extractor.test.ts b/gitnexus/test/unit/group/http-route-extractor.test.ts index aa648a71d5..d2c1b3fa48 100644 --- a/gitnexus/test/unit/group/http-route-extractor.test.ts +++ b/gitnexus/test/unit/group/http-route-extractor.test.ts @@ -92,6 +92,77 @@ public class UserController { expect(getRoute!.confidence).toBe(0.9); expect(getRoute!.symbolUid).not.toBe('file-uid-ctrl'); }); + + /* Polyglot fixture: the mocked graph executor returns a route row only for the Java controller; the Go file is written to disk but never appears in the mocked graph rows. */ it('supplements graph providers with source-scan providers from other files', async () => { + const dir = path.join(tmpDir, 'graph-source-provider-union'); + 
fs.mkdirSync(path.join(dir, 'src/controller'), { recursive: true }); + fs.mkdirSync(path.join(dir, 'cmd'), { recursive: true }); + fs.writeFileSync( + path.join(dir, 'src/controller/UserController.java'), + ` +@RestController +@RequestMapping("/api/v2") +public class UserController { + @GetMapping("/users") + public List list() { return service.findAll(); } +} +`, + ); + fs.writeFileSync( + path.join(dir, 'cmd/server.go'), + ` +package main + +func healthHandler(w http.ResponseWriter, r *http.Request) {} + +func main() { + http.HandleFunc("/api/health", healthHandler) +} +`, + ); + + /* Mocked graph executor: dispatches on substrings of the query text and returns [] for anything it does not recognize. */ const mockDbExecutor = async (query: string) => { + if (query.includes('HANDLES_ROUTE')) { + return [ + { + fileId: 'file-uid-ctrl', + filePath: 'src/controller/UserController.java', + routePath: '/api/v2/users', + routeId: 'route-uid-users', + responseKeys: null, + routeSource: 'decorator-GetMapping', + }, + ]; + } + if (query.includes('FETCHES')) return []; + if (query.includes('CONTAINS')) { + return [ + { + uid: 'uid-ctrl-list', + name: 'list', + filePath: 'src/controller/UserController.java', + labels: ['Method'], + }, + ]; + } + return []; + }; + + const contracts = await extractor.extract(mockDbExecutor, dir, makeRepo(dir)); + const providers = contracts.filter((c) => c.role === 'provider'); + + /* The route reported by the mocked graph must appear exactly once and keep its graph-side uid and strategy. */ const graphRouteMatches = providers.filter( + (c) => c.contractId === 'http::GET::/api/v2/users', + ); + expect(graphRouteMatches).toHaveLength(1); + expect(graphRouteMatches[0].symbolUid).toBe('uid-ctrl-list'); + expect(graphRouteMatches[0].meta.extractionStrategy).toBe('graph_assisted'); + + /* The Go route is absent from the mocked graph rows, so it is expected to carry the source_scan strategy. */ const sourceRoute = providers.find((c) => c.contractId === 'http::GET::/api/health'); + expect(sourceRoute).toBeDefined(); + expect(sourceRoute?.symbolName).toBe('healthHandler'); + expect(sourceRoute?.meta.extractionStrategy).toBe('source_scan'); + }); }); describe('provider extraction — source-scan fallback (Strategy B)', () => { @@ -166,6 +237,30 @@ export default router; ).toBeDefined(); }); + 
it('dedupes source-only providers by contract id', async () => { + /* Two files register the same GET /api/health route; extract() is called with a null executor, so no graph rows are involved. */ const dir = path.join(tmpDir, 'source-only-same-contract-id'); + fs.mkdirSync(path.join(dir, 'src/routes'), { recursive: true }); + fs.writeFileSync( + path.join(dir, 'src/routes/health-a.ts'), + ` +router.get('/api/health', healthA); +`, + ); + fs.writeFileSync( + path.join(dir, 'src/routes/health-b.ts'), + ` +router.get('/api/health', healthB); +`, + ); + + const contracts = await extractor.extract(null, dir, makeRepo(dir)); + /* This filter matches on contractId only (not role), so the length check below covers every returned contract with that id. */ const providers = contracts.filter((c) => c.contractId === 'http::GET::/api/health'); + + expect(providers).toHaveLength(1); + expect(providers[0].role).toBe('provider'); + expect(providers[0].meta.extractionStrategy).toBe('source_scan'); + }); + it('extracts Go Gin and Echo route registrations', async () => { const dir = path.join(tmpDir, 'go-frameworks'); fs.mkdirSync(path.join(dir, 'cmd'), { recursive: true }); @@ -740,6 +835,59 @@ async def create_user(user: UserCreate): expect(consumers[0].confidence).toBe(0.9); expect(consumers[0].symbolName).toBe('fetchUsers'); }); + + /* Fixture: the mocked graph reports a fetch of /api/users from src/api/graph.ts, while src/api/health.ts contains a fetch('/api/health') call that never appears in the mocked graph rows. */ it('supplements graph consumers with source-scan consumers from other files', async () => { + const dir = path.join(tmpDir, 'graph-source-consumer-union'); + fs.mkdirSync(path.join(dir, 'src/api'), { recursive: true }); + fs.writeFileSync(path.join(dir, 'src/api/graph.ts'), 'export const api = {};'); + fs.writeFileSync( + path.join(dir, 'src/api/health.ts'), + ` +export async function fetchHealth() { + const res = await fetch('/api/health'); + return res.json(); +} +`, + ); + + /* Mocked graph executor: dispatches on substrings of the query text and returns [] for anything it does not recognize. */ const mockDbExecutor = async (query: string) => { + if (query.includes('HANDLES_ROUTE')) return []; + if (query.includes('FETCHES')) { + return [ + { + fileId: 'file-uid-api', + filePath: 'src/api/graph.ts', + routePath: '/api/users', + routeId: 'route-uid-users', + fetchReason: 'fetch-url-match', + }, + ]; + } + if (query.includes('CONTAINS')) { + return [ + { + uid: 'uid-fn-fetch', + name: 'fetchUsers', + filePath: 
'src/api/graph.ts', + labels: ['Function'], + }, + ]; + } + return []; + }; + + const contracts = await extractor.extract(mockDbExecutor, dir, makeRepo(dir)); + const consumers = contracts.filter((c) => c.role === 'consumer'); + + /* The consumer reported by the mocked graph must keep its graph-side uid and strategy. */ const graphConsumer = consumers.find((c) => c.contractId === 'http::GET::/api/users'); + expect(graphConsumer).toBeDefined(); + expect(graphConsumer?.symbolUid).toBe('uid-fn-fetch'); + expect(graphConsumer?.meta.extractionStrategy).toBe('graph_assisted'); + + /* The /api/health fetch is absent from the mocked graph rows, so it is expected to carry the source_scan strategy. */ const sourceConsumer = consumers.find((c) => c.contractId === 'http::GET::/api/health'); + expect(sourceConsumer).toBeDefined(); + expect(sourceConsumer?.meta.extractionStrategy).toBe('source_scan'); + }); }); describe('edge cases', () => {