/**
 * Structural source-to-sink reachability scan backing the `source_sink` tool.
 *
 * Loads every Function/Method node with its content, tags the nodes whose
 * content matches a source or sink catalog pattern (built-in catalogs merged
 * with the repo's user config), builds a forward CALLS adjacency map, and
 * hands all of that to `buildSourceSinkPaths` for a depth-limited BFS.
 *
 * @param repo - Handle of the indexed repository to scan.
 * @param params - Optional filters:
 *   `max_depth` caps the number of CALLS hops (default 5);
 *   `owasp` keeps only sinks whose catalog entry has that OWASP category;
 *   `source_category` keeps only sources whose catalog entry has that category.
 * @returns `{ summary, findings, note }`. `summary` carries source/sink/path
 *   counts plus one counter per risk level; `findings` is the formatted path
 *   list in the order produced by `buildSourceSinkPaths`.
 */
private async sourceSinkScan(
  repo: RepoHandle,
  params: {
    max_depth?: number;
    owasp?: string;
    source_category?: string;
  },
): Promise<any> {
  await this.ensureInitialized(repo.id);
  const maxDepth = params.max_depth ?? 5;

  // Step 1: Find all Function and Method nodes with their content.
  const nodesResult = await executeQuery(
    repo.id,
    `
      MATCH (n:Function)
      WHERE n.id IS NOT NULL
      RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
      UNION ALL
      MATCH (n:Method)
      WHERE n.id IS NOT NULL
      RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
    `,
  );

  // Step 2: Load user-defined catalog extensions (if any) and merge with built-in catalogs.
  const {
    getMatchingSources,
    getMatchingSinks,
    loadUserSecurityConfig,
    mergeCatalogs,
    compilePatterns,
  } = await import('../../security/catalogs.js');

  const userConfig = await loadUserSecurityConfig(repo.repoPath);
  const merged = mergeCatalogs(userConfig);
  const compiledSources = compilePatterns(merged.sources);
  const compiledSinks = compilePatterns(merged.sinks);

  // Step 3: Tag source-adjacent and sink-adjacent functions.
  const sources: Array<{
    id: string;
    name: string;
    filePath: string;
    sourcePatterns: string[];
  }> = [];
  const sinks: Array<{
    id: string;
    name: string;
    filePath: string;
    sinkPatterns: string[];
    owasp: string;
  }> = [];
  const nodeNameMap = new Map<string, { name: string; filePath: string }>();

  for (const row of nodesResult) {
    // Rows may be keyed by alias or positional, depending on the driver result shape.
    const content = row.content ?? row[3] ?? '';
    const name = row.name ?? row[1] ?? '';
    const filePath = row.filePath ?? row[2] ?? '';
    const id = row.id ?? row[0] ?? '';
    const language = getLanguageFromFilename(filePath) ?? undefined;

    nodeNameMap.set(id, { name, filePath });

    // When a filter is active, report only the catalog entries that satisfy it,
    // so the listed patterns always agree with the requested category.
    const matchedSources = getMatchingSources(content, language, compiledSources);
    const relevantSources = params.source_category
      ? matchedSources.filter((s) => s.category === params.source_category)
      : matchedSources;
    if (relevantSources.length > 0) {
      sources.push({
        id,
        name,
        filePath,
        sourcePatterns: relevantSources.map((s) => s.pattern),
      });
    }

    const matchedSinks = getMatchingSinks(content, language, compiledSinks);
    const relevantSinks = params.owasp
      ? matchedSinks.filter((s) => s.owasp === params.owasp)
      : matchedSinks;
    if (relevantSinks.length > 0) {
      sinks.push({
        id,
        name,
        filePath,
        sinkPatterns: relevantSinks.map((s) => s.pattern),
        // Taken from the filtered list: with an `owasp` filter the first
        // unfiltered match may belong to a different category.
        owasp: relevantSinks[0]?.owasp || 'unknown',
      });
    }
  }

  // Step 4: Build CALLS adjacency map via Cypher.
  const callsResult = await executeQuery(
    repo.id,
    `
      MATCH (a)-[r:CodeRelation {type: 'CALLS'}]->(b)
      RETURN a.id AS sourceId, b.id AS targetId
    `,
  );

  const callsGraph = new Map<string, string[]>();
  for (const row of callsResult) {
    const sourceId = row.sourceId ?? row[0];
    const targetId = row.targetId ?? row[1];
    // An edge with a missing endpoint cannot be part of any path; skip it
    // instead of storing it under an `undefined` key.
    if (sourceId == null || targetId == null) continue;
    let callees = callsGraph.get(sourceId);
    if (!callees) {
      callees = [];
      callsGraph.set(sourceId, callees);
    }
    callees.push(targetId);
  }

  // Step 5: BFS from sources to sinks.
  const { buildSourceSinkPaths } = await import('../../security/source-sink-scanner.js');
  const paths = buildSourceSinkPaths(sources, sinks, callsGraph, maxDepth);

  // Step 6: Format results, replacing node ids in each path with "name (file)".
  const findings = paths.map((p) => ({
    risk: p.risk,
    owasp: p.owasp,
    source: { name: p.source.name, file: p.source.filePath, patterns: p.source.sourcePatterns },
    sink: { name: p.sink.name, file: p.sink.filePath, patterns: p.sink.sinkPatterns },
    depth: p.depth,
    path: p.path.map((id) => {
      const info = nodeNameMap.get(id);
      return info ? `${info.name} (${info.filePath})` : id;
    }),
  }));

  // The scanner can label a path 'low'; count it so the per-risk counters
  // always add up to `paths_found`.
  const riskCounts = { critical: 0, high: 0, medium: 0, low: 0 };
  for (const f of findings) {
    if (f.risk in riskCounts) riskCounts[f.risk as keyof typeof riskCounts]++;
  }

  return {
    summary: {
      sources_found: sources.length,
      sinks_found: sinks.length,
      paths_found: findings.length,
      ...riskCounts,
    },
    findings,
    note: 'Structural reachability scan — paths may contain sanitizers. Use context() on flagged functions to verify.',
  };
}
+AFTER THIS: Use context() on flagged functions to understand the full call chain, then verify if sanitizers exist in between. + +Returns paths from source functions to sink functions, ranked by risk level. +Uses BFS over existing CALLS edges — structural reachability, not taint tracking.`, + inputSchema: { + type: 'object', + properties: { + repo: { + type: 'string', + description: 'Repository name or path. Omit if only one repo is indexed.', + }, + max_depth: { + type: 'number', + description: + 'Maximum BFS depth from source to sink (default: 5). Lower = fewer false positives, higher = more coverage.', + }, + owasp: { + type: 'string', + description: + 'Filter by OWASP category: "A03-injection", "A07-xss", "A10-ssrf". Omit for all.', + }, + source_category: { + type: 'string', + description: + 'Filter sources by category: "user_input", "environment", "file_read", "network". Omit for all.', + }, + }, + required: [], + }, + }, { name: 'group_list', description: `List all configured repository groups, or return details for one group (repos, manifest links). diff --git a/gitnexus/src/security/catalogs.ts b/gitnexus/src/security/catalogs.ts new file mode 100644 index 0000000000..5496f7eff0 --- /dev/null +++ b/gitnexus/src/security/catalogs.ts @@ -0,0 +1,642 @@ +/** + * Source and Sink Catalogs for Structural Security Scanning + * + * Sources: functions/patterns that introduce untrusted data into the application + * Sinks: functions/patterns that perform dangerous operations with data + * + * These catalogs are used for BFS reachability analysis over the existing + * CALLS graph — no CFG or data flow analysis needed. 
+ * + * Based on OWASP Top 10 categories: + * - A03: Injection (SQL, command, code) + * - A07: XSS (cross-site scripting) + * - A10: SSRF (server-side request forgery) + */ + +export interface SourceEntry { + /** Pattern to match in function content (regex-compatible string) */ + pattern: string; + /** Category of the source */ + category: 'user_input' | 'environment' | 'file_read' | 'network'; + /** Languages this source applies to (empty = all) */ + languages?: string[]; + /** Description for reports */ + description: string; +} + +export interface SinkEntry { + /** Pattern to match in function name or content */ + pattern: string; + /** OWASP category */ + owasp: 'A03-injection' | 'A07-xss' | 'A10-ssrf' | 'A01-access-control'; + /** Risk if reached from untrusted source */ + severity: 'critical' | 'high' | 'medium'; + /** Languages this sink applies to (empty = all) */ + languages?: string[]; + /** Description for reports */ + description: string; +} + +// ── Source Catalog ── + +export const SOURCE_CATALOG: SourceEntry[] = [ + // HTTP request data (Next.js, Express, Koa, Fastify) + { + pattern: 'request.json', + category: 'user_input', + description: 'Next.js request body (Request object)', + }, + { + pattern: 'req.json', + category: 'user_input', + description: 'Next.js request body (req shorthand)', + }, + { pattern: 'req.body', category: 'user_input', description: 'Express request body' }, + { pattern: 'req.query', category: 'user_input', description: 'Express query parameters' }, + { pattern: 'req.params', category: 'user_input', description: 'Express route parameters' }, + { pattern: 'req.headers', category: 'user_input', description: 'HTTP request headers' }, + { + pattern: 'request.GET', + category: 'user_input', + languages: ['python'], + description: 'Django GET params', + }, + { + pattern: 'request.POST', + category: 'user_input', + languages: ['python'], + description: 'Django POST data', + }, + { + pattern: 'request.data', + category: 'user_input', 
+ languages: ['python'], + description: 'DRF request data', + }, + { + pattern: '$_GET', + category: 'user_input', + languages: ['php'], + description: 'PHP GET superglobal', + }, + { + pattern: '$_POST', + category: 'user_input', + languages: ['php'], + description: 'PHP POST superglobal', + }, + { + pattern: '$_REQUEST', + category: 'user_input', + languages: ['php'], + description: 'PHP REQUEST superglobal', + }, + { + pattern: 'request.form', + category: 'user_input', + languages: ['python'], + description: 'Flask form data', + }, + { + pattern: 'request.args', + category: 'user_input', + languages: ['python'], + description: 'Flask query args', + }, + { + pattern: 'nextUrl.searchParams', + category: 'user_input', + description: 'Next.js URL search params', + }, + + // Go (net/http) + { + pattern: 'r.Body', + category: 'user_input', + languages: ['go'], + description: 'Go HTTP request body', + }, + { + pattern: 'r.URL.Query()', + category: 'user_input', + languages: ['go'], + description: 'Go URL query parameters', + }, + { + pattern: 'r.FormValue', + category: 'user_input', + languages: ['go'], + description: 'Go form value', + }, + { + pattern: 'r.Header.Get', + category: 'user_input', + languages: ['go'], + description: 'Go request header', + }, + + // Rust / Actix-web + { + pattern: 'web::Json', + category: 'user_input', + languages: ['rust'], + description: 'Actix-web JSON extractor', + }, + { + pattern: 'web::Query', + category: 'user_input', + languages: ['rust'], + description: 'Actix-web query extractor', + }, + { + pattern: 'web::Path', + category: 'user_input', + languages: ['rust'], + description: 'Actix-web path extractor', + }, + + // Spring (Java/Kotlin) + { + pattern: '@RequestBody', + category: 'user_input', + languages: ['java', 'kotlin'], + description: 'Spring request body annotation', + }, + { + pattern: '@RequestParam', + category: 'user_input', + languages: ['java', 'kotlin'], + description: 'Spring request parameter annotation', + }, + { 
+ pattern: '@PathVariable', + category: 'user_input', + languages: ['java', 'kotlin'], + description: 'Spring path variable annotation', + }, + + // Rails (Ruby) + { + pattern: 'params[', + category: 'user_input', + languages: ['ruby'], + description: 'Rails params hash access', + }, + { + pattern: 'request.body', + category: 'user_input', + languages: ['ruby'], + description: 'Rails raw request body', + }, + + // Kotlin / Ktor + { + pattern: 'call.receive', + category: 'user_input', + languages: ['kotlin'], + description: 'Ktor request body receive', + }, + { + pattern: 'call.parameters', + category: 'user_input', + languages: ['kotlin'], + description: 'Ktor request parameters', + }, + + // FastAPI (Python) + { + pattern: 'async def endpoint', + category: 'user_input', + languages: ['python'], + description: 'FastAPI auto-injected endpoint parameter', + }, + + // Environment + { pattern: 'process.env', category: 'environment', description: 'Node.js env variable' }, + { + pattern: 'os.environ', + category: 'environment', + languages: ['python'], + description: 'Python env variable', + }, + { + pattern: 'getenv', + category: 'environment', + languages: ['php'], + description: 'PHP env variable', + }, + { + pattern: 'os.Getenv', + category: 'environment', + languages: ['go'], + description: 'Go env variable', + }, + { + pattern: 'std::env::var', + category: 'environment', + languages: ['rust'], + description: 'Rust env variable', + }, + { + pattern: 'System.getenv', + category: 'environment', + languages: ['java', 'kotlin'], + description: 'Java/Kotlin env variable', + }, + { + pattern: 'ENV[', + category: 'environment', + languages: ['ruby'], + description: 'Ruby env variable', + }, + + // File reads + { pattern: 'readFile', category: 'file_read', description: 'File read operation' }, + { pattern: 'readFileSync', category: 'file_read', description: 'Sync file read' }, + { pattern: 'os.ReadFile', category: 'file_read', languages: ['go'], description: 'Go file read' 
}, + { + pattern: 'std::fs::read', + category: 'file_read', + languages: ['rust'], + description: 'Rust file read', + }, + + // Network input + { pattern: 'fetch(', category: 'network', description: 'Fetch API response' }, + { pattern: 'axios.get', category: 'network', description: 'Axios HTTP response' }, + { pattern: 'axios.post', category: 'network', description: 'Axios HTTP response' }, + { + pattern: 'http.Get', + category: 'network', + languages: ['go'], + description: 'Go HTTP client GET', + }, + { + pattern: 'reqwest::get', + category: 'network', + languages: ['rust'], + description: 'Rust reqwest HTTP GET', + }, +]; + +// ── Sink Catalog ── + +export const SINK_CATALOG: SinkEntry[] = [ + // A03: Injection — SQL + { pattern: 'query', owasp: 'A03-injection', severity: 'critical', description: 'Raw SQL query' }, + { + pattern: '$queryRaw', + owasp: 'A03-injection', + severity: 'critical', + description: 'Prisma raw query', + }, + { + pattern: '$executeRaw', + owasp: 'A03-injection', + severity: 'critical', + description: 'Prisma raw execute', + }, + { + pattern: 'rawQuery', + owasp: 'A03-injection', + severity: 'critical', + description: 'Sequelize raw query', + }, + + // A03: Injection — Command + { + pattern: 'exec', + owasp: 'A03-injection', + severity: 'critical', + description: 'Command execution', + }, + { + pattern: 'execSync', + owasp: 'A03-injection', + severity: 'critical', + description: 'Sync command execution', + }, + { pattern: 'spawn', owasp: 'A03-injection', severity: 'high', description: 'Process spawn' }, + { pattern: 'eval', owasp: 'A03-injection', severity: 'critical', description: 'Code evaluation' }, + { + pattern: 'Function(', + owasp: 'A03-injection', + severity: 'critical', + description: 'Dynamic function creation', + }, + { + pattern: 'subprocess.run', + owasp: 'A03-injection', + severity: 'critical', + languages: ['python'], + description: 'Python subprocess', + }, + { + pattern: 'os.system', + owasp: 'A03-injection', + severity: 
'critical', + languages: ['python'], + description: 'Python system call', + }, + { + pattern: 'shell_exec', + owasp: 'A03-injection', + severity: 'critical', + languages: ['php'], + description: 'PHP shell exec', + }, + + // A03: Injection — Go + { + pattern: 'os.exec', + owasp: 'A03-injection', + severity: 'critical', + languages: ['go'], + description: 'Go command execution', + }, + { + pattern: 'sql.Query', + owasp: 'A03-injection', + severity: 'critical', + languages: ['go'], + description: 'Go raw SQL query', + }, + + // A03: Injection — Rust + { + pattern: 'Command::new', + owasp: 'A03-injection', + severity: 'critical', + languages: ['rust'], + description: 'Rust command execution', + }, + { + pattern: 'sqlx::query', + owasp: 'A03-injection', + severity: 'critical', + languages: ['rust'], + description: 'Rust sqlx raw query', + }, + + // A03: Injection — Spring (Java/Kotlin) + { + pattern: 'jdbcTemplate.query', + owasp: 'A03-injection', + severity: 'critical', + languages: ['java', 'kotlin'], + description: 'Spring JDBC raw query', + }, + { + pattern: 'Runtime.exec', + owasp: 'A03-injection', + severity: 'critical', + languages: ['java', 'kotlin'], + description: 'Java runtime command execution', + }, + + // A03: Injection — Rails (Ruby) + { + pattern: 'system(', + owasp: 'A03-injection', + severity: 'critical', + languages: ['ruby'], + description: 'Ruby system command execution', + }, + { + pattern: 'ActiveRecord::Base.connection.execute', + owasp: 'A03-injection', + severity: 'critical', + languages: ['ruby'], + description: 'Rails raw SQL execution', + }, + + // A07: XSS + { + pattern: 'innerHTML', + owasp: 'A07-xss', + severity: 'high', + description: 'Direct HTML injection', + }, + { + pattern: 'dangerouslySetInnerHTML', + owasp: 'A07-xss', + severity: 'high', + description: 'React unsafe HTML', + }, + { pattern: 'document.write', owasp: 'A07-xss', severity: 'high', description: 'Document write' }, + { + pattern: 'template.HTML', + owasp: 'A07-xss', + 
severity: 'high', + languages: ['go'], + description: 'Go template unescaped HTML', + }, + + // A10: SSRF + { + pattern: 'fetch(', + owasp: 'A10-ssrf', + severity: 'high', + description: 'Server-side fetch with user URL', + }, + { pattern: 'axios(', owasp: 'A10-ssrf', severity: 'high', description: 'Axios with user URL' }, + { + pattern: 'http.get', + owasp: 'A10-ssrf', + severity: 'high', + description: 'HTTP client with user URL', + }, + { + pattern: 'urllib.request', + owasp: 'A10-ssrf', + severity: 'high', + languages: ['python'], + description: 'Python URL request', + }, + + // Database writes (ORM — not injection per se, but data integrity sinks) + { + pattern: 'prisma.', + owasp: 'A03-injection', + severity: 'medium', + description: 'Prisma ORM operation (check for raw queries)', + }, + { + pattern: '.create(', + owasp: 'A03-injection', + severity: 'medium', + description: 'ORM create operation', + }, + { + pattern: '.update(', + owasp: 'A03-injection', + severity: 'medium', + description: 'ORM update operation', + }, +]; + +// ── User-extensible catalog loading ── + +export interface UserSecurityConfig { + sources?: Array<{ + pattern: string; + category: string; + description: string; + languages?: string[]; + }>; + sinks?: Array<{ + pattern: string; + owasp: string; + severity: string; + description: string; + languages?: string[]; + }>; +} + +/** + * Load user-defined security catalog from `.gitnexus/security.json` in the repo root. + * Returns null if the file doesn't exist or is invalid. 
/**
 * Load the user-defined security catalog from `.gitnexus/security.json` in the repo root.
 *
 * @param repoPath - Absolute or relative path of the repository root.
 * @returns The parsed config object, or `null` when the file is missing,
 *   unreadable, not valid JSON, or valid JSON that is not a plain object
 *   (e.g. an array, string, number or `null`).
 */
export async function loadUserSecurityConfig(repoPath: string): Promise<UserSecurityConfig | null> {
  try {
    const { readFile } = await import('node:fs/promises');
    const { join } = await import('node:path');
    const configPath = join(repoPath, '.gitnexus', 'security.json');
    const parsed: unknown = JSON.parse(await readFile(configPath, 'utf-8'));
    // JSON.parse happily returns arrays and primitives; only an object can be a config.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return null;
    return parsed as UserSecurityConfig;
  } catch {
    // File doesn't exist or is invalid — that's fine, just use built-in catalogs
    return null;
  }
}

/**
 * Merge user-defined entries with the built-in catalogs.
 *
 * Built-in entries come first and user entries are appended in file order.
 * The built-in arrays are copied, never mutated. Because the user config is
 * hand-written JSON, malformed parts are skipped instead of being merged:
 * a `sources`/`sinks` value that is not an array is ignored, and an entry is
 * dropped unless it is an object with a non-empty string `pattern` (a missing
 * pattern would crash pattern compilation; an empty one would match every
 * function).
 *
 * @param userConfig - Parsed user config, or `null` to use built-ins only.
 * @returns Fresh `sources` and `sinks` arrays.
 */
export function mergeCatalogs(userConfig: UserSecurityConfig | null): {
  sources: SourceEntry[];
  sinks: SinkEntry[];
} {
  const sources = [...SOURCE_CATALOG];
  const sinks = [...SINK_CATALOG];

  for (const s of usableEntries(userConfig?.sources)) {
    sources.push({
      pattern: s.pattern,
      // NOTE(review): category is not checked against the SourceEntry union;
      // unknown values pass through unchanged.
      category: s.category as SourceEntry['category'],
      description: s.description,
      ...languageScope(s.languages),
    });
  }

  for (const s of usableEntries(userConfig?.sinks)) {
    sinks.push({
      pattern: s.pattern,
      // NOTE(review): owasp/severity are likewise passed through unchecked.
      owasp: s.owasp as SinkEntry['owasp'],
      severity: s.severity as SinkEntry['severity'],
      description: s.description,
      ...languageScope(s.languages),
    });
  }

  return { sources, sinks };
}

/** Keep only entries that are objects carrying a non-empty string `pattern`. */
function usableEntries<T extends { pattern: string }>(entries: readonly T[] | undefined): T[] {
  if (!Array.isArray(entries)) return [];
  return entries.filter(
    (e) =>
      e !== null && typeof e === 'object' && typeof e.pattern === 'string' && e.pattern !== '',
  );
}

/**
 * Normalize a user-supplied `languages` value into an optional property.
 * A single string becomes a one-element list; an empty or invalid value yields
 * no `languages` key at all, which is how the catalogs express "all languages".
 */
function languageScope(value: unknown): { languages?: string[] } {
  const raw: unknown[] = typeof value === 'string' ? [value] : Array.isArray(value) ? value : [];
  const languages = raw.filter((l): l is string => typeof l === 'string' && l.length > 0);
  return languages.length > 0 ? { languages } : {};
}
/**
 * Compile catalog entries into regexes for content matching.
 *
 * Each entry's `pattern` is regex-escaped first, so it is matched as a
 * literal, case-insensitive substring rather than interpreted as regex syntax.
 *
 * @param entries - Catalog entries (built-in, user-defined, or merged).
 * @returns One compiled pattern per entry, in the same order.
 */
export function compilePatterns<T extends { pattern: string; languages?: string[] }>(
  entries: T[],
): CompiledPattern<T>[] {
  return entries.map((entry) => ({
    regex: new RegExp(escapeRegex(entry.pattern), 'i'),
    entry,
  }));
}

/** A catalog entry paired with the regex compiled from its pattern. */
interface CompiledPattern<T> {
  regex: RegExp;
  entry: T;
}

// Built-in catalogs compiled once at module load; used when callers pass no custom patterns.
const SOURCE_REGEXES: CompiledPattern<SourceEntry>[] = compilePatterns(SOURCE_CATALOG);

const SINK_REGEXES: CompiledPattern<SinkEntry>[] = compilePatterns(SINK_CATALOG);

/**
 * Check if a compiled pattern applies given the language and content.
 *
 * An entry is language-scoped only when it lists at least one language; a
 * missing or empty `languages` array means "all languages", as documented on
 * the catalog entry interfaces. When the caller does not know the language,
 * scoping is skipped and only the content is tested.
 */
function patternMatches<T extends { pattern: string; languages?: string[] }>(
  { regex, entry }: CompiledPattern<T>,
  content: string,
  language?: string,
): boolean {
  const scope = entry.languages;
  if (scope && scope.length > 0 && language && !scope.includes(language)) return false;
  return regex.test(content);
}

/** Filter compiled patterns by language applicability and content match, returning the entries. */
function matchPatterns<T extends { pattern: string; languages?: string[] }>(
  patterns: CompiledPattern<T>[],
  content: string,
  language?: string,
): T[] {
  return patterns.filter((p) => patternMatches(p, content, language)).map(({ entry }) => entry);
}

/**
 * Check if a function's content contains source patterns (user input reads).
 *
 * @param _functionName - Unused; only `content` is inspected.
 * @param content - Source text of the function.
 * @param language - Language of the containing file, if known.
 * @param customPatterns - Compiled patterns to use instead of the built-in
 *   source catalog (e.g. merged with user config).
 */
export function isSourceAdjacent(
  _functionName: string,
  content: string,
  language?: string,
  customPatterns?: CompiledPattern<SourceEntry>[],
): boolean {
  return (customPatterns ?? SOURCE_REGEXES).some((p) => patternMatches(p, content, language));
}

/**
 * Check if a function's content contains sink patterns (dangerous operations).
 *
 * @param _functionName - Unused; only `content` is inspected.
 * @param content - Source text of the function.
 * @param language - Language of the containing file, if known.
 * @param customPatterns - Compiled patterns to use instead of the built-in
 *   sink catalog (e.g. merged with user config).
 */
export function isSinkAdjacent(
  _functionName: string,
  content: string,
  language?: string,
  customPatterns?: CompiledPattern<SinkEntry>[],
): boolean {
  return (customPatterns ?? SINK_REGEXES).some((p) => patternMatches(p, content, language));
}
/**
 * List the sink catalog entries whose pattern occurs in a function's content
 * (used for reporting).
 *
 * @param content - Source text of the function.
 * @param language - Language of the containing file, if known.
 * @param customPatterns - Compiled patterns to use instead of the built-in
 *   sink catalog (e.g. merged with user config).
 */
export function getMatchingSinks(
  content: string,
  language?: string,
  customPatterns?: CompiledPattern<SinkEntry>[],
): SinkEntry[] {
  const activePatterns = customPatterns ?? SINK_REGEXES;
  return matchPatterns(activePatterns, content, language);
}

/**
 * List the source catalog entries whose pattern occurs in a function's content
 * (used for reporting).
 *
 * @param content - Source text of the function.
 * @param language - Language of the containing file, if known.
 * @param customPatterns - Compiled patterns to use instead of the built-in
 *   source catalog (e.g. merged with user config).
 */
export function getMatchingSources(
  content: string,
  language?: string,
  customPatterns?: CompiledPattern<SourceEntry>[],
): SourceEntry[] {
  const activePatterns = customPatterns ?? SOURCE_REGEXES;
  return matchPatterns(activePatterns, content, language);
}

/** Backslash-escape every regex metacharacter so `str` can be matched literally. */
function escapeRegex(str: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (metachar) => `\\${metachar}`);
}
/** A function tagged as reading untrusted input. */
export interface SourceNode {
  id: string;
  name: string;
  filePath: string;
  /** Catalog patterns that matched this function's content */
  sourcePatterns: string[];
}

/** A function tagged as performing a dangerous operation. */
export interface SinkNode {
  id: string;
  name: string;
  filePath: string;
  /** Catalog patterns that matched this function's content */
  sinkPatterns: string[];
  owasp: string;
}

/** One reachable source-to-sink path through the CALLS graph. */
export interface SourceSinkPath {
  source: SourceNode;
  sink: SinkNode;
  /** Ordered node IDs from source to sink */
  path: string[];
  /** Number of hops from source to sink */
  depth: number;
  /** OWASP category from the sink */
  owasp: string;
  /** Risk level derived from depth and the sink's OWASP category */
  risk: 'critical' | 'high' | 'medium' | 'low';
}

/**
 * Find all reachable paths from source-adjacent nodes to sink-adjacent nodes
 * using BFS over the CALLS graph.
 *
 * Each source gets its own BFS with its own visited set, so every
 * (source, sink) pair is reported at most once, along the first (shortest)
 * path found. Only callees are tested against the sink set: a node that is
 * both source and sink is not reported against itself (depth 0).
 *
 * @param sources - Functions identified as source-adjacent (read user input)
 * @param sinks - Functions identified as sink-adjacent (perform dangerous ops)
 * @param callsGraph - Forward adjacency map: nodeId -> [calleeIds]
 * @param maxDepth - Maximum BFS depth in hops (default: 5)
 * @returns All source-to-sink paths found, sorted by risk (critical first),
 *   then by depth (shortest first)
 */
export function buildSourceSinkPaths(
  sources: SourceNode[],
  sinks: SinkNode[],
  callsGraph: Map<string, string[]>,
  maxDepth: number = 5,
): SourceSinkPath[] {
  const sinkMap = new Map<string, SinkNode>(sinks.map((s) => [s.id, s]));
  const results: SourceSinkPath[] = [];

  for (const source of sources) {
    const visited = new Set<string>([source.id]);
    // Queue of [nodeId, path from the source to nodeId]. It is consumed with a
    // moving head index instead of Array.prototype.shift(), which re-indexes
    // the array on every dequeue and makes wide graphs quadratic.
    const queue: Array<[string, string[]]> = [[source.id, [source.id]]];

    for (let head = 0; head < queue.length; head++) {
      const item = queue[head];
      if (item === undefined) break;
      const [currentId, currentPath] = item;
      const depth = currentPath.length - 1;

      // Nodes already maxDepth hops away are not expanded further.
      if (depth >= maxDepth) continue;

      for (const calleeId of callsGraph.get(currentId) ?? []) {
        if (visited.has(calleeId)) continue;
        visited.add(calleeId);

        const newPath = [...currentPath, calleeId];
        const hops = newPath.length - 1;

        const sink = sinkMap.get(calleeId);
        if (sink) {
          results.push({
            source,
            sink,
            path: newPath,
            depth: hops,
            owasp: sink.owasp,
            risk: computeRisk(hops, sink.owasp),
          });
          // Don't stop — there may be other sinks reachable through this one.
        }

        queue.push([calleeId, newPath]);
      }
    }
  }

  // Sort by risk (critical first), then by depth (shortest first)
  const riskOrder = { critical: 0, high: 1, medium: 2, low: 3 };
  results.sort((a, b) => riskOrder[a.risk] - riskOrder[b.risk] || a.depth - b.depth);

  return results;
}

/**
 * Grade a finding from its hop count and the sink's OWASP category.
 * Catalog severity is not consulted here; only depth and category are used.
 */
function computeRisk(depth: number, owasp: string): 'critical' | 'high' | 'medium' | 'low' {
  // Direct call to an injection/XSS sink = critical
  if (depth <= 1 && (owasp === 'A03-injection' || owasp === 'A07-xss')) return 'critical';
  // Short path to any sink = high
  if (depth <= 2) return 'high';
  // Longer paths = medium (may have sanitizers in between)
  if (depth <= 4) return 'medium';
  // Very long paths are low confidence
  return 'low';
}
'network']).toContain(source.category); + } + }); + + it('contains Go sources', () => { + const patterns = SOURCE_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('r.Body'); + expect(patterns).toContain('r.URL.Query()'); + expect(patterns).toContain('r.FormValue'); + expect(patterns).toContain('r.Header.Get'); + }); + + it('contains Rust/Actix sources', () => { + const patterns = SOURCE_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('web::Json'); + expect(patterns).toContain('web::Query'); + expect(patterns).toContain('web::Path'); + }); + + it('contains Spring annotation sources', () => { + const patterns = SOURCE_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('@RequestBody'); + expect(patterns).toContain('@RequestParam'); + expect(patterns).toContain('@PathVariable'); + }); + + it('contains Rails sources', () => { + const patterns = SOURCE_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('params['); + }); + + it('contains Ktor sources', () => { + const patterns = SOURCE_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('call.receive'); + expect(patterns).toContain('call.parameters'); + }); +}); + +describe('SINK_CATALOG', () => { + it('contains dangerous sinks', () => { + const names = SINK_CATALOG.map((s) => s.pattern); + expect(names).toContain('eval'); + expect(names).toContain('exec'); + expect(names).toContain('innerHTML'); + }); + + it('each sink has an OWASP category', () => { + for (const sink of SINK_CATALOG) { + expect(sink.owasp).toBeDefined(); + } + }); + + it('contains Go sinks', () => { + const patterns = SINK_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('os.exec'); + expect(patterns).toContain('sql.Query'); + expect(patterns).toContain('template.HTML'); + }); + + it('contains Rust sinks', () => { + const patterns = SINK_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('Command::new'); + expect(patterns).toContain('sqlx::query'); + }); + + it('contains Spring sinks', 
() => { + const patterns = SINK_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('jdbcTemplate.query'); + expect(patterns).toContain('Runtime.exec'); + }); + + it('contains Rails sinks', () => { + const patterns = SINK_CATALOG.map((s) => s.pattern); + expect(patterns).toContain('system('); + expect(patterns).toContain('ActiveRecord::Base.connection.execute'); + }); +}); + +describe('isSourceAdjacent', () => { + it('matches function that reads request body', () => { + const content = `async function handlePOST(req) { const data = await req.json(); }`; + expect(isSourceAdjacent('handlePOST', content)).toBe(true); + }); + + it('does not match function without user input', () => { + const content = `function add(a, b) { return a + b; }`; + expect(isSourceAdjacent('add', content)).toBe(false); + }); + + it('matches Go HTTP handler reading body', () => { + const content = `func handler(w http.ResponseWriter, r *http.Request) { body := r.Body }`; + expect(isSourceAdjacent('handler', content, 'go')).toBe(true); + }); + + it('matches Spring annotation in Java content', () => { + const content = `public ResponseEntity create(@RequestBody UserDto dto) { return ok(); }`; + expect(isSourceAdjacent('create', content, 'java')).toBe(true); + }); +}); + +describe('isSinkAdjacent', () => { + it('matches function with database write', () => { + const content = `async function save(data) { await prisma.grant.create({ data }); }`; + expect(isSinkAdjacent('save', content)).toBe(true); + }); + + it('matches function with exec call', () => { + const content = `function run(cmd) { exec(cmd); }`; + expect(isSinkAdjacent('run', content)).toBe(true); + }); + + it('does not match safe function', () => { + const content = `function format(s) { return s.trim(); }`; + expect(isSinkAdjacent('format', content)).toBe(false); + }); + + it('matches Go sql.Query sink', () => { + const content = `func getUser(db *sql.DB, id string) { rows, _ := db.sql.Query("SELECT * FROM users WHERE id=" + 
id) }`; + expect(isSinkAdjacent('getUser', content, 'go')).toBe(true); + }); + + it('matches Rust Command::new sink', () => { + const content = `fn run_cmd(input: &str) { Command::new(input).output().unwrap(); }`; + expect(isSinkAdjacent('run_cmd', content, 'rust')).toBe(true); + }); +}); + +describe('mergeCatalogs', () => { + it('returns built-in catalogs when user config is null', () => { + const result = mergeCatalogs(null); + expect(result.sources).toEqual(SOURCE_CATALOG); + expect(result.sinks).toEqual(SINK_CATALOG); + }); + + it('merges user-defined sources with built-in catalog', () => { + const userConfig: UserSecurityConfig = { + sources: [ + { pattern: 'myCustomInput', category: 'user_input', description: 'Custom input source' }, + ], + }; + const result = mergeCatalogs(userConfig); + expect(result.sources.length).toBe(SOURCE_CATALOG.length + 1); + expect(result.sources[result.sources.length - 1].pattern).toBe('myCustomInput'); + }); + + it('merges user-defined sinks with built-in catalog', () => { + const userConfig: UserSecurityConfig = { + sinks: [ + { + pattern: 'dangerousOp', + owasp: 'A03-injection', + severity: 'high', + description: 'Custom sink', + }, + ], + }; + const result = mergeCatalogs(userConfig); + expect(result.sinks.length).toBe(SINK_CATALOG.length + 1); + expect(result.sinks[result.sinks.length - 1].pattern).toBe('dangerousOp'); + }); + + it('merged catalogs work with compilePatterns and getMatchingSources', () => { + const userConfig: UserSecurityConfig = { + sources: [ + { pattern: 'myCustomInput', category: 'user_input', description: 'Custom input source' }, + ], + sinks: [ + { + pattern: 'dangerousOp', + owasp: 'A03-injection', + severity: 'high', + description: 'Custom sink', + }, + ], + }; + const merged = mergeCatalogs(userConfig); + const compiledSources = compilePatterns(merged.sources); + const compiledSinks = compilePatterns(merged.sinks); + + // User-defined source should be detected + const content = `function handle() { 
const data = myCustomInput(); dangerousOp(data); }`; + const matchedSources = getMatchingSources(content, undefined, compiledSources); + expect(matchedSources.some((s) => s.pattern === 'myCustomInput')).toBe(true); + + // User-defined sink should be detected + const matchedSinks = getMatchingSinks(content, undefined, compiledSinks); + expect(matchedSinks.some((s) => s.pattern === 'dangerousOp')).toBe(true); + }); + + it('does not modify built-in catalog arrays', () => { + const originalSourceCount = SOURCE_CATALOG.length; + const originalSinkCount = SINK_CATALOG.length; + mergeCatalogs({ + sources: [{ pattern: 'x', category: 'user_input', description: 'test' }], + sinks: [{ pattern: 'y', owasp: 'A03-injection', severity: 'high', description: 'test' }], + }); + expect(SOURCE_CATALOG.length).toBe(originalSourceCount); + expect(SINK_CATALOG.length).toBe(originalSinkCount); + }); +}); + +describe('buildSourceSinkPaths', () => { + it('finds path from source to sink through CALLS chain', () => { + const sources = [ + { + id: 'func:handlePOST', + name: 'handlePOST', + filePath: 'route.ts', + sourcePatterns: ['req.body'], + }, + ]; + const sinks = [ + { + id: 'func:createGrant', + name: 'createGrant', + filePath: 'service.ts', + sinkPatterns: ['prisma.'], + owasp: 'A03-injection' as const, + }, + ]; + const callsGraph = new Map([ + ['func:handlePOST', ['func:validateInput']], + ['func:validateInput', ['func:createGrant']], + ]); + + const paths = buildSourceSinkPaths(sources, sinks, callsGraph, 5); + expect(paths).toHaveLength(1); + expect(paths[0].source.name).toBe('handlePOST'); + expect(paths[0].sink.name).toBe('createGrant'); + expect(paths[0].path).toEqual(['func:handlePOST', 'func:validateInput', 'func:createGrant']); + expect(paths[0].depth).toBe(2); + }); + + it('returns empty when no path exists', () => { + const sources = [{ id: 'func:a', name: 'a', filePath: 'a.ts', sourcePatterns: ['req.body'] }]; + const sinks = [ + { + id: 'func:z', + name: 'z', + filePath: 
'z.ts', + sinkPatterns: ['eval'], + owasp: 'A03-injection' as const, + }, + ]; + const callsGraph = new Map([ + ['func:a', ['func:b']], + // func:b doesn't call func:z + ]); + + const paths = buildSourceSinkPaths(sources, sinks, callsGraph, 5); + expect(paths).toHaveLength(0); + }); + + it('respects maxDepth', () => { + const sources = [{ id: 'func:a', name: 'a', filePath: 'a.ts', sourcePatterns: ['req.body'] }]; + const sinks = [ + { + id: 'func:d', + name: 'd', + filePath: 'd.ts', + sinkPatterns: ['eval'], + owasp: 'A03-injection' as const, + }, + ]; + const callsGraph = new Map([ + ['func:a', ['func:b']], + ['func:b', ['func:c']], + ['func:c', ['func:d']], + ]); + + // maxDepth 2 should not reach func:d (3 hops away) + const paths = buildSourceSinkPaths(sources, sinks, callsGraph, 2); + expect(paths).toHaveLength(0); + + // maxDepth 3 should find it + const paths3 = buildSourceSinkPaths(sources, sinks, callsGraph, 3); + expect(paths3).toHaveLength(1); + }); +}); + +describe('language-scoped pattern filtering', () => { + it('does not match Python-only source pattern against TypeScript content', () => { + const compiled = compilePatterns(SOURCE_CATALOG); + // request.data is a Python/DRF-only source pattern + const tsContent = 'const payload = request.data;'; + const matches = getMatchingSources(tsContent, 'typescript', compiled); + // Should NOT match because request.data is scoped to python + const pythonOnlyMatches = matches.filter( + (m: any) => + m.languages && m.languages.includes('python') && !m.languages.includes('typescript'), + ); + expect(pythonOnlyMatches).toHaveLength(0); + }); + + it('matches Python-only source pattern when language is python', () => { + const compiled = compilePatterns(SOURCE_CATALOG); + const pyContent = 'data = request.data'; + const matches = getMatchingSources(pyContent, 'python', compiled); + expect(matches.some((m: any) => m.pattern === 'request.data')).toBe(true); + }); + + it('does not match PHP-only sink pattern against 
JavaScript content', () => { + const compiled = compilePatterns(SINK_CATALOG); + // Check that a PHP-scoped pattern doesn't match JS content + const jsContent = 'const result = mysqli_query(db, sql);'; + const matches = getMatchingSinks(jsContent, 'javascript', compiled); + const phpOnlyMatches = matches.filter( + (m: any) => m.languages && m.languages.includes('php') && !m.languages.includes('javascript'), + ); + expect(phpOnlyMatches).toHaveLength(0); + }); +}); diff --git a/gitnexus/test/unit/tools.test.ts b/gitnexus/test/unit/tools.test.ts index 4274716a78..e96f933a22 100644 --- a/gitnexus/test/unit/tools.test.ts +++ b/gitnexus/test/unit/tools.test.ts @@ -19,8 +19,8 @@ const GROUP_TOOLS = new Set([ ]); describe('GITNEXUS_TOOLS', () => { - it('exports all tools (7 base + 3 route/tool/shape + 1 api_impact + 5 group)', () => { - expect(GITNEXUS_TOOLS).toHaveLength(16); + it('exports all tools (7 base + 3 route/tool/shape + 1 api_impact + 1 source_sink + 5 group)', () => { + expect(GITNEXUS_TOOLS).toHaveLength(17); }); it('contains all expected tool names', () => { @@ -35,6 +35,7 @@ describe('GITNEXUS_TOOLS', () => { 'rename', 'impact', 'api_impact', + 'source_sink', ]), ); });