From 5ccbfe6a3d9e82468a1267e1fa6e0e1fbe75a938 Mon Sep 17 00:00:00 2001 From: marxo126 Date: Sat, 28 Mar 2026 11:38:53 +0100 Subject: [PATCH] feat: source-sink security scanning + parameter data flow tracking Two complementary features for security and data flow analysis: 1. Source-Sink Scanner (MCP tool: source_sink) - BFS reachability from user-input sources to dangerous sinks - OWASP A03/A07/A10 coverage across 10+ languages - User-extensible catalogs via .gitnexus/security.json - Risk-ranked findings with path visualization 2. Parameter Data Flow (pipeline Phase 3.6b) - Extract function/method parameters from AST (tree-sitter) - Parameter nodes with type annotations and position - PASSES_TO edges mapping call-site arguments to callee parameters - Foundation for future taint tracking New node type: Parameter. New edge types: PASSES_TO, DATA_FLOWS_TO. New security module: src/security/ (catalogs.ts, source-sink-scanner.ts). Co-Authored-By: Claude Opus 4.6 (1M context) --- gitnexus/src/core/graph/types.ts | 8 +- .../src/core/ingestion/parameter-processor.ts | 99 ++++++ .../src/core/ingestion/parsing-processor.ts | 9 +- gitnexus/src/core/ingestion/pipeline.ts | 80 ++++- .../core/ingestion/workers/parse-worker.ts | 105 +++++- gitnexus/src/core/lbug/csv-generator.ts | 16 +- gitnexus/src/core/lbug/lbug-adapter.ts | 7 + gitnexus/src/core/lbug/schema.ts | 22 +- gitnexus/src/mcp/local/local-backend.ts | 113 +++++++ gitnexus/src/mcp/server.ts | 3 + gitnexus/src/mcp/tools.ts | 32 ++ gitnexus/src/security/catalogs.ts | 311 ++++++++++++++++++ gitnexus/src/security/source-sink-scanner.ts | 116 +++++++ .../test/unit/parameter-extraction.test.ts | 45 +++ .../test/unit/parameter-processor.test.ts | 126 +++++++ gitnexus/test/unit/parameter-schema.test.ts | 40 +++ gitnexus/test/unit/schema.test.ts | 10 +- .../test/unit/source-sink-scanner.test.ts | 262 +++++++++++++++ 18 files changed, 1386 insertions(+), 18 deletions(-) create mode 100644 
gitnexus/src/core/ingestion/parameter-processor.ts create mode 100644 gitnexus/src/security/catalogs.ts create mode 100644 gitnexus/src/security/source-sink-scanner.ts create mode 100644 gitnexus/test/unit/parameter-extraction.test.ts create mode 100644 gitnexus/test/unit/parameter-processor.test.ts create mode 100644 gitnexus/test/unit/parameter-schema.test.ts create mode 100644 gitnexus/test/unit/source-sink-scanner.test.ts diff --git a/gitnexus/src/core/graph/types.ts b/gitnexus/src/core/graph/types.ts index ed89a2ad4c..4514728c86 100644 --- a/gitnexus/src/core/graph/types.ts +++ b/gitnexus/src/core/graph/types.ts @@ -35,7 +35,8 @@ export type NodeLabel = | 'Template' | 'Section' | 'Route' // API route endpoint (e.g., /api/grants) - | 'Tool'; // MCP tool definition + | 'Tool' // MCP tool definition + | 'Parameter'; // Function/method parameter (first-class for data flow tracking) import { SupportedLanguages } from '../../config/supported-languages.js'; @@ -82,6 +83,9 @@ export type NodeProperties = { errorKeys?: string[], // Middleware wrapper chain (outermost first): ['withRateLimit', 'withCSRF', 'withAuth'] middleware?: string[], + // Parameter-specific properties + paramIndex?: number, // 0-indexed position in parameter list + isRest?: boolean, // ...args rest parameter } export type RelationshipType = @@ -106,6 +110,8 @@ export type RelationshipType = | 'ENTRY_POINT_OF' // Route/Tool → Process (this endpoint starts this execution flow) | 'WRAPS' // Function → Function (middleware wrapper chain) — Reserved: future middleware graph traversal (not yet emitted) | 'QUERIES' // File/Function → CodeElement (ORM query to model/table) + | 'PASSES_TO' // Call-site argument maps to callee parameter + | 'DATA_FLOWS_TO' // Variable assignment / data propagation within function export interface GraphNode { id: string, diff --git a/gitnexus/src/core/ingestion/parameter-processor.ts b/gitnexus/src/core/ingestion/parameter-processor.ts new file mode 100644 index 
/**
 * Parameter Processor
 *
 * Creates Parameter nodes from extracted parameter data and builds
 * PASSES_TO edges by mapping call-site argument positions to callee parameters.
 *
 * This is Phase B of the security analysis path (architecture assessment).
 */

import { generateId } from '../../lib/utils.js';
import type { ExtractedParameter } from './workers/parse-worker.js';

export interface ParameterNode {
  id: string;
  name: string;
  filePath: string;
  paramIndex: number;
  declaredType?: string;
  isRest: boolean;
  /** ID of the owning function/method */
  ownerId: string;
}

export interface PassesToEdge {
  id: string;
  /** The CALLS edge source (caller function) */
  callerId: string;
  /** The Parameter node being passed to */
  targetParamId: string;
  /**
   * Position of the receiving parameter in the callee's parameter list.
   * For a rest parameter this is the index of the rest parameter itself.
   */
  sourceParamIndex: number;
  /** The call edge's `confidence` when supplied, otherwise DEFAULT_PASSES_TO_CONFIDENCE */
  confidence: number;
}

/** Confidence used for PASSES_TO edges when the call edge carries none. */
const DEFAULT_PASSES_TO_CONFIDENCE = 0.9;

/**
 * Single source of truth for a Parameter node id, so that node creation and
 * edge creation can never disagree on the id format.
 */
function parameterNodeId(
  p: Pick<ExtractedParameter, 'functionId' | 'paramName' | 'paramIndex'>,
): string {
  return generateId('Parameter', `${p.functionId}:${p.paramName}:${p.paramIndex}`);
}

/**
 * Create Parameter graph nodes from extracted parameter data.
 *
 * Entries that map to an id already produced (same owner, name and index)
 * are dropped; the first occurrence wins.
 *
 * @param params Parameters extracted by the parse workers.
 * @returns One ParameterNode per distinct parameter id, in input order.
 */
export function createParameterNodes(params: ExtractedParameter[]): ParameterNode[] {
  const nodes: ParameterNode[] = [];
  const seenIds = new Set<string>();

  for (const p of params) {
    const id = parameterNodeId(p);
    if (seenIds.has(id)) continue;
    seenIds.add(id);

    nodes.push({
      id,
      name: p.paramName,
      filePath: p.filePath,
      paramIndex: p.paramIndex,
      declaredType: p.declaredType,
      isRest: p.isRest,
      ownerId: p.functionId,
    });
  }

  return nodes;
}

/**
 * Build PASSES_TO edges by matching call-site argument positions
 * to callee parameter positions.
 *
 * For each call edge (caller -> callee) whose callee has extracted parameters,
 * an edge is created from the caller to every callee parameter that receives
 * an argument:
 *   - a positional parameter matches when `paramIndex < argCount`;
 *   - a rest parameter matches whenever the call passes at least one argument.
 *
 * Calls with no arguments (missing, non-positive or NaN `argCount`) produce no
 * edges. Edges are de-duplicated by id, so repeated calls from the same caller
 * to the same callee yield a single edge per parameter.
 *
 * @param callEdges      Call edges with their call-site argument count and,
 *                       optionally, the confidence to propagate.
 * @param calleeParamMap Extracted parameters keyed by the owning function id.
 * @returns The PASSES_TO edges, in discovery order.
 */
export function buildPassesToEdges(
  callEdges: Array<{ sourceId: string; targetId: string; argCount: number; confidence?: number }>,
  calleeParamMap: Map<string, ExtractedParameter[]>,
): PassesToEdge[] {
  const edges: PassesToEdge[] = [];
  const seenEdgeIds = new Set<string>();

  for (const call of callEdges) {
    const params = calleeParamMap.get(call.targetId);
    if (!params || params.length === 0) continue;

    // Normalises undefined / NaN / negative counts to "no arguments".
    const argCount = call.argCount > 0 ? call.argCount : 0;
    // Nothing is passed, so no parameter (rest included) receives data.
    if (argCount === 0) continue;

    for (const param of params) {
      // Positional parameters beyond the supplied arguments receive nothing.
      // Rest parameters stay eligible because argCount > 0 is guaranteed here.
      if (param.paramIndex >= argCount && !param.isRest) continue;

      const paramNodeId = parameterNodeId(param);
      const edgeId = generateId('PASSES_TO', `${call.sourceId}->${paramNodeId}`);
      // The id depends only on (caller, parameter); skip repeats so the graph
      // never receives two relationships with the same id.
      if (seenEdgeIds.has(edgeId)) continue;
      seenEdgeIds.add(edgeId);

      edges.push({
        id: edgeId,
        callerId: call.sourceId,
        targetParamId: paramNodeId,
        sourceParamIndex: param.paramIndex,
        confidence: call.confidence ?? DEFAULT_PASSES_TO_CONFIDENCE,
      });
    }
  }

  return edges;
}
FileConstructorBindings, FileTypeEnvBindings, ExtractedORMQuery, ExtractedParameter } from './workers/parse-worker.js'; import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from './constants.js'; export type FileProgressCallback = (current: number, total: number, filePath: string) => void; @@ -30,6 +30,7 @@ export interface WorkerExtractedData { ormQueries: ExtractedORMQuery[]; constructorBindings: FileConstructorBindings[]; typeEnvBindings: FileTypeEnvBindings[]; + parameters: ExtractedParameter[]; } // ============================================================================ @@ -51,7 +52,7 @@ const processParsingWithWorkers = async ( if (lang) parseableFiles.push({ path: file.path, content: file.content }); } - if (parseableFiles.length === 0) return { imports: [], calls: [], assignments: [], heritage: [], routes: [], fetchCalls: [], decoratorRoutes: [], toolDefs: [], ormQueries: [], constructorBindings: [], typeEnvBindings: [] }; + if (parseableFiles.length === 0) return { imports: [], calls: [], assignments: [], heritage: [], routes: [], fetchCalls: [], decoratorRoutes: [], toolDefs: [], ormQueries: [], constructorBindings: [], typeEnvBindings: [], parameters: [] }; const total = files.length; @@ -75,6 +76,7 @@ const processParsingWithWorkers = async ( const allORMQueries: ExtractedORMQuery[] = []; const allConstructorBindings: FileConstructorBindings[] = []; const allTypeEnvBindings: FileTypeEnvBindings[] = []; + const allParameters: ExtractedParameter[] = []; for (const result of chunkResults) { for (const node of result.nodes) { graph.addNode({ @@ -108,6 +110,7 @@ const processParsingWithWorkers = async ( allDecoratorRoutes.push(...result.decoratorRoutes); allToolDefs.push(...result.toolDefs); if (result.ormQueries) allORMQueries.push(...result.ormQueries); + if (result.parameters) allParameters.push(...result.parameters); allConstructorBindings.push(...result.constructorBindings); allTypeEnvBindings.push(...result.typeEnvBindings); } @@ -128,7 
+131,7 @@ const processParsingWithWorkers = async ( // Final progress onFileProgress?.(total, total, 'done'); - return { imports: allImports, calls: allCalls, assignments: allAssignments, heritage: allHeritage, routes: allRoutes, fetchCalls: allFetchCalls, decoratorRoutes: allDecoratorRoutes, toolDefs: allToolDefs, ormQueries: allORMQueries, constructorBindings: allConstructorBindings, typeEnvBindings: allTypeEnvBindings }; + return { imports: allImports, calls: allCalls, assignments: allAssignments, heritage: allHeritage, routes: allRoutes, fetchCalls: allFetchCalls, decoratorRoutes: allDecoratorRoutes, toolDefs: allToolDefs, ormQueries: allORMQueries, constructorBindings: allConstructorBindings, typeEnvBindings: allTypeEnvBindings, parameters: allParameters }; }; // ============================================================================ diff --git a/gitnexus/src/core/ingestion/pipeline.ts b/gitnexus/src/core/ingestion/pipeline.ts index 39294214ed..1defc60561 100644 --- a/gitnexus/src/core/ingestion/pipeline.ts +++ b/gitnexus/src/core/ingestion/pipeline.ts @@ -16,7 +16,7 @@ import { phpFileToRouteURL } from './route-extractors/php.js'; import { extractResponseShapes, extractPHPResponseShapes } from './route-extractors/response-shapes.js'; import { extractMiddlewareChain, extractNextjsMiddlewareConfig, compileMatcher, compiledMatcherMatchesRoute } from './route-extractors/middleware.js'; import { generateId } from '../../lib/utils.js'; -import type { ExtractedFetchCall, ExtractedRoute, ExtractedDecoratorRoute, ExtractedToolDef, ExtractedORMQuery } from './workers/parse-worker.js'; +import type { ExtractedFetchCall, ExtractedRoute, ExtractedDecoratorRoute, ExtractedToolDef, ExtractedORMQuery, ExtractedParameter } from './workers/parse-worker.js'; import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js'; import { computeMRO } from './mro-processor.js'; import { processCommunities } from './community-processor.js'; @@ -544,6 +544,7 
@@ async function runChunkedParseAndResolve( allDecoratorRoutes: ExtractedDecoratorRoute[]; allToolDefs: ExtractedToolDef[]; allORMQueries: ExtractedORMQuery[]; + allParameters: ExtractedParameter[]; }> { const symbolTable = ctx.symbols; @@ -664,7 +665,8 @@ async function runChunkedParseAndResolve( const allDecoratorRoutes: ExtractedDecoratorRoute[] = []; // Accumulate MCP/RPC tool definitions (@mcp.tool(), @app.tool(), etc.) const allToolDefs: ExtractedToolDef[] = []; - const allORMQueries: ExtractedORMQuery[] = []; + const allORMQueries: ExtractedORMQuery[] = []; + const allParameters: ExtractedParameter[] = []; try { for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) { @@ -796,6 +798,9 @@ async function runChunkedParseAndResolve( if (chunkWorkerData.ormQueries?.length) { allORMQueries.push(...chunkWorkerData.ormQueries); } + if (chunkWorkerData.parameters?.length) { + allParameters.push(...chunkWorkerData.parameters); + } } else { await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths); sequentialChunkPaths.push(chunkPaths); @@ -891,7 +896,7 @@ async function runChunkedParseAndResolve( importCtx.index = EMPTY_INDEX; // Release suffix index memory (~30MB for large repos) importCtx.normalizedFileList = []; - return { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries }; + return { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries, allParameters }; } /** @@ -1113,7 +1118,7 @@ export const runPipelineFromRepo = async ( const { scannedFiles, allPaths, totalFiles } = await runScanAndStructure(repoPath, graph, onProgress); // Phase 3+4: Chunked parse + resolve (imports, calls, heritage, routes) - const { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, allORMQueries } = await runChunkedParseAndResolve( + const { exportedTypeMap, allFetchCalls, allExtractedRoutes, allDecoratorRoutes, allToolDefs, 
allORMQueries, allParameters } = await runChunkedParseAndResolve( graph, ctx, scannedFiles, allPaths, totalFiles, repoPath, pipelineStart, onProgress, ); @@ -1378,6 +1383,73 @@ export const runPipelineFromRepo = async ( } } + // ── Phase 3.6b: Parameter Nodes + PASSES_TO ────────────────────── + if (allParameters.length > 0) { + const { createParameterNodes, buildPassesToEdges } = await import('./parameter-processor.js'); + + // Create Parameter nodes + const paramNodes = createParameterNodes(allParameters); + for (const pn of paramNodes) { + graph.addNode({ + id: pn.id, + label: 'Parameter', + properties: { + name: pn.name, + filePath: pn.filePath, + paramIndex: pn.paramIndex, + declaredType: pn.declaredType, + isRest: pn.isRest, + }, + }); + + // DEFINES edge from function to parameter + graph.addRelationship({ + id: generateId('DEFINES', `${pn.ownerId}->${pn.id}`), + sourceId: pn.ownerId, + targetId: pn.id, + type: 'DEFINES', + confidence: 1.0, + reason: 'parameter-definition', + }); + } + + // Build PASSES_TO edges from existing CALLS edges + const callEdgesWithArgs: Array<{ sourceId: string; targetId: string; argCount: number }> = []; + for (const rel of graph.iterRelationships()) { + if (rel.type === 'CALLS') { + callEdgesWithArgs.push({ + sourceId: rel.sourceId, + targetId: rel.targetId, + argCount: (rel as any).argCount || 0, + }); + } + } + + // Group parameters by function ID + const paramsByFunction = new Map(); + for (const p of allParameters) { + const existing = paramsByFunction.get(p.functionId) || []; + existing.push(p); + paramsByFunction.set(p.functionId, existing); + } + + const passesToEdges = buildPassesToEdges(callEdgesWithArgs, paramsByFunction); + for (const edge of passesToEdges) { + graph.addRelationship({ + id: edge.id, + sourceId: edge.callerId, + targetId: edge.targetParamId, + type: 'PASSES_TO', + confidence: edge.confidence, + reason: `arg-${edge.sourceParamIndex}`, + }); + } + + if (isDev) { + console.log(`📎 Parameters: 
export interface ExtractedParameter {
  filePath: string;
  /** Name of the enclosing function/method */
  functionName: string;
  /** generateId of the enclosing function/method */
  functionId: string;
  /** Parameter name */
  paramName: string;
  /** 0-indexed position */
  paramIndex: number;
  /** Declared type annotation (e.g., 'NextRequest', 'string') */
  declaredType?: string;
  /** Whether this is a rest parameter (...args) */
  isRest: boolean;
}

// ============================================================================
// Parameter name extraction helper
// ============================================================================

/**
 * Extract the parameter name from various AST node types.
 *
 * Resolution order:
 *   1. bare identifier                       -> its text
 *   2. default-valued pattern (`x = 1`)      -> name of the `left` side
 *   3. node with a `pattern` or `name` field -> that child's text
 *      (a rest child is unwrapped to the identifier it contains)
 *   4. bare rest pattern (`...args`)         -> the contained identifier
 *   5. destructuring pattern                 -> the full pattern text
 *   6. otherwise                             -> first identifier-like named child
 *
 * @param paramNode A child of a parameter list.
 * @returns The parameter name, or `undefined` when none can be determined.
 */
function extractParamName(paramNode: Parser.SyntaxNode): string | undefined {
  const nodeType = paramNode.type;

  // Simple identifier: function foo(x)
  if (nodeType === 'identifier' || nodeType === 'simple_identifier') return paramNode.text;

  // Default value: function foo(x = 1) / function foo({ a } = defaults).
  // Resolve through `left` so the identifier fallback below can never pick up
  // an identifier that belongs to the default-value expression.
  if (nodeType === 'assignment_pattern') {
    const left = paramNode.childForFieldName('left');
    return left ? extractParamName(left) : undefined;
  }

  // Typed parameter (`pattern` field) or a grammar that exposes the
  // parameter through a `name` field.
  const named = paramNode.childForFieldName('pattern') || paramNode.childForFieldName('name');
  if (named) {
    // `...args: T[]` — the pattern is itself a rest node; unwrap it.
    if (named.type === 'rest_pattern' || named.type === 'rest_element') {
      return named.namedChildren[0]?.text;
    }
    return named.text;
  }

  // Rest parameter: function foo(...args)
  if (nodeType === 'rest_pattern' || nodeType === 'rest_element') {
    return paramNode.namedChildren[0]?.text;
  }

  // Destructured: function foo({ a, b }) — use the full text as name
  if (nodeType === 'object_pattern' || nodeType === 'array_pattern') {
    return paramNode.text;
  }

  // Fallback: first named child that looks like an identifier
  for (const child of paramNode.namedChildren) {
    if (child.type === 'identifier' || child.type === 'simple_identifier') {
      return child.text;
    }
  }
  return undefined;
}
+ const isRest = paramChild.type === 'rest_pattern' || paramChild.type === 'rest_element' + || (paramChild.type === 'required_parameter' && paramChild.children?.some((c: any) => c.type === 'rest_pattern')); + const typeNode = paramChild.childForFieldName('type'); + const pDeclaredType = typeNode?.text?.replace(/^:\s*/, '') || undefined; + + result.parameters.push({ + filePath: file.path, + functionName: nodeName, + functionId: nodeId, + paramName: pName, + paramIndex: paramIdx, + declaredType: pDeclaredType, + isRest, + }); + paramIdx++; + } + } + } } else if (nodeLabel === 'Property' && definitionNode) { // FieldExtractor is the single source of truth when available if (provider.fieldExtractor && typeEnv) { @@ -1548,7 +1648,7 @@ const processFileGroup = ( /** Accumulated result across sub-batches */ let accumulated: ParseWorkerResult = { nodes: [], relationships: [], symbols: [], - imports: [], calls: [], assignments: [], heritage: [], routes: [], fetchCalls: [], decoratorRoutes: [], toolDefs: [], ormQueries: [], constructorBindings: [], typeEnvBindings: [], skippedLanguages: {}, fileCount: 0, + imports: [], calls: [], assignments: [], heritage: [], routes: [], fetchCalls: [], decoratorRoutes: [], toolDefs: [], ormQueries: [], constructorBindings: [], typeEnvBindings: [], parameters: [], skippedLanguages: {}, fileCount: 0, }; let cumulativeProcessed = 0; @@ -1567,6 +1667,7 @@ const mergeResult = (target: ParseWorkerResult, src: ParseWorkerResult) => { target.ormQueries.push(...src.ormQueries); target.constructorBindings.push(...src.constructorBindings); target.typeEnvBindings.push(...src.typeEnvBindings); + target.parameters.push(...src.parameters); for (const [lang, count] of Object.entries(src.skippedLanguages)) { target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count; } @@ -1591,7 +1692,7 @@ parentPort!.on('message', (msg: any) => { if (msg && msg.type === 'flush') { parentPort!.postMessage({ type: 'result', data: accumulated }); // Reset 
for potential reuse - accumulated = { nodes: [], relationships: [], symbols: [], imports: [], calls: [], assignments: [], heritage: [], routes: [], fetchCalls: [], decoratorRoutes: [], toolDefs: [], ormQueries: [], constructorBindings: [], typeEnvBindings: [], skippedLanguages: {}, fileCount: 0 }; + accumulated = { nodes: [], relationships: [], symbols: [], imports: [], calls: [], assignments: [], heritage: [], routes: [], fetchCalls: [], decoratorRoutes: [], toolDefs: [], ormQueries: [], constructorBindings: [], typeEnvBindings: [], parameters: [], skippedLanguages: {}, fileCount: 0 }; cumulativeProcessed = 0; return; } diff --git a/gitnexus/src/core/lbug/csv-generator.ts b/gitnexus/src/core/lbug/csv-generator.ts index f07b2e4725..614f574cdb 100644 --- a/gitnexus/src/core/lbug/csv-generator.ts +++ b/gitnexus/src/core/lbug/csv-generator.ts @@ -247,6 +247,9 @@ export const streamAllCSVsToDisk = async ( // Tool nodes for MCP tool definitions const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description'); + // Parameter nodes for data flow tracking + const parameterWriter = new BufferedCSVWriter(path.join(csvDir, 'parameter.csv'), 'id,name,filePath,paramIndex,declaredType,isRest'); + // Multi-language node types share the same CSV shape (no isExported column) const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description'; const MULTI_LANG_TYPES = ['Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl', @@ -374,6 +377,16 @@ export const streamAllCSVsToDisk = async ( escapeCSVField((node.properties as any).description || ''), ].join(',')); break; + case 'Parameter': + await parameterWriter.addRow([ + escapeCSVField(node.id), + escapeCSVField(node.properties.name || ''), + escapeCSVField(node.properties.filePath || ''), + escapeCSVNumber((node.properties as any).paramIndex, 0), + escapeCSVField((node.properties as any).declaredType || ''), + (node.properties as any).isRest ? 
'true' : 'false', + ].join(',')); + break; default: { // Code element nodes (Function, Class, Interface, CodeElement) const writer = codeWriterMap[node.label]; @@ -411,7 +424,7 @@ export const streamAllCSVsToDisk = async ( } // Finish all node writers - const allWriters = [fileWriter, folderWriter, functionWriter, classWriter, interfaceWriter, methodWriter, codeElemWriter, communityWriter, processWriter, sectionWriter, routeWriter, toolWriter, ...multiLangWriters.values()]; + const allWriters = [fileWriter, folderWriter, functionWriter, classWriter, interfaceWriter, methodWriter, codeElemWriter, communityWriter, processWriter, sectionWriter, routeWriter, toolWriter, parameterWriter, ...multiLangWriters.values()]; await Promise.all(allWriters.map(w => w.finish())); // --- Stream relationship CSV --- @@ -440,6 +453,7 @@ export const streamAllCSVsToDisk = async ( ['Section' as NodeTableName, sectionWriter], ['Route' as NodeTableName, routeWriter], ['Tool' as NodeTableName, toolWriter], + ['Parameter' as NodeTableName, parameterWriter], ...Array.from(multiLangWriters.entries()).map(([name, w]) => [name as NodeTableName, w] as [NodeTableName, BufferedCSVWriter]), ]; for (const [name, writer] of tableMap) { diff --git a/gitnexus/src/core/lbug/lbug-adapter.ts b/gitnexus/src/core/lbug/lbug-adapter.ts index 679baf6643..3f12efabec 100644 --- a/gitnexus/src/core/lbug/lbug-adapter.ts +++ b/gitnexus/src/core/lbug/lbug-adapter.ts @@ -394,6 +394,9 @@ const getCopyQuery = (table: NodeTableName, filePath: string): string => { if (table === 'Tool') { return `COPY ${t}(id, name, filePath, description) FROM "${filePath}" ${COPY_CSV_OPTS}`; } + if (table === 'Parameter') { + return `COPY ${t}(id, name, filePath, paramIndex, declaredType, isRest) FROM "${filePath}" ${COPY_CSV_OPTS}`; + } if (table === 'Method') { return `COPY ${t}(id, name, filePath, startLine, endLine, isExported, content, description, parameterCount, returnType) FROM "${filePath}" ${COPY_CSV_OPTS}`; } @@ -441,6 +444,8 
@@ export const insertNodeToLbug = async ( } else if (label === 'Section') { const descPart = properties.description ? `, description: ${escapeValue(properties.description)}` : ''; query = `CREATE (n:Section {id: ${escapeValue(properties.id)}, name: ${escapeValue(properties.name)}, filePath: ${escapeValue(properties.filePath)}, startLine: ${properties.startLine || 0}, endLine: ${properties.endLine || 0}, level: ${properties.level || 1}, content: ${escapeValue(properties.content || '')}${descPart}})`; + } else if (label === 'Parameter') { + query = `CREATE (n:Parameter {id: ${escapeValue(properties.id)}, name: ${escapeValue(properties.name)}, filePath: ${escapeValue(properties.filePath)}, paramIndex: ${properties.paramIndex || 0}, declaredType: ${escapeValue(properties.declaredType || '')}, isRest: ${!!properties.isRest}})`; } else if (TABLES_WITH_EXPORTED.has(label)) { const descPart = properties.description ? `, description: ${escapeValue(properties.description)}` : ''; query = `CREATE (n:${t} {id: ${escapeValue(properties.id)}, name: ${escapeValue(properties.name)}, filePath: ${escapeValue(properties.filePath)}, startLine: ${properties.startLine || 0}, endLine: ${properties.endLine || 0}, isExported: ${!!properties.isExported}, content: ${escapeValue(properties.content || '')}${descPart}})`; @@ -518,6 +523,8 @@ export const batchInsertNodesToLbug = async ( } else if (TABLES_WITH_EXPORTED.has(label)) { const descPart = properties.description ? 
`, n.description = ${escapeValue(properties.description)}` : ''; query = `MERGE (n:${t} {id: ${escapeValue(properties.id)}}) SET n.name = ${escapeValue(properties.name)}, n.filePath = ${escapeValue(properties.filePath)}, n.startLine = ${properties.startLine || 0}, n.endLine = ${properties.endLine || 0}, n.isExported = ${!!properties.isExported}, n.content = ${escapeValue(properties.content || '')}${descPart}`; + } else if (label === 'Parameter') { + query = `MERGE (n:Parameter {id: ${escapeValue(properties.id)}}) SET n.name = ${escapeValue(properties.name)}, n.filePath = ${escapeValue(properties.filePath)}, n.paramIndex = ${properties.paramIndex || 0}, n.declaredType = ${escapeValue(properties.declaredType || '')}, n.isRest = ${!!properties.isRest}`; } else { const descPart = properties.description ? `, n.description = ${escapeValue(properties.description)}` : ''; query = `MERGE (n:${t} {id: ${escapeValue(properties.id)}}) SET n.name = ${escapeValue(properties.name)}, n.filePath = ${escapeValue(properties.filePath)}, n.startLine = ${properties.startLine || 0}, n.endLine = ${properties.endLine || 0}, n.content = ${escapeValue(properties.content || '')}${descPart}`; diff --git a/gitnexus/src/core/lbug/schema.ts b/gitnexus/src/core/lbug/schema.ts index 5e61c93370..46cd95ab56 100644 --- a/gitnexus/src/core/lbug/schema.ts +++ b/gitnexus/src/core/lbug/schema.ts @@ -18,7 +18,8 @@ export const NODE_TABLES = [ 'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl', 'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module', 'Route', - 'Tool' + 'Tool', + 'Parameter' ] as const; export type NodeTableName = typeof NODE_TABLES[number]; @@ -29,7 +30,7 @@ export const REL_TABLE_NAME = 'CodeRelation'; // Valid relation types // Note: WRAPS is reserved for future middleware graph traversal (not yet emitted) -export const REL_TYPES = ['CONTAINS', 'DEFINES', 'IMPORTS', 'CALLS', 'EXTENDS', 'IMPLEMENTS', 
'HAS_METHOD', 'HAS_PROPERTY', 'ACCESSES', 'OVERRIDES', 'MEMBER_OF', 'STEP_IN_PROCESS', 'HANDLES_ROUTE', 'FETCHES', 'HANDLES_TOOL', 'ENTRY_POINT_OF', 'WRAPS', 'QUERIES'] as const; +export const REL_TYPES = ['CONTAINS', 'DEFINES', 'IMPORTS', 'CALLS', 'EXTENDS', 'IMPLEMENTS', 'HAS_METHOD', 'HAS_PROPERTY', 'ACCESSES', 'OVERRIDES', 'MEMBER_OF', 'STEP_IN_PROCESS', 'HANDLES_ROUTE', 'FETCHES', 'HANDLES_TOOL', 'ENTRY_POINT_OF', 'WRAPS', 'QUERIES', 'PASSES_TO', 'DATA_FLOWS_TO'] as const; export type RelType = typeof REL_TYPES[number]; // ============================================================================ @@ -217,6 +218,18 @@ CREATE NODE TABLE Tool ( PRIMARY KEY (id) )`; +// Function/method parameters (first-class for data flow tracking) +export const PARAMETER_SCHEMA = ` +CREATE NODE TABLE Parameter ( + id STRING, + name STRING, + filePath STRING, + paramIndex INT32, + declaredType STRING, + isRest BOOLEAN, + PRIMARY KEY (id) +)`; + // Markdown heading sections export const SECTION_SCHEMA = ` CREATE NODE TABLE Section ( @@ -338,6 +351,9 @@ CREATE REL TABLE ${REL_TABLE_NAME} ( FROM File TO Tool, FROM Function TO Tool, FROM Method TO Tool, + FROM Function TO Parameter, + FROM Method TO Parameter, + FROM Parameter TO Parameter, FROM CodeElement TO Community, FROM Interface TO Community, FROM Interface TO Function, @@ -513,6 +529,8 @@ export const NODE_SCHEMA_QUERIES = [ ROUTE_SCHEMA, // MCP tools TOOL_SCHEMA, + // Parameters (data flow tracking) + PARAMETER_SCHEMA, ]; export const REL_SCHEMA_QUERIES = [ diff --git a/gitnexus/src/mcp/local/local-backend.ts b/gitnexus/src/mcp/local/local-backend.ts index 3c73442238..25ce61c41c 100644 --- a/gitnexus/src/mcp/local/local-backend.ts +++ b/gitnexus/src/mcp/local/local-backend.ts @@ -408,6 +408,8 @@ export class LocalBackend { return this.toolMap(repo, params); case 'api_impact': return this.apiImpact(repo, params); + case 'source_sink': + return this.sourceSinkScan(repo, params); default: throw new Error(`Unknown tool: 
/**
 * MCP tool `source_sink`: structural source → sink reachability scan.
 *
 * Steps:
 *  1. Load every Function/Method node together with its source text.
 *  2. Merge the built-in catalogs with `.gitnexus/security.json` (if any).
 *  3. Tag functions whose content matches a source or sink pattern.
 *  4. Build a forward adjacency map from CALLS edges.
 *  5. BFS from each source-adjacent function to sink-adjacent functions.
 *  6. Shape the findings and a per-risk summary for the MCP response.
 *
 * This is reachability over the call graph only, not taint tracking: a
 * reported path may well contain a sanitizer.
 *
 * @param repo   Handle of the indexed repository to scan.
 * @param params Optional filters:
 *   - `max_depth`: BFS depth limit (default 5; non-finite values fall back to 5)
 *   - `owasp`: keep only sinks with this OWASP category
 *   - `source_category`: keep only sources with this category
 * @returns `{ summary, findings, note }`; findings are ordered as returned by
 *          `buildSourceSinkPaths`.
 */
private async sourceSinkScan(repo: RepoHandle, params: {
  max_depth?: number;
  owasp?: string;
  source_category?: string;
}): Promise<any> {
  await this.ensureInitialized(repo.id);

  // A NaN/Infinity depth would make the `depth >= maxDepth` cut-off in the BFS
  // never fire, so fall back to the documented default instead.
  const requestedDepth = params.max_depth ?? 5;
  const maxDepth = Number.isFinite(requestedDepth) ? requestedDepth : 5;

  // Step 1: Find all Function and Method nodes with their content.
  const nodesResult = await executeQuery(repo.id, `
    MATCH (n:Function)
    WHERE n.id IS NOT NULL
    RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
    UNION ALL
    MATCH (n:Method)
    WHERE n.id IS NOT NULL
    RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
  `);

  // Step 2: Load user-defined catalog extensions (if any) and merge with built-in catalogs
  const { getMatchingSources, getMatchingSinks, loadUserSecurityConfig, mergeCatalogs, compilePatterns } = await import('../../security/catalogs.js');

  const userConfig = await loadUserSecurityConfig(repo.repoPath);
  const merged = mergeCatalogs(userConfig);
  const compiledSources = compilePatterns(merged.sources);
  const compiledSinks = compilePatterns(merged.sinks);

  // Step 3: Tag source-adjacent and sink-adjacent functions
  const sources: Array<{ id: string; name: string; filePath: string; sourcePatterns: string[] }> = [];
  const sinks: Array<{ id: string; name: string; filePath: string; sinkPatterns: string[]; owasp: string }> = [];
  const nodeNameMap = new Map<string, { name: string; filePath: string }>();

  for (const row of nodesResult) {
    // Rows may come back keyed by alias or positionally; accept both.
    const content = row.content ?? row[3] ?? '';
    const name = row.name ?? row[1] ?? '';
    const filePath = row.filePath ?? row[2] ?? '';
    const id = row.id ?? row[0] ?? '';

    nodeNameMap.set(id, { name, filePath });

    // When a filter is active, report only the entries that satisfy it so the
    // finding's patterns/category always agree with what the caller asked for.
    const matchedSources = getMatchingSources(content, undefined, compiledSources);
    const relevantSources = params.source_category
      ? matchedSources.filter((s) => s.category === params.source_category)
      : matchedSources;
    if (relevantSources.length > 0) {
      sources.push({
        id, name, filePath,
        sourcePatterns: relevantSources.map((s) => s.pattern),
      });
    }

    const matchedSinks = getMatchingSinks(content, undefined, compiledSinks);
    const relevantSinks = params.owasp
      ? matchedSinks.filter((s) => s.owasp === params.owasp)
      : matchedSinks;
    if (relevantSinks.length > 0) {
      sinks.push({
        id, name, filePath,
        sinkPatterns: relevantSinks.map((s) => s.pattern),
        // Previously taken from the first match of *any* category, which could
        // contradict the requested `owasp` filter.
        owasp: relevantSinks[0]?.owasp || 'unknown',
      });
    }
  }

  // Step 4: Build CALLS adjacency map via Cypher
  const callsResult = await executeQuery(repo.id, `
    MATCH (a)-[r:CodeRelation {type: 'CALLS'}]->(b)
    RETURN a.id AS sourceId, b.id AS targetId
  `);

  const callsGraph = new Map<string, string[]>();
  for (const row of callsResult) {
    const sourceId = row.sourceId ?? row[0];
    const targetId = row.targetId ?? row[1];
    let callees = callsGraph.get(sourceId);
    if (!callees) { callees = []; callsGraph.set(sourceId, callees); }
    callees.push(targetId);
  }

  // Step 5: BFS from sources to sinks
  const { buildSourceSinkPaths } = await import('../../security/source-sink-scanner.js');
  const paths = buildSourceSinkPaths(sources, sinks, callsGraph, maxDepth);

  // Step 6: Format results
  const findings = paths.map((p) => ({
    risk: p.risk,
    owasp: p.owasp,
    source: { name: p.source.name, file: p.source.filePath, patterns: p.source.sourcePatterns },
    sink: { name: p.sink.name, file: p.sink.filePath, patterns: p.sink.sinkPatterns },
    depth: p.depth,
    path: p.path.map((id: string) => {
      const info = nodeNameMap.get(id);
      return info ? `${info.name} (${info.filePath})` : id;
    }),
  }));

  // `low` is a risk level the scanner can return; without a bucket for it the
  // per-risk counts would not add up to `paths_found`.
  const riskCounts = { critical: 0, high: 0, medium: 0, low: 0 };
  for (const f of findings) {
    if (f.risk in riskCounts) riskCounts[f.risk as keyof typeof riskCounts]++;
  }

  return {
    summary: {
      sources_found: sources.length,
      sinks_found: sinks.length,
      paths_found: findings.length,
      ...riskCounts,
    },
    findings,
    note: 'Structural reachability scan — paths may contain sanitizers. Use context() on flagged functions to verify.',
  };
}
+ +WHEN TO USE: Security review, pre-deployment audit, checking if user input reaches dangerous operations (SQL injection, command injection, XSS, SSRF). +AFTER THIS: Use context() on flagged functions to understand the full call chain, then verify if sanitizers exist in between. + +Returns paths from source functions to sink functions, ranked by risk level. +Uses BFS over existing CALLS edges — structural reachability, not taint tracking.`, + inputSchema: { + type: 'object', + properties: { + repo: { + type: 'string', + description: 'Repository name or path. Omit if only one repo is indexed.', + }, + max_depth: { + type: 'number', + description: 'Maximum BFS depth from source to sink (default: 5). Lower = fewer false positives, higher = more coverage.', + }, + owasp: { + type: 'string', + description: 'Filter by OWASP category: "A03-injection", "A07-xss", "A10-ssrf". Omit for all.', + }, + source_category: { + type: 'string', + description: 'Filter sources by category: "user_input", "environment", "file_read", "network". Omit for all.', + }, + }, + required: [], + }, + }, ]; diff --git a/gitnexus/src/security/catalogs.ts b/gitnexus/src/security/catalogs.ts new file mode 100644 index 0000000000..4663d07be8 --- /dev/null +++ b/gitnexus/src/security/catalogs.ts @@ -0,0 +1,311 @@ +/** + * Source and Sink Catalogs for Structural Security Scanning + * + * Sources: functions/patterns that introduce untrusted data into the application + * Sinks: functions/patterns that perform dangerous operations with data + * + * These catalogs are used for BFS reachability analysis over the existing + * CALLS graph — no CFG or data flow analysis needed. 
/**
 * Source and Sink Catalogs for Structural Security Scanning
 *
 * Sources: functions/patterns that introduce untrusted data into the application
 * Sinks: functions/patterns that perform dangerous operations with data
 *
 * These catalogs are used for BFS reachability analysis over the existing
 * CALLS graph — no CFG or data flow analysis needed.
 *
 * Matching is a case-insensitive literal substring search over a function's
 * source text, so short patterns (e.g. `query`, `exec`) are intentionally broad.
 *
 * Based on OWASP Top 10 categories:
 * - A03: Injection (SQL, command, code)
 * - A07: XSS (cross-site scripting)
 * - A10: SSRF (server-side request forgery)
 */

export interface SourceEntry {
  /** Literal text to look for in function content (escaped before being compiled to a regex) */
  pattern: string;
  /** Category of the source */
  category: 'user_input' | 'environment' | 'file_read' | 'network';
  /** Languages this source applies to (empty = all) */
  languages?: string[];
  /** Description for reports */
  description: string;
}

export interface SinkEntry {
  /** Literal text to look for in function content (escaped before being compiled to a regex) */
  pattern: string;
  /** OWASP category */
  owasp: 'A03-injection' | 'A07-xss' | 'A10-ssrf' | 'A01-access-control';
  /** Risk if reached from untrusted source */
  severity: 'critical' | 'high' | 'medium';
  /** Languages this sink applies to (empty = all) */
  languages?: string[];
  /** Description for reports */
  description: string;
}

// ── Source Catalog ──

export const SOURCE_CATALOG: SourceEntry[] = [
  // HTTP request data (Next.js, Express, Koa, Fastify)
  { pattern: 'request.json', category: 'user_input', description: 'Next.js request body (Request object)' },
  { pattern: 'req.json', category: 'user_input', description: 'Next.js request body (req shorthand)' },
  { pattern: 'req.body', category: 'user_input', description: 'Express request body' },
  { pattern: 'req.query', category: 'user_input', description: 'Express query parameters' },
  { pattern: 'req.params', category: 'user_input', description: 'Express route parameters' },
  { pattern: 'req.headers', category: 'user_input', description: 'HTTP request headers' },
  { pattern: 'request.GET', category: 'user_input', languages: ['python'], description: 'Django GET params' },
  { pattern: 'request.POST', category: 'user_input', languages: ['python'], description: 'Django POST data' },
  { pattern: 'request.data', category: 'user_input', languages: ['python'], description: 'DRF request data' },
  { pattern: '$_GET', category: 'user_input', languages: ['php'], description: 'PHP GET superglobal' },
  { pattern: '$_POST', category: 'user_input', languages: ['php'], description: 'PHP POST superglobal' },
  { pattern: '$_REQUEST', category: 'user_input', languages: ['php'], description: 'PHP REQUEST superglobal' },
  { pattern: 'request.form', category: 'user_input', languages: ['python'], description: 'Flask form data' },
  { pattern: 'request.args', category: 'user_input', languages: ['python'], description: 'Flask query args' },
  { pattern: 'nextUrl.searchParams', category: 'user_input', description: 'Next.js URL search params' },

  // Go (net/http)
  { pattern: 'r.Body', category: 'user_input', languages: ['go'], description: 'Go HTTP request body' },
  { pattern: 'r.URL.Query()', category: 'user_input', languages: ['go'], description: 'Go URL query parameters' },
  { pattern: 'r.FormValue', category: 'user_input', languages: ['go'], description: 'Go form value' },
  { pattern: 'r.Header.Get', category: 'user_input', languages: ['go'], description: 'Go request header' },

  // Rust / Actix-web
  { pattern: 'web::Json', category: 'user_input', languages: ['rust'], description: 'Actix-web JSON extractor' },
  { pattern: 'web::Query', category: 'user_input', languages: ['rust'], description: 'Actix-web query extractor' },
  { pattern: 'web::Path', category: 'user_input', languages: ['rust'], description: 'Actix-web path extractor' },

  // Spring (Java/Kotlin)
  { pattern: '@RequestBody', category: 'user_input', languages: ['java', 'kotlin'], description: 'Spring request body annotation' },
  { pattern: '@RequestParam', category: 'user_input', languages: ['java', 'kotlin'], description: 'Spring request parameter annotation' },
  { pattern: '@PathVariable', category: 'user_input', languages: ['java', 'kotlin'], description: 'Spring path variable annotation' },

  // Rails (Ruby)
  { pattern: 'params[', category: 'user_input', languages: ['ruby'], description: 'Rails params hash access' },
  { pattern: 'request.body', category: 'user_input', languages: ['ruby'], description: 'Rails raw request body' },

  // Kotlin / Ktor
  { pattern: 'call.receive', category: 'user_input', languages: ['kotlin'], description: 'Ktor request body receive' },
  { pattern: 'call.parameters', category: 'user_input', languages: ['kotlin'], description: 'Ktor request parameters' },

  // FastAPI (Python)
  { pattern: 'async def endpoint', category: 'user_input', languages: ['python'], description: 'FastAPI auto-injected endpoint parameter' },

  // Environment
  { pattern: 'process.env', category: 'environment', description: 'Node.js env variable' },
  { pattern: 'os.environ', category: 'environment', languages: ['python'], description: 'Python env variable' },
  { pattern: 'getenv', category: 'environment', languages: ['php'], description: 'PHP env variable' },
  { pattern: 'os.Getenv', category: 'environment', languages: ['go'], description: 'Go env variable' },
  { pattern: 'std::env::var', category: 'environment', languages: ['rust'], description: 'Rust env variable' },
  { pattern: 'System.getenv', category: 'environment', languages: ['java', 'kotlin'], description: 'Java/Kotlin env variable' },
  { pattern: 'ENV[', category: 'environment', languages: ['ruby'], description: 'Ruby env variable' },

  // File reads
  { pattern: 'readFile', category: 'file_read', description: 'File read operation' },
  { pattern: 'readFileSync', category: 'file_read', description: 'Sync file read' },
  { pattern: 'os.ReadFile', category: 'file_read', languages: ['go'], description: 'Go file read' },
  { pattern: 'std::fs::read', category: 'file_read', languages: ['rust'], description: 'Rust file read' },

  // Network input
  { pattern: 'fetch(', category: 'network', description: 'Fetch API response' },
  { pattern: 'axios.get', category: 'network', description: 'Axios HTTP response' },
  { pattern: 'axios.post', category: 'network', description: 'Axios HTTP response' },
  { pattern: 'http.Get', category: 'network', languages: ['go'], description: 'Go HTTP client GET' },
  { pattern: 'reqwest::get', category: 'network', languages: ['rust'], description: 'Rust reqwest HTTP GET' },
];

// ── Sink Catalog ──

export const SINK_CATALOG: SinkEntry[] = [
  // A03: Injection — SQL
  { pattern: 'query', owasp: 'A03-injection', severity: 'critical', description: 'Raw SQL query' },
  { pattern: '$queryRaw', owasp: 'A03-injection', severity: 'critical', description: 'Prisma raw query' },
  { pattern: '$executeRaw', owasp: 'A03-injection', severity: 'critical', description: 'Prisma raw execute' },
  { pattern: 'rawQuery', owasp: 'A03-injection', severity: 'critical', description: 'Sequelize raw query' },

  // A03: Injection — Command
  { pattern: 'exec', owasp: 'A03-injection', severity: 'critical', description: 'Command execution' },
  { pattern: 'execSync', owasp: 'A03-injection', severity: 'critical', description: 'Sync command execution' },
  { pattern: 'spawn', owasp: 'A03-injection', severity: 'high', description: 'Process spawn' },
  { pattern: 'eval', owasp: 'A03-injection', severity: 'critical', description: 'Code evaluation' },
  { pattern: 'Function(', owasp: 'A03-injection', severity: 'critical', description: 'Dynamic function creation' },
  { pattern: 'subprocess.run', owasp: 'A03-injection', severity: 'critical', languages: ['python'], description: 'Python subprocess' },
  { pattern: 'os.system', owasp: 'A03-injection', severity: 'critical', languages: ['python'], description: 'Python system call' },
  { pattern: 'shell_exec', owasp: 'A03-injection', severity: 'critical', languages: ['php'], description: 'PHP shell exec' },

  // A03: Injection — Go
  { pattern: 'os.exec', owasp: 'A03-injection', severity: 'critical', languages: ['go'], description: 'Go command execution' },
  { pattern: 'sql.Query', owasp: 'A03-injection', severity: 'critical', languages: ['go'], description: 'Go raw SQL query' },

  // A03: Injection — Rust
  { pattern: 'Command::new', owasp: 'A03-injection', severity: 'critical', languages: ['rust'], description: 'Rust command execution' },
  { pattern: 'sqlx::query', owasp: 'A03-injection', severity: 'critical', languages: ['rust'], description: 'Rust sqlx raw query' },

  // A03: Injection — Spring (Java/Kotlin)
  { pattern: 'jdbcTemplate.query', owasp: 'A03-injection', severity: 'critical', languages: ['java', 'kotlin'], description: 'Spring JDBC raw query' },
  { pattern: 'Runtime.exec', owasp: 'A03-injection', severity: 'critical', languages: ['java', 'kotlin'], description: 'Java runtime command execution' },

  // A03: Injection — Rails (Ruby)
  { pattern: 'system(', owasp: 'A03-injection', severity: 'critical', languages: ['ruby'], description: 'Ruby system command execution' },
  { pattern: 'ActiveRecord::Base.connection.execute', owasp: 'A03-injection', severity: 'critical', languages: ['ruby'], description: 'Rails raw SQL execution' },

  // A07: XSS
  { pattern: 'innerHTML', owasp: 'A07-xss', severity: 'high', description: 'Direct HTML injection' },
  { pattern: 'dangerouslySetInnerHTML', owasp: 'A07-xss', severity: 'high', description: 'React unsafe HTML' },
  { pattern: 'document.write', owasp: 'A07-xss', severity: 'high', description: 'Document write' },
  { pattern: 'template.HTML', owasp: 'A07-xss', severity: 'high', languages: ['go'], description: 'Go template unescaped HTML' },

  // A10: SSRF
  { pattern: 'fetch(', owasp: 'A10-ssrf', severity: 'high', description: 'Server-side fetch with user URL' },
  { pattern: 'axios(', owasp: 'A10-ssrf', severity: 'high', description: 'Axios with user URL' },
  { pattern: 'http.get', owasp: 'A10-ssrf', severity: 'high', description: 'HTTP client with user URL' },
  { pattern: 'urllib.request', owasp: 'A10-ssrf', severity: 'high', languages: ['python'], description: 'Python URL request' },

  // Database writes (ORM — not injection per se, but data integrity sinks)
  { pattern: 'prisma.', owasp: 'A03-injection', severity: 'medium', description: 'Prisma ORM operation (check for raw queries)' },
  { pattern: '.create(', owasp: 'A03-injection', severity: 'medium', description: 'ORM create operation' },
  { pattern: '.update(', owasp: 'A03-injection', severity: 'medium', description: 'ORM update operation' },
];

// ── User-extensible catalog loading ──

export interface UserSecurityConfig {
  sources?: Array<{
    pattern: string;
    category: string;
    description: string;
    languages?: string[];
  }>;
  sinks?: Array<{
    pattern: string;
    owasp: string;
    severity: string;
    description: string;
    languages?: string[];
  }>;
}

/** True for plain (non-null, non-array) objects — the only valid shape of a config or entry. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * True when a user-supplied catalog entry carries a usable pattern.
 * An empty pattern compiles to a regex that matches every function, and a
 * missing/non-string pattern would throw in `escapeRegex`, so both are rejected.
 */
function hasUsablePattern(entry: unknown): boolean {
  return isRecord(entry) && typeof entry.pattern === 'string' && entry.pattern.length > 0;
}

/**
 * Load user-defined security catalog from `.gitnexus/security.json` in the repo root.
 * Returns null if the file doesn't exist or is invalid (unreadable, not JSON,
 * or JSON whose top level is not an object).
 *
 * @param repoPath Absolute or relative path of the repository root.
 */
export async function loadUserSecurityConfig(repoPath: string): Promise<UserSecurityConfig | null> {
  try {
    const { readFile } = await import('node:fs/promises');
    const { join } = await import('node:path');
    const configPath = join(repoPath, '.gitnexus', 'security.json');
    const parsed: unknown = JSON.parse(await readFile(configPath, 'utf-8'));
    // JSON such as `[]`, `"x"` or `42` parses fine but is not a config object.
    return isRecord(parsed) ? (parsed as UserSecurityConfig) : null;
  } catch {
    // File doesn't exist or is invalid — that's fine, just use built-in catalogs
    return null;
  }
}

/**
 * Merge user-defined entries with the built-in catalogs.
 * User entries are appended after built-in entries.
 *
 * The user config comes from an unvalidated JSON file, so malformed parts are
 * skipped instead of being allowed to crash the scan: a `sources`/`sinks` value
 * that is not an array is ignored, as is any entry that is not an object or has
 * no non-empty string `pattern`. `category`/`owasp`/`severity` are passed
 * through as given, so custom values remain possible.
 *
 * @param userConfig Parsed `.gitnexus/security.json`, or null when absent.
 * @returns Fresh arrays; the built-in catalogs are never mutated.
 */
export function mergeCatalogs(userConfig: UserSecurityConfig | null): {
  sources: SourceEntry[];
  sinks: SinkEntry[];
} {
  const sources = [...SOURCE_CATALOG];
  const sinks = [...SINK_CATALOG];

  const userSources = userConfig?.sources;
  if (Array.isArray(userSources)) {
    for (const s of userSources) {
      if (!hasUsablePattern(s)) continue;
      sources.push({
        pattern: s.pattern,
        category: s.category as SourceEntry['category'],
        description: typeof s.description === 'string' ? s.description : '',
        ...(Array.isArray(s.languages) ? { languages: s.languages } : {}),
      });
    }
  }

  const userSinks = userConfig?.sinks;
  if (Array.isArray(userSinks)) {
    for (const s of userSinks) {
      if (!hasUsablePattern(s)) continue;
      sinks.push({
        pattern: s.pattern,
        owasp: s.owasp as SinkEntry['owasp'],
        severity: s.severity as SinkEntry['severity'],
        description: typeof s.description === 'string' ? s.description : '',
        ...(Array.isArray(s.languages) ? { languages: s.languages } : {}),
      });
    }
  }

  return { sources, sinks };
}

// Compiled regex patterns for matching
interface CompiledPattern<T> {
  regex: RegExp;
  entry: T;
}

/**
 * Compile an array of catalog entries into regex patterns for matching.
 * Each pattern is escaped first, so it is matched literally (case-insensitively).
 */
export function compilePatterns<T extends { pattern: string }>(entries: T[]): CompiledPattern<T>[] {
  return entries.map(entry => ({
    regex: new RegExp(escapeRegex(entry.pattern), 'i'),
    entry,
  }));
}

const SOURCE_REGEXES: CompiledPattern<SourceEntry>[] = compilePatterns(SOURCE_CATALOG);

const SINK_REGEXES: CompiledPattern<SinkEntry>[] = compilePatterns(SINK_CATALOG);

/**
 * Check if a compiled pattern applies given the language and content.
 * The language restriction is enforced only when BOTH the entry declares
 * languages and a language is supplied; otherwise the pattern applies.
 */
function patternMatches<T extends { languages?: string[] }>(
  { regex, entry }: CompiledPattern<T>,
  content: string,
  language?: string,
): boolean {
  if (entry.languages && language && !entry.languages.includes(language)) return false;
  return regex.test(content);
}

/** Filter compiled patterns by language applicability and content match. */
function matchPatterns<T extends { languages?: string[] }>(
  patterns: CompiledPattern<T>[],
  content: string,
  language?: string,
): T[] {
  return patterns
    .filter(p => patternMatches(p, content, language))
    .map(({ entry }) => entry);
}

/**
 * Check if a function's content contains source patterns (user input reads).
 * Optionally accepts custom compiled patterns (e.g. merged with user config).
 */
export function isSourceAdjacent(_functionName: string, content: string, language?: string, customPatterns?: CompiledPattern<SourceEntry>[]): boolean {
  return (customPatterns ?? SOURCE_REGEXES).some(p => patternMatches(p, content, language));
}

/**
 * Check if a function's content contains sink patterns (dangerous operations).
 * Optionally accepts custom compiled patterns (e.g. merged with user config).
 */
export function isSinkAdjacent(_functionName: string, content: string, language?: string, customPatterns?: CompiledPattern<SinkEntry>[]): boolean {
  return (customPatterns ?? SINK_REGEXES).some(p => patternMatches(p, content, language));
}

/**
 * Get matching sink entries for a function's content (for reporting).
 * Optionally accepts custom compiled patterns (e.g. merged with user config).
 */
export function getMatchingSinks(content: string, language?: string, customPatterns?: CompiledPattern<SinkEntry>[]): SinkEntry[] {
  return matchPatterns(customPatterns ?? SINK_REGEXES, content, language);
}

/**
 * Get matching source entries for a function's content (for reporting).
 * Optionally accepts custom compiled patterns (e.g. merged with user config).
 */
export function getMatchingSources(content: string, language?: string, customPatterns?: CompiledPattern<SourceEntry>[]): SourceEntry[] {
  return matchPatterns(customPatterns ?? SOURCE_REGEXES, content, language);
}

/** Escape every regex metacharacter so the string is matched literally. */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Source-Sink Structural Scanner
 *
 * BFS over the existing CALLS graph to find paths from source-adjacent
 * functions to sink-adjacent functions. No CFG, no taint tracking —
 * pure structural reachability.
 */

export interface SourceNode {
  id: string;
  name: string;
  filePath: string;
  sourcePatterns: string[];
}

export interface SinkNode {
  id: string;
  name: string;
  filePath: string;
  sinkPatterns: string[];
  owasp: string;
}

export interface SourceSinkPath {
  source: SourceNode;
  sink: SinkNode;
  /** Ordered node IDs from source to sink */
  path: string[];
  /** Number of hops from source to sink */
  depth: number;
  /** OWASP category from the sink */
  owasp: string;
  /** Risk level based on depth and sink category */
  risk: 'critical' | 'high' | 'medium' | 'low';
}

/**
 * Find all reachable paths from source-adjacent nodes to sink-adjacent nodes
 * using BFS over the CALLS graph.
 *
 * One BFS is run per source. Within a single BFS every node is visited at most
 * once, so each (source, sink) pair yields at most one path — the first one
 * BFS reaches, which has the fewest hops. The source node itself is never
 * reported as its own sink (only callees are checked).
 *
 * @param sources - Functions identified as source-adjacent (read user input)
 * @param sinks - Functions identified as sink-adjacent (perform dangerous ops)
 * @param callsGraph - Forward adjacency map: nodeId -> [calleeIds]
 * @param maxDepth - Maximum BFS depth (default: 5)
 * @returns All source-to-sink paths found, sorted by risk (critical first),
 *          then by depth (shortest first)
 */
export function buildSourceSinkPaths(
  sources: SourceNode[],
  sinks: SinkNode[],
  callsGraph: Map<string, string[]>,
  maxDepth: number = 5,
): SourceSinkPath[] {
  const sinkMap = new Map<string, SinkNode>();
  for (const s of sinks) sinkMap.set(s.id, s);

  const results: SourceSinkPath[] = [];

  for (const source of sources) {
    const visited = new Set<string>([source.id]);
    // Queue of [currentNodeId, path so far]. It is consumed through a moving
    // head index rather than Array#shift(), which re-indexes the whole array
    // on every dequeue and makes the traversal quadratic on large frontiers.
    const queue: Array<[string, string[]]> = [[source.id, [source.id]]];

    for (let head = 0; head < queue.length; head++) {
      const [currentId, currentPath] = queue[head];
      const depth = currentPath.length - 1;

      // Nodes sitting at the depth limit are not expanded further.
      if (depth >= maxDepth) continue;

      const callees = callsGraph.get(currentId) || [];
      for (const calleeId of callees) {
        if (visited.has(calleeId)) continue;
        visited.add(calleeId);

        const newPath = [...currentPath, calleeId];
        const hops = newPath.length - 1;

        const sink = sinkMap.get(calleeId);
        if (sink) {
          results.push({
            source,
            sink,
            path: newPath,
            depth: hops,
            owasp: sink.owasp,
            risk: computeRisk(hops, sink.owasp),
          });
          // Don't stop — there may be other sinks reachable through this one.
        }

        queue.push([calleeId, newPath]);
      }
    }
  }

  // Sort by risk (critical first), then by depth (shortest first)
  const riskOrder = { critical: 0, high: 1, medium: 2, low: 3 };
  results.sort((a, b) => riskOrder[a.risk] - riskOrder[b.risk] || a.depth - b.depth);

  return results;
}

/**
 * Derive a risk level from the path length and the sink's OWASP category.
 *
 * @param depth - Number of hops from source to sink
 * @param owasp - OWASP category of the sink
 */
function computeRisk(depth: number, owasp: string): 'critical' | 'high' | 'medium' | 'low' {
  // Direct call to an injection/XSS sink = critical
  if (depth <= 1 && (owasp === 'A03-injection' || owasp === 'A07-xss')) return 'critical';
  // Short path to dangerous sink = high
  if (depth <= 2) return 'high';
  // Longer paths = medium (may have sanitizers in between)
  if (depth <= 4) return 'medium';
  // Very long paths are low confidence
  return 'low';
}
'merge', + functionId: 'Function:utils.ts:merge', + paramName: 'args', + paramIndex: 0, + isRest: true, + }; + expect(param.isRest).toBe(true); + expect(param.declaredType).toBeUndefined(); + }); + + it('accepts parameter without type annotation', () => { + const param: ExtractedParameter = { + filePath: 'handler.js', + functionName: 'process', + functionId: 'Function:handler.js:process', + paramName: 'data', + paramIndex: 0, + isRest: false, + }; + expect(param.declaredType).toBeUndefined(); + }); +}); diff --git a/gitnexus/test/unit/parameter-processor.test.ts b/gitnexus/test/unit/parameter-processor.test.ts new file mode 100644 index 0000000000..ceaadbef65 --- /dev/null +++ b/gitnexus/test/unit/parameter-processor.test.ts @@ -0,0 +1,126 @@ +import { describe, it, expect } from 'vitest'; +import { + createParameterNodes, + buildPassesToEdges, + type ParameterNode, + type PassesToEdge, +} from '../../src/core/ingestion/parameter-processor.js'; +import type { ExtractedParameter } from '../../src/core/ingestion/workers/parse-worker.js'; + +describe('createParameterNodes', () => { + it('creates Parameter nodes from extracted parameters', () => { + const params: ExtractedParameter[] = [ + { filePath: 'route.ts', functionName: 'handlePOST', functionId: 'Function:route.ts:handlePOST', paramName: 'request', paramIndex: 0, declaredType: 'NextRequest', isRest: false }, + { filePath: 'route.ts', functionName: 'handlePOST', functionId: 'Function:route.ts:handlePOST', paramName: 'context', paramIndex: 1, isRest: false }, + ]; + + const nodes = createParameterNodes(params); + expect(nodes).toHaveLength(2); + expect(nodes[0].name).toBe('request'); + expect(nodes[0].paramIndex).toBe(0); + expect(nodes[0].declaredType).toBe('NextRequest'); + expect(nodes[0].ownerId).toBe('Function:route.ts:handlePOST'); + expect(nodes[1].name).toBe('context'); + expect(nodes[1].paramIndex).toBe(1); + }); + + it('deduplicates by id', () => { + const params: ExtractedParameter[] = [ + { filePath: 
'a.ts', functionName: 'foo', functionId: 'Function:a.ts:foo', paramName: 'x', paramIndex: 0, isRest: false }, + { filePath: 'a.ts', functionName: 'foo', functionId: 'Function:a.ts:foo', paramName: 'x', paramIndex: 0, isRest: false }, + ]; + const nodes = createParameterNodes(params); + expect(nodes).toHaveLength(1); + }); + + it('handles rest parameters', () => { + const params: ExtractedParameter[] = [ + { filePath: 'a.ts', functionName: 'merge', functionId: 'Function:a.ts:merge', paramName: 'args', paramIndex: 0, isRest: true }, + ]; + const nodes = createParameterNodes(params); + expect(nodes[0].isRest).toBe(true); + }); +}); + +describe('buildPassesToEdges', () => { + it('maps call arguments to callee parameters by position', () => { + const calls = [ + { sourceId: 'Function:handler.ts:handlePOST', targetId: 'Function:validate.ts:validate', argCount: 1 }, + ]; + + const calleeParams = new Map([ + ['Function:validate.ts:validate', [ + { filePath: 'validate.ts', functionName: 'validate', functionId: 'Function:validate.ts:validate', paramName: 'input', paramIndex: 0, isRest: false }, + ]], + ]); + + const edges = buildPassesToEdges(calls, calleeParams); + expect(edges).toHaveLength(1); + expect(edges[0].sourceParamIndex).toBe(0); + expect(edges[0].targetParamId).toContain('input'); + expect(edges[0].callerId).toBe('Function:handler.ts:handlePOST'); + }); + + it('skips parameters beyond argCount', () => { + const calls = [ + { sourceId: 'f:caller', targetId: 'f:callee', argCount: 1 }, + ]; + + const calleeParams = new Map([ + ['f:callee', [ + { filePath: 'a.ts', functionName: 'callee', functionId: 'f:callee', paramName: 'a', paramIndex: 0, isRest: false }, + { filePath: 'a.ts', functionName: 'callee', functionId: 'f:callee', paramName: 'b', paramIndex: 1, isRest: false }, + ]], + ]); + + const edges = buildPassesToEdges(calls, calleeParams); + expect(edges).toHaveLength(1); + expect(edges[0].sourceParamIndex).toBe(0); + }); + + it('includes rest parameter even when 
beyond argCount', () => { + const calls = [ + { sourceId: 'f:caller', targetId: 'f:callee', argCount: 1 }, + ]; + + const calleeParams = new Map([ + ['f:callee', [ + { filePath: 'a.ts', functionName: 'callee', functionId: 'f:callee', paramName: 'first', paramIndex: 0, isRest: false }, + { filePath: 'a.ts', functionName: 'callee', functionId: 'f:callee', paramName: 'rest', paramIndex: 1, isRest: true }, + ]], + ]); + + const edges = buildPassesToEdges(calls, calleeParams); + // first (index 0) matches, rest (index 1) also matches because isRest + expect(edges).toHaveLength(2); + }); + + it('returns empty when callee has no parameters', () => { + const calls = [ + { sourceId: 'f:caller', targetId: 'f:callee', argCount: 0 }, + ]; + const calleeParams = new Map(); + const edges = buildPassesToEdges(calls, calleeParams); + expect(edges).toHaveLength(0); + }); + + it('handles multiple calls to different callees', () => { + const calls = [ + { sourceId: 'f:main', targetId: 'f:a', argCount: 2 }, + { sourceId: 'f:main', targetId: 'f:b', argCount: 1 }, + ]; + + const calleeParams = new Map([ + ['f:a', [ + { filePath: 'a.ts', functionName: 'a', functionId: 'f:a', paramName: 'x', paramIndex: 0, isRest: false }, + { filePath: 'a.ts', functionName: 'a', functionId: 'f:a', paramName: 'y', paramIndex: 1, isRest: false }, + ]], + ['f:b', [ + { filePath: 'b.ts', functionName: 'b', functionId: 'f:b', paramName: 'data', paramIndex: 0, isRest: false }, + ]], + ]); + + const edges = buildPassesToEdges(calls, calleeParams); + expect(edges).toHaveLength(3); // 2 for f:a + 1 for f:b + }); +}); diff --git a/gitnexus/test/unit/parameter-schema.test.ts b/gitnexus/test/unit/parameter-schema.test.ts new file mode 100644 index 0000000000..3a6cfb7f26 --- /dev/null +++ b/gitnexus/test/unit/parameter-schema.test.ts @@ -0,0 +1,40 @@ +import { describe, it, expect } from 'vitest'; +import { NODE_TABLES, REL_TYPES, RELATION_SCHEMA } from '../../src/core/lbug/schema.js'; +import type { NodeLabel, 
RelationshipType } from '../../src/core/graph/types.js'; + +describe('parameter schema', () => { + it('NODE_TABLES includes Parameter', () => { + expect(NODE_TABLES).toContain('Parameter'); + }); + + it('REL_TYPES includes PASSES_TO', () => { + expect(REL_TYPES).toContain('PASSES_TO'); + }); + + it('REL_TYPES includes DATA_FLOWS_TO', () => { + expect(REL_TYPES).toContain('DATA_FLOWS_TO'); + }); + + it('RELATION_SCHEMA has FROM Function TO Parameter', () => { + expect(RELATION_SCHEMA).toContain('FROM Function TO Parameter'); + }); + + it('RELATION_SCHEMA has FROM Parameter TO Parameter (cross-function flow)', () => { + expect(RELATION_SCHEMA).toContain('FROM Parameter TO Parameter'); + }); + + it('NodeLabel union accepts Parameter', () => { + const label: NodeLabel = 'Parameter'; + expect(label).toBe('Parameter'); + }); + + it('RelationshipType union accepts PASSES_TO', () => { + const rel: RelationshipType = 'PASSES_TO'; + expect(rel).toBe('PASSES_TO'); + }); + + it('RelationshipType union accepts DATA_FLOWS_TO', () => { + const rel: RelationshipType = 'DATA_FLOWS_TO'; + expect(rel).toBe('DATA_FLOWS_TO'); + }); +}); diff --git a/gitnexus/test/unit/schema.test.ts b/gitnexus/test/unit/schema.test.ts index c616b2ce9c..52606b6eeb 100644 --- a/gitnexus/test/unit/schema.test.ts +++ b/gitnexus/test/unit/schema.test.ts @@ -39,8 +39,8 @@ describe('LadybugDB Schema', () => { }); it('has expected total count', () => { - // 9 core + 18 multi-language + Route + Tool = 30 - expect(NODE_TABLES).toHaveLength(30); + // 9 core + 18 multi-language + Route + Tool + Parameter = 31 + expect(NODE_TABLES).toHaveLength(31); }); }); @@ -164,7 +164,7 @@ describe('LadybugDB Schema', () => { describe('schema query ordering', () => { it('NODE_SCHEMA_QUERIES has correct count', () => { - expect(NODE_SCHEMA_QUERIES).toHaveLength(30); + expect(NODE_SCHEMA_QUERIES).toHaveLength(31); }); it('REL_SCHEMA_QUERIES has one relation table', () => { @@ -172,8 +172,8 @@ describe('LadybugDB Schema', () => { 
}); it('SCHEMA_QUERIES includes all node + rel + embedding schemas', () => { - // 30 node + 1 rel + 1 embedding = 32 - expect(SCHEMA_QUERIES).toHaveLength(32); + // 31 node + 1 rel + 1 embedding = 33 + expect(SCHEMA_QUERIES).toHaveLength(33); }); it('node schemas come before relation schemas in SCHEMA_QUERIES', () => { diff --git a/gitnexus/test/unit/source-sink-scanner.test.ts b/gitnexus/test/unit/source-sink-scanner.test.ts new file mode 100644 index 0000000000..b9ece117df --- /dev/null +++ b/gitnexus/test/unit/source-sink-scanner.test.ts @@ -0,0 +1,262 @@ +import { describe, it, expect } from 'vitest'; +import { + SOURCE_CATALOG, + SINK_CATALOG, + isSourceAdjacent, + isSinkAdjacent, + mergeCatalogs, + compilePatterns, + getMatchingSources, + getMatchingSinks, + type UserSecurityConfig, +} from '../../src/security/catalogs.js'; +import { buildSourceSinkPaths, type SourceSinkPath } from '../../src/security/source-sink-scanner.js'; + +describe('SOURCE_CATALOG', () => { + it('contains user input sources', () => { + const names = SOURCE_CATALOG.map(s => s.pattern); + expect(names).toContain('request.json'); + expect(names).toContain('req.body'); + expect(names).toContain('request.GET'); + }); + + it('each source has a category', () => { + for (const source of SOURCE_CATALOG) { + expect(['user_input', 'environment', 'file_read', 'network']).toContain(source.category); + } + }); + + it('contains Go sources', () => { + const patterns = SOURCE_CATALOG.map(s => s.pattern); + expect(patterns).toContain('r.Body'); + expect(patterns).toContain('r.URL.Query()'); + expect(patterns).toContain('r.FormValue'); + expect(patterns).toContain('r.Header.Get'); + }); + + it('contains Rust/Actix sources', () => { + const patterns = SOURCE_CATALOG.map(s => s.pattern); + expect(patterns).toContain('web::Json'); + expect(patterns).toContain('web::Query'); + expect(patterns).toContain('web::Path'); + }); + + it('contains Spring annotation sources', () => { + const patterns = 
SOURCE_CATALOG.map(s => s.pattern); + expect(patterns).toContain('@RequestBody'); + expect(patterns).toContain('@RequestParam'); + expect(patterns).toContain('@PathVariable'); + }); + + it('contains Rails sources', () => { + const patterns = SOURCE_CATALOG.map(s => s.pattern); + expect(patterns).toContain('params['); + }); + + it('contains Ktor sources', () => { + const patterns = SOURCE_CATALOG.map(s => s.pattern); + expect(patterns).toContain('call.receive'); + expect(patterns).toContain('call.parameters'); + }); +}); + +describe('SINK_CATALOG', () => { + it('contains dangerous sinks', () => { + const names = SINK_CATALOG.map(s => s.pattern); + expect(names).toContain('eval'); + expect(names).toContain('exec'); + expect(names).toContain('innerHTML'); + }); + + it('each sink has an OWASP category', () => { + for (const sink of SINK_CATALOG) { + expect(sink.owasp).toBeDefined(); + } + }); + + it('contains Go sinks', () => { + const patterns = SINK_CATALOG.map(s => s.pattern); + expect(patterns).toContain('os.exec'); + expect(patterns).toContain('sql.Query'); + expect(patterns).toContain('template.HTML'); + }); + + it('contains Rust sinks', () => { + const patterns = SINK_CATALOG.map(s => s.pattern); + expect(patterns).toContain('Command::new'); + expect(patterns).toContain('sqlx::query'); + }); + + it('contains Spring sinks', () => { + const patterns = SINK_CATALOG.map(s => s.pattern); + expect(patterns).toContain('jdbcTemplate.query'); + expect(patterns).toContain('Runtime.exec'); + }); + + it('contains Rails sinks', () => { + const patterns = SINK_CATALOG.map(s => s.pattern); + expect(patterns).toContain('system('); + expect(patterns).toContain('ActiveRecord::Base.connection.execute'); + }); +}); + +describe('isSourceAdjacent', () => { + it('matches function that reads request body', () => { + const content = `async function handlePOST(req) { const data = await req.json(); }`; + expect(isSourceAdjacent('handlePOST', content)).toBe(true); + }); + + it('does not 
match function without user input', () => { + const content = `function add(a, b) { return a + b; }`; + expect(isSourceAdjacent('add', content)).toBe(false); + }); + + it('matches Go HTTP handler reading body', () => { + const content = `func handler(w http.ResponseWriter, r *http.Request) { body := r.Body }`; + expect(isSourceAdjacent('handler', content, 'go')).toBe(true); + }); + + it('matches Spring annotation in Java content', () => { + const content = `public ResponseEntity create(@RequestBody UserDto dto) { return ok(); }`; + expect(isSourceAdjacent('create', content, 'java')).toBe(true); + }); +}); + +describe('isSinkAdjacent', () => { + it('matches function with database write', () => { + const content = `async function save(data) { await prisma.grant.create({ data }); }`; + expect(isSinkAdjacent('save', content)).toBe(true); + }); + + it('matches function with exec call', () => { + const content = `function run(cmd) { exec(cmd); }`; + expect(isSinkAdjacent('run', content)).toBe(true); + }); + + it('does not match safe function', () => { + const content = `function format(s) { return s.trim(); }`; + expect(isSinkAdjacent('format', content)).toBe(false); + }); + + it('matches Go sql.Query sink', () => { + const content = `func getUser(db *sql.DB, id string) { rows, _ := db.sql.Query("SELECT * FROM users WHERE id=" + id) }`; + expect(isSinkAdjacent('getUser', content, 'go')).toBe(true); + }); + + it('matches Rust Command::new sink', () => { + const content = `fn run_cmd(input: &str) { Command::new(input).output().unwrap(); }`; + expect(isSinkAdjacent('run_cmd', content, 'rust')).toBe(true); + }); +}); + +describe('mergeCatalogs', () => { + it('returns built-in catalogs when user config is null', () => { + const result = mergeCatalogs(null); + expect(result.sources).toEqual(SOURCE_CATALOG); + expect(result.sinks).toEqual(SINK_CATALOG); + }); + + it('merges user-defined sources with built-in catalog', () => { + const userConfig: UserSecurityConfig = { + 
sources: [ + { pattern: 'myCustomInput', category: 'user_input', description: 'Custom input source' }, + ], + }; + const result = mergeCatalogs(userConfig); + expect(result.sources.length).toBe(SOURCE_CATALOG.length + 1); + expect(result.sources[result.sources.length - 1].pattern).toBe('myCustomInput'); + }); + + it('merges user-defined sinks with built-in catalog', () => { + const userConfig: UserSecurityConfig = { + sinks: [ + { pattern: 'dangerousOp', owasp: 'A03-injection', severity: 'high', description: 'Custom sink' }, + ], + }; + const result = mergeCatalogs(userConfig); + expect(result.sinks.length).toBe(SINK_CATALOG.length + 1); + expect(result.sinks[result.sinks.length - 1].pattern).toBe('dangerousOp'); + }); + + it('merged catalogs work with compilePatterns and getMatchingSources', () => { + const userConfig: UserSecurityConfig = { + sources: [ + { pattern: 'myCustomInput', category: 'user_input', description: 'Custom input source' }, + ], + sinks: [ + { pattern: 'dangerousOp', owasp: 'A03-injection', severity: 'high', description: 'Custom sink' }, + ], + }; + const merged = mergeCatalogs(userConfig); + const compiledSources = compilePatterns(merged.sources); + const compiledSinks = compilePatterns(merged.sinks); + + // User-defined source should be detected + const content = `function handle() { const data = myCustomInput(); dangerousOp(data); }`; + const matchedSources = getMatchingSources(content, undefined, compiledSources); + expect(matchedSources.some(s => s.pattern === 'myCustomInput')).toBe(true); + + // User-defined sink should be detected + const matchedSinks = getMatchingSinks(content, undefined, compiledSinks); + expect(matchedSinks.some(s => s.pattern === 'dangerousOp')).toBe(true); + }); + + it('does not modify built-in catalog arrays', () => { + const originalSourceCount = SOURCE_CATALOG.length; + const originalSinkCount = SINK_CATALOG.length; + mergeCatalogs({ + sources: [{ pattern: 'x', category: 'user_input', description: 'test' }], + 
sinks: [{ pattern: 'y', owasp: 'A03-injection', severity: 'high', description: 'test' }], + }); + expect(SOURCE_CATALOG.length).toBe(originalSourceCount); + expect(SINK_CATALOG.length).toBe(originalSinkCount); + }); +}); + +describe('buildSourceSinkPaths', () => { + it('finds path from source to sink through CALLS chain', () => { + const sources = [{ id: 'func:handlePOST', name: 'handlePOST', filePath: 'route.ts', sourcePatterns: ['req.body'] }]; + const sinks = [{ id: 'func:createGrant', name: 'createGrant', filePath: 'service.ts', sinkPatterns: ['prisma.'], owasp: 'A03-injection' as const }]; + const callsGraph = new Map([ + ['func:handlePOST', ['func:validateInput']], + ['func:validateInput', ['func:createGrant']], + ]); + + const paths = buildSourceSinkPaths(sources, sinks, callsGraph, 5); + expect(paths).toHaveLength(1); + expect(paths[0].source.name).toBe('handlePOST'); + expect(paths[0].sink.name).toBe('createGrant'); + expect(paths[0].path).toEqual(['func:handlePOST', 'func:validateInput', 'func:createGrant']); + expect(paths[0].depth).toBe(2); + }); + + it('returns empty when no path exists', () => { + const sources = [{ id: 'func:a', name: 'a', filePath: 'a.ts', sourcePatterns: ['req.body'] }]; + const sinks = [{ id: 'func:z', name: 'z', filePath: 'z.ts', sinkPatterns: ['eval'], owasp: 'A03-injection' as const }]; + const callsGraph = new Map([ + ['func:a', ['func:b']], + // func:b doesn't call func:z + ]); + + const paths = buildSourceSinkPaths(sources, sinks, callsGraph, 5); + expect(paths).toHaveLength(0); + }); + + it('respects maxDepth', () => { + const sources = [{ id: 'func:a', name: 'a', filePath: 'a.ts', sourcePatterns: ['req.body'] }]; + const sinks = [{ id: 'func:d', name: 'd', filePath: 'd.ts', sinkPatterns: ['eval'], owasp: 'A03-injection' as const }]; + const callsGraph = new Map([ + ['func:a', ['func:b']], + ['func:b', ['func:c']], + ['func:c', ['func:d']], + ]); + + // maxDepth 2 should not reach func:d (3 hops away) + const paths = 
buildSourceSinkPaths(sources, sinks, callsGraph, 2); + expect(paths).toHaveLength(0); + + // maxDepth 3 should find it + const paths3 = buildSourceSinkPaths(sources, sinks, callsGraph, 3); + expect(paths3).toHaveLength(1); + }); +});