abhigyanpatwari · marxo126 · Mar 28, 2026 · Mar 28, 2026 · Mar 31, 2026
@@ -8,6 +8,7 @@
 
 import fs from 'fs/promises';
 import path from 'path';
+import { getLanguageFromFilename } from 'gitnexus-shared';
 import {
   initLbug,
   executeQuery,
@@ -497,6 +498,8 @@ export class LocalBackend {
         return this.toolMap(repo, params);
       case 'api_impact':
         return this.apiImpact(repo, params);
+      case 'source_sink':
+        return this.sourceSinkScan(repo, params);
       default:
         throw new Error(`Unknown tool: ${method}`);
     }
@@ -3219,6 +3222,154 @@ export class LocalBackend {
     };
   }
 
+  /**
+   * Structural source → sink reachability scan over the indexed call graph.
+   *
+   * Pipeline:
+   *  1. Load every `Function` and `Method` node (id, name, filePath, content).
+   *  2. Load the user's security config from `repo.repoPath`, merge it into the
+   *     catalogs and compile the resulting source/sink patterns.
+   *  3. Tag each node whose content matches a source and/or sink pattern,
+   *     honouring the optional `source_category` / `owasp` filters.
+   *  4. Build a caller → callees adjacency map from `CALLS` relations.
+   *  5. Delegate the path search to `buildSourceSinkPaths`.
+   *  6. Shape the paths into findings plus a per-risk summary.
+   *
+   * This is pattern matching on function bodies plus call-graph reachability;
+   * nothing here tracks data flow, so a reported path may still be sanitized.
+   *
+   * @param repo   Handle of the indexed repository to scan.
+   * @param params Optional filters:
+   *   - `max_depth`: maximum search depth handed to the path builder
+   *     (default 5; non-finite values fall back to the default, fractional
+   *     values are floored, negative values are clamped to 0).
+   *   - `owasp`: keep only sinks that match a pattern of this OWASP category.
+   *   - `source_category`: keep only sources that match a pattern of this category.
+   * @returns `{ summary, findings, note }` where `summary` holds source/sink/path
+   *   counts plus `critical`/`high`/`medium` tallies.
+   */
+  private async sourceSinkScan(
+    repo: RepoHandle,
+    params: {
+      max_depth?: number;
+      owasp?: string;
+      source_category?: string;
+    },
+  ): Promise<any> {
+    await this.ensureInitialized(repo.id);
+
+    // Tool arguments arrive from an external caller, so normalise the depth
+    // instead of forwarding NaN / negative / fractional values to the search.
+    const defaultMaxDepth = 5;
+    const requestedDepth = params.max_depth == null ? Number.NaN : Number(params.max_depth);
+    const maxDepth = Number.isFinite(requestedDepth)
+      ? Math.max(0, Math.floor(requestedDepth))
+      : defaultMaxDepth;
+
+    // Step 1: Find all Function and Method nodes with their content.
+    const nodesResult = await executeQuery(
+      repo.id,
+      `
+      MATCH (n:Function)
+      WHERE n.id IS NOT NULL
+      RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
+      UNION ALL
+      MATCH (n:Method)
+      WHERE n.id IS NOT NULL
+      RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
+    `,
+    );
+
+    // Step 2: Load user-defined catalog extensions (if any) and merge with built-in catalogs
+    const {
+      getMatchingSources,
+      getMatchingSinks,
+      loadUserSecurityConfig,
+      mergeCatalogs,
+      compilePatterns,
+    } = await import('../../security/catalogs.js');
+
+    const userConfig = await loadUserSecurityConfig(repo.repoPath);
+    const merged = mergeCatalogs(userConfig);
+    const compiledSources = compilePatterns(merged.sources);
+    const compiledSinks = compilePatterns(merged.sinks);
+
+    // Step 3: Tag source-adjacent and sink-adjacent functions
+    const sources: Array<{
+      id: string;
+      name: string;
+      filePath: string;
+      sourcePatterns: string[];
+    }> = [];
+    const sinks: Array<{
+      id: string;
+      name: string;
+      filePath: string;
+      sinkPatterns: string[];
+      owasp: string;
+    }> = [];
+    // id → display info, used later to render path node ids as "name (file)".
+    const nodeNameMap = new Map<string, { name: string; filePath: string }>();
+
+    for (const row of nodesResult) {
+      // Rows are read by column alias first, then by position as a fallback.
+      const content = row.content ?? row[3] ?? '';
+      const name = row.name ?? row[1] ?? '';
+      const filePath = row.filePath ?? row[2] ?? '';
+      const id = row.id ?? row[0] ?? '';
+      const language = getLanguageFromFilename(filePath) ?? undefined;
+
+      nodeNameMap.set(id, { name, filePath });
+
+      const matchedSources = getMatchingSources(content, language, compiledSources);
+      if (matchedSources.length > 0) {
+        if (
+          !params.source_category ||
+          matchedSources.some((s: any) => s.category === params.source_category)
+        ) {
+          sources.push({
+            id,
+            name,
+            filePath,
+            sourcePatterns: matchedSources.map((s: any) => s.pattern),
+          });
+        }
+      }
+
+      const matchedSinks = getMatchingSinks(content, language, compiledSinks);
+      if (matchedSinks.length > 0) {
+        // When an OWASP filter is active, label the sink with the pattern that
+        // actually satisfied the filter. Using the first match unconditionally
+        // could tag a function with a category the caller did not ask for.
+        const labelSink = params.owasp
+          ? matchedSinks.find((s: any) => s.owasp === params.owasp)
+          : matchedSinks[0];
+        if (!params.owasp || labelSink) {
+          sinks.push({
+            id,
+            name,
+            filePath,
+            sinkPatterns: matchedSinks.map((s: any) => s.pattern),
+            owasp: labelSink?.owasp || 'unknown',
+          });
+        }
+      }
+    }
+
+    // Step 4: Build CALLS adjacency map via Cypher
+    const callsResult = await executeQuery(
+      repo.id,
+      `
+      MATCH (a)-[r:CodeRelation {type: 'CALLS'}]->(b)
+      RETURN a.id AS sourceId, b.id AS targetId
+    `,
+    );
+
+    const callsGraph = new Map<string, string[]>();
+    for (const row of callsResult) {
+      const sourceId = row.sourceId ?? row[0];
+      const targetId = row.targetId ?? row[1];
+      // An edge with a missing endpoint cannot be part of any path; skipping it
+      // keeps `undefined` out of the adjacency map and the search frontier.
+      if (sourceId == null || targetId == null) continue;
+      let callees = callsGraph.get(sourceId);
+      if (!callees) {
+        callees = [];
+        callsGraph.set(sourceId, callees);
+      }
+      callees.push(targetId);
+    }
+
+    // Step 5: BFS from sources to sinks
+    const { buildSourceSinkPaths } = await import('../../security/source-sink-scanner.js');
+    const paths = buildSourceSinkPaths(sources, sinks, callsGraph, maxDepth);
+
+    // Step 6: Format results
+    const findings = paths.map((p: any) => ({
+      risk: p.risk,
+      owasp: p.owasp,
+      source: { name: p.source.name, file: p.source.filePath, patterns: p.source.sourcePatterns },
+      sink: { name: p.sink.name, file: p.sink.filePath, patterns: p.sink.sinkPatterns },
+      depth: p.depth,
+      // Render each node id as "name (file)"; unknown ids are shown verbatim.
+      path: p.path.map((id: string) => {
+        const info = nodeNameMap.get(id);
+        return info ? `${info.name} (${info.filePath})` : id;
+      }),
+    }));
+
+    const riskCounts = { critical: 0, high: 0, medium: 0 };
+    for (const f of findings) {
+      // Own-property check: `in` would also accept inherited keys such as
+      // "constructor" and add a junk counter that gets spread into the summary.
+      if (Object.prototype.hasOwnProperty.call(riskCounts, f.risk)) {
+        riskCounts[f.risk as keyof typeof riskCounts]++;
+      }
+    }
+
+    return {
+      summary: {
+        sources_found: sources.length,
+        sinks_found: sinks.length,
+        paths_found: findings.length,
+        ...riskCounts,
+      },
+      findings,
+      note: 'Structural reachability scan — paths may contain sanitizers. Use context() on flagged functions to verify.',
+    };
+  }
+
   async disconnect(): Promise<void> {
     await closeLbug(); // close all connections
     // Note: we intentionally do NOT call disposeEmbedder() here.

@@ -72,6 +72,9 @@ function getNextStepHint(toolName: string, args: Record<string, any> | undefined
     case 'overview':
       return `\n\n---\n**Next:** To drill into an area, READ gitnexus://repo/${repoPath}/cluster/{name}. To see execution flows, READ gitnexus://repo/${repoPath}/processes.`;
 
+    case 'source_sink':
+      return '\n\n---\n**Next:** Use context() on flagged source or sink functions to understand the full call chain and check for sanitizers.';
+
     default:
       return '';
   }

@@ -377,6 +377,41 @@ Returns: single route object when one match, or { routes: [...], total: N } for
       required: [],
     },
   },
+  {
+    name: 'source_sink',
+    description: `Scan for security-relevant data paths: find functions that read user input (sources) and trace whether they can reach dangerous operations (sinks) through the CALLS graph.
+
+WHEN TO USE: Security review, pre-deployment audit, checking if user input reaches dangerous operations (SQL injection, command injection, XSS, SSRF).
+AFTER THIS: Use context() on flagged functions to understand the full call chain, then verify if sanitizers exist in between.
+
+Returns paths from source functions to sink functions, ranked by risk level.
+Uses BFS over existing CALLS edges — structural reachability, not taint tracking.`,
+    inputSchema: {
+      type: 'object',
+      properties: {
+        repo: {
+          type: 'string',
+          description: 'Repository name or path. Omit if only one repo is indexed.',
+        },
+        max_depth: {
+          type: 'number',
+          description:
+            'Maximum BFS depth from source to sink (default: 5). Lower = fewer false positives, higher = more coverage.',
+        },
+        owasp: {
+          type: 'string',
+          description:
+            'Filter by OWASP category: "A03-injection", "A07-xss", "A10-ssrf". Omit for all.',
+        },
+        source_category: {
+          type: 'string',
+          description:
+            'Filter sources by category: "user_input", "environment", "file_read", "network". Omit for all.',
+        },
+      },
+      required: [],
+    },
+  },
   {
     name: 'group_list',
     description: `List all configured repository groups, or return details for one group (repos, manifest links).