Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions gitnexus/src/mcp/local/local-backend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import fs from 'fs/promises';
import path from 'path';
import { getLanguageFromFilename } from 'gitnexus-shared';
import {
initLbug,
executeQuery,
Expand Down Expand Up @@ -497,6 +498,8 @@ export class LocalBackend {
return this.toolMap(repo, params);
case 'api_impact':
return this.apiImpact(repo, params);
case 'source_sink':
return this.sourceSinkScan(repo, params);
default:
throw new Error(`Unknown tool: ${method}`);
}
Expand Down Expand Up @@ -3219,6 +3222,154 @@ export class LocalBackend {
};
}

/**
 * Structural source-to-sink reachability scan backing the `source_sink` tool.
 *
 * Pipeline:
 *  1. Load every Function/Method node (id, name, filePath, content).
 *  2. Merge the built-in security catalogs with the repo's user config and
 *     compile the resulting patterns.
 *  3. Tag nodes whose content matches a source and/or sink pattern,
 *     honouring the optional `source_category` / `owasp` filters.
 *  4. Build a caller -> callees adjacency map from the CALLS relations.
 *  5. Hand sources, sinks and the graph to `buildSourceSinkPaths`.
 *  6. Shape the paths into findings plus a per-risk summary.
 *
 * @param repo   Handle of the repository to scan (`id` and `repoPath` are used).
 * @param params Optional filters:
 *   - `max_depth`: traversal depth limit forwarded to `buildSourceSinkPaths`;
 *     defaults to 5. Non-numeric, non-finite or negative values fall back to
 *     the default and fractional values are truncated.
 *   - `owasp`: keep only sinks that match this OWASP category.
 *   - `source_category`: keep only sources that match this category.
 * @returns `{ summary, findings, note }`; `summary` carries source/sink/path
 *   counts and the critical/high/medium tallies.
 */
private async sourceSinkScan(
  repo: RepoHandle,
  params: {
    max_depth?: number;
    owasp?: string;
    source_category?: string;
  },
): Promise<any> {
  await this.ensureInitialized(repo.id);

  // Tool arguments arrive from an external client, so do not trust the type:
  // coerce, then reject NaN / Infinity / negatives instead of forwarding them.
  const DEFAULT_MAX_DEPTH = 5;
  const requestedDepth = params.max_depth == null ? NaN : Number(params.max_depth);
  const maxDepth =
    Number.isFinite(requestedDepth) && requestedDepth >= 0
      ? Math.floor(requestedDepth)
      : DEFAULT_MAX_DEPTH;

  // Step 1: Find all Function and Method nodes with their content.
  const nodesResult = await executeQuery(
    repo.id,
    `
    MATCH (n:Function)
    WHERE n.id IS NOT NULL
    RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
    UNION ALL
    MATCH (n:Method)
    WHERE n.id IS NOT NULL
    RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content
    `,
  );

  // Step 2: Load user-defined catalog extensions (if any) and merge with built-in catalogs
  const {
    getMatchingSources,
    getMatchingSinks,
    loadUserSecurityConfig,
    mergeCatalogs,
    compilePatterns,
  } = await import('../../security/catalogs.js');

  const userConfig = await loadUserSecurityConfig(repo.repoPath);
  const merged = mergeCatalogs(userConfig);
  const compiledSources = compilePatterns(merged.sources);
  const compiledSinks = compilePatterns(merged.sinks);

  // Step 3: Tag source-adjacent and sink-adjacent functions
  const sources: Array<{
    id: string;
    name: string;
    filePath: string;
    sourcePatterns: string[];
  }> = [];
  const sinks: Array<{
    id: string;
    name: string;
    filePath: string;
    sinkPatterns: string[];
    owasp: string;
  }> = [];
  const nodeNameMap = new Map<string, { name: string; filePath: string }>();

  for (const row of nodesResult) {
    // Rows may come back keyed by column alias or by position; accept both.
    const id = row.id ?? row[0] ?? '';
    const name = row.name ?? row[1] ?? '';
    const filePath = row.filePath ?? row[2] ?? '';
    const content = row.content ?? row[3] ?? '';
    const language = getLanguageFromFilename(filePath) ?? undefined;

    nodeNameMap.set(id, { name, filePath });

    const matchedSources = getMatchingSources(content, language, compiledSources);
    if (matchedSources.length > 0) {
      if (
        !params.source_category ||
        matchedSources.some((s: any) => s.category === params.source_category)
      ) {
        sources.push({
          id,
          name,
          filePath,
          sourcePatterns: matchedSources.map((s: any) => s.pattern),
        });
      }
    }

    const matchedSinks = getMatchingSinks(content, language, compiledSinks);
    if (matchedSinks.length > 0) {
      // When an OWASP filter is active, label the sink with the category that
      // satisfied the filter. Taking matchedSinks[0] unconditionally could
      // report a different category than the one the caller asked for.
      const labelSink = params.owasp
        ? matchedSinks.find((s: any) => s.owasp === params.owasp)
        : matchedSinks[0];
      if (!params.owasp || labelSink) {
        sinks.push({
          id,
          name,
          filePath,
          sinkPatterns: matchedSinks.map((s: any) => s.pattern),
          owasp: labelSink?.owasp || 'unknown',
        });
      }
    }
  }

  // Step 4: Build CALLS adjacency map via Cypher
  const callsResult = await executeQuery(
    repo.id,
    `
    MATCH (a)-[r:CodeRelation {type: 'CALLS'}]->(b)
    RETURN a.id AS sourceId, b.id AS targetId
    `,
  );

  const callsGraph = new Map<string, string[]>();
  for (const row of callsResult) {
    const sourceId = row.sourceId ?? row[0];
    const targetId = row.targetId ?? row[1];
    // An edge with a missing endpoint cannot be traversed; skip it rather than
    // storing `undefined` as a key or callee.
    if (sourceId == null || targetId == null) continue;
    let callees = callsGraph.get(sourceId);
    if (!callees) {
      callees = [];
      callsGraph.set(sourceId, callees);
    }
    callees.push(targetId);
  }

  // Step 5: BFS from sources to sinks
  const { buildSourceSinkPaths } = await import('../../security/source-sink-scanner.js');
  const paths = buildSourceSinkPaths(sources, sinks, callsGraph, maxDepth);

  // Step 6: Format results
  const findings = paths.map((p: any) => ({
    risk: p.risk,
    owasp: p.owasp,
    source: { name: p.source.name, file: p.source.filePath, patterns: p.source.sourcePatterns },
    sink: { name: p.sink.name, file: p.sink.filePath, patterns: p.sink.sinkPatterns },
    depth: p.depth,
    // Render each hop as "name (file)"; fall back to the raw id for nodes
    // that were not among the loaded Function/Method rows.
    path: p.path.map((id: string) => {
      const info = nodeNameMap.get(id);
      return info ? `${info.name} (${info.filePath})` : id;
    }),
  }));

  const riskCounts = { critical: 0, high: 0, medium: 0 };
  for (const f of findings) {
    // Own-property check: `in` would also accept inherited keys such as
    // "toString" and corrupt the summary object.
    if (Object.prototype.hasOwnProperty.call(riskCounts, f.risk)) {
      riskCounts[f.risk as keyof typeof riskCounts]++;
    }
  }

  return {
    summary: {
      sources_found: sources.length,
      sinks_found: sinks.length,
      paths_found: findings.length,
      ...riskCounts,
    },
    findings,
    note: 'Structural reachability scan — paths may contain sanitizers. Use context() on flagged functions to verify.',
  };
}

async disconnect(): Promise<void> {
await closeLbug(); // close all connections
// Note: we intentionally do NOT call disposeEmbedder() here.
Expand Down
3 changes: 3 additions & 0 deletions gitnexus/src/mcp/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ function getNextStepHint(toolName: string, args: Record<string, any> | undefined
case 'overview':
return `\n\n---\n**Next:** To drill into an area, READ gitnexus://repo/${repoPath}/cluster/{name}. To see execution flows, READ gitnexus://repo/${repoPath}/processes.`;

case 'source_sink':
return '\n\n---\n**Next:** Use context() on flagged source or sink functions to understand the full call chain and check for sanitizers.';

default:
return '';
}
Expand Down
35 changes: 35 additions & 0 deletions gitnexus/src/mcp/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,41 @@ Returns: single route object when one match, or { routes: [...], total: N } for
required: [],
},
},
// Tool definition for `source_sink`. LocalBackend dispatches this tool name to
// its sourceSinkScan() method. Every argument is optional (`required` is empty).
{
name: 'source_sink',
description: `Scan for security-relevant data paths: find functions that read user input (sources) and trace whether they can reach dangerous operations (sinks) through the CALLS graph.

WHEN TO USE: Security review, pre-deployment audit, checking if user input reaches dangerous operations (SQL injection, command injection, XSS, SSRF).
AFTER THIS: Use context() on flagged functions to understand the full call chain, then verify if sanitizers exist in between.

Returns paths from source functions to sink functions, ranked by risk level.
Uses BFS over existing CALLS edges — structural reachability, not taint tracking.`,
inputSchema: {
type: 'object',
properties: {
repo: {
type: 'string',
description: 'Repository name or path. Omit if only one repo is indexed.',
},
// Depth limit handed to the path search; the backend falls back to 5 when omitted.
max_depth: {
type: 'number',
description:
'Maximum BFS depth from source to sink (default: 5). Lower = fewer false positives, higher = more coverage.',
},
// Compared by strict equality against the `owasp` field of matched sink patterns.
owasp: {
type: 'string',
description:
'Filter by OWASP category: "A03-injection", "A07-xss", "A10-ssrf". Omit for all.',
},
// Compared by strict equality against the `category` field of matched source patterns.
source_category: {
type: 'string',
description:
'Filter sources by category: "user_input", "environment", "file_read", "network". Omit for all.',
},
},
required: [],
},
},
{
name: 'group_list',
description: `List all configured repository groups, or return details for one group (repos, manifest links).
Expand Down
Loading
Loading