From 872c1e1bbdb735e88e2c365a9c29180ff5e3ebd4 Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 6 Jan 2026 05:00:20 -0800 Subject: [PATCH 1/6] feat(mcp): Add monorepo submodule support with caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 4 new MCP tools for monorepo development workflows: - init_monorepo_config: Auto-detect project structure (Rust/TS/Go/Python) - list_submodules: List configured submodules with dependencies - get_submodule_context: Load submodule content with caching - invalidate_submodule_cache: Manually invalidate cache Key features: - Git commit-based cache invalidation - Support for git submodules with independent history - Dependency graph with cycle detection - Justfile and script generation for cache management - Project detector for Rust workspace, npm/pnpm workspaces, Go workspaces This enables loading specific submodules on-demand instead of packing the entire monorepo, significantly reducing token usage for large projects. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/mcp/cache/cacheManager.ts | 275 +++++++++ src/mcp/cache/cacheTypes.ts | 74 +++ src/mcp/config/monorepoConfigLoader.ts | 113 ++++ src/mcp/dependency/dependencyGraph.ts | 280 +++++++++ src/mcp/detection/projectDetector.ts | 561 ++++++++++++++++++ src/mcp/generation/justfileGenerator.ts | 345 +++++++++++ src/mcp/mcpServer.ts | 13 + src/mcp/tools/getSubmoduleContextTool.ts | 226 +++++++ src/mcp/tools/initMonorepoConfigTool.ts | 208 +++++++ src/mcp/tools/invalidateSubmoduleCacheTool.ts | 97 +++ src/mcp/tools/listSubmodulesTool.ts | 127 ++++ tests/mcp/cache/cacheManager.test.ts | 161 +++++ tests/mcp/config/monorepoConfigLoader.test.ts | 87 +++ tests/mcp/dependency/dependencyGraph.test.ts | 193 ++++++ 14 files changed, 2760 insertions(+) create mode 100644 src/mcp/cache/cacheManager.ts create mode 100644 src/mcp/cache/cacheTypes.ts create mode 100644 src/mcp/config/monorepoConfigLoader.ts create mode 100644 src/mcp/dependency/dependencyGraph.ts create mode 100644 src/mcp/detection/projectDetector.ts create mode 100644 src/mcp/generation/justfileGenerator.ts create mode 100644 src/mcp/tools/getSubmoduleContextTool.ts create mode 100644 src/mcp/tools/initMonorepoConfigTool.ts create mode 100644 src/mcp/tools/invalidateSubmoduleCacheTool.ts create mode 100644 src/mcp/tools/listSubmodulesTool.ts create mode 100644 tests/mcp/cache/cacheManager.test.ts create mode 100644 tests/mcp/config/monorepoConfigLoader.test.ts create mode 100644 tests/mcp/dependency/dependencyGraph.test.ts diff --git a/src/mcp/cache/cacheManager.ts b/src/mcp/cache/cacheManager.ts new file mode 100644 index 000000000..c7c0849d1 --- /dev/null +++ b/src/mcp/cache/cacheManager.ts @@ -0,0 +1,275 @@ +import { execFile } from 'node:child_process'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { promisify } from 'node:util'; +import { logger } from '../../shared/logger.js'; +import 
type { CacheCheckResult, CachedContent, CacheMetadata } from './cacheTypes.js'; + +const execFileAsync = promisify(execFile); + +/** + * Default dependencies for CacheManager (for testability) + */ +export interface CacheManagerDeps { + execFileAsync: typeof execFileAsync; + fsReadFile: typeof fs.readFile; + fsWriteFile: typeof fs.writeFile; + fsMkdir: typeof fs.mkdir; + fsRm: typeof fs.rm; + fsAccess: typeof fs.access; + fsStat: typeof fs.stat; +} + +const defaultDeps: CacheManagerDeps = { + execFileAsync, + fsReadFile: fs.readFile, + fsWriteFile: fs.writeFile, + fsMkdir: fs.mkdir, + fsRm: fs.rm, + fsAccess: fs.access, + fsStat: fs.stat, +}; + +/** + * Manages cache for monorepo submodules + */ +export class CacheManager { + private cacheDir: string; + private rootDir: string; + private deps: CacheManagerDeps; + + constructor(cacheDir: string, rootDir: string = process.cwd(), deps: CacheManagerDeps = defaultDeps) { + this.cacheDir = cacheDir; + this.rootDir = rootDir; + this.deps = deps; + } + + /** + * Get the full path for a submodule's cache file + */ + private getContentPath(submoduleName: string): string { + return path.join(this.cacheDir, `${submoduleName}.xml`); + } + + /** + * Get the full path for a submodule's metadata file + */ + private getMetaPath(submoduleName: string): string { + return path.join(this.cacheDir, `${submoduleName}.meta.json`); + } + + /** + * Check if a file exists + */ + private async exists(filePath: string): Promise { + try { + await this.deps.fsAccess(filePath); + return true; + } catch { + return false; + } + } + + /** + * Read metadata from cache + */ + private async readMeta(metaPath: string): Promise { + const content = await this.deps.fsReadFile(metaPath, 'utf-8'); + return JSON.parse(content) as CacheMetadata; + } + + /** + * Get current git commit for a submodule + * If it's a git submodule, reads the submodule's HEAD + * Otherwise reads the main repository's HEAD + */ + async getSubmoduleGitCommit(submodulePath: string, 
isGitSubmodule: boolean): Promise { + try { + if (isGitSubmodule) { + // Git submodule: read its own HEAD + const fullPath = path.join(this.rootDir, submodulePath); + const result = await this.deps.execFileAsync('git', ['-C', fullPath, 'rev-parse', 'HEAD']); + return result.stdout.trim(); + } + // Normal directory: read main repo HEAD + const result = await this.deps.execFileAsync('git', ['-C', this.rootDir, 'rev-parse', 'HEAD']); + return result.stdout.trim(); + } catch (error) { + logger.trace(`Failed to get git commit for ${submodulePath}:`, (error as Error).message); + return ''; + } + } + + /** + * Check if a submodule has changed since the cached commit + */ + async hasChangedSince(submodulePath: string, isGitSubmodule: boolean, lastCommit: string): Promise { + try { + const currentCommit = await this.getSubmoduleGitCommit(submodulePath, isGitSubmodule); + + if (!currentCommit || !lastCommit) { + return true; // Assume changed if we can't determine + } + + if (currentCommit === lastCommit) { + return false; // Same commit, no changes + } + + // Further check: are there actual file changes in this submodule? 
+ if (isGitSubmodule) { + const fullPath = path.join(this.rootDir, submodulePath); + const result = await this.deps.execFileAsync('git', [ + '-C', + fullPath, + 'diff', + '--name-only', + lastCommit, + currentCommit, + ]); + return result.stdout.trim().length > 0; + } + // Normal directory: check for changes in that path + const result = await this.deps.execFileAsync('git', [ + '-C', + this.rootDir, + 'diff', + '--name-only', + lastCommit, + currentCommit, + '--', + `${submodulePath}/`, + ]); + return result.stdout.trim().length > 0; + } catch (error) { + logger.trace(`Failed to check changes for ${submodulePath}:`, (error as Error).message); + return true; // Assume changed on error + } + } + + /** + * Check cache status for a submodule + */ + async check(submoduleName: string, submodulePath: string, isGitSubmodule: boolean): Promise { + const metaPath = this.getMetaPath(submoduleName); + const contentPath = this.getContentPath(submoduleName); + + // Check if cache files exist + if (!(await this.exists(metaPath)) || !(await this.exists(contentPath))) { + return { + exists: false, + valid: false, + invalidReason: 'Cache files not found', + }; + } + + try { + const meta = await this.readMeta(metaPath); + + // Check if content has changed + const hasChanged = await this.hasChangedSince(submodulePath, isGitSubmodule, meta.gitCommit); + + if (hasChanged) { + return { + exists: true, + valid: false, + meta, + invalidReason: 'Content has changed since cache was created', + }; + } + + return { + exists: true, + valid: true, + meta, + }; + } catch (error) { + return { + exists: true, + valid: false, + invalidReason: `Failed to read cache metadata: ${(error as Error).message}`, + }; + } + } + + /** + * Get cached content for a submodule + */ + async get(submoduleName: string, submodulePath: string, isGitSubmodule: boolean): Promise { + const checkResult = await this.check(submoduleName, submodulePath, isGitSubmodule); + + if (!checkResult.exists || !checkResult.valid || 
!checkResult.meta) { + return null; + } + + try { + const content = await this.deps.fsReadFile(this.getContentPath(submoduleName), 'utf-8'); + return { + content, + meta: checkResult.meta, + }; + } catch (error) { + logger.trace(`Failed to read cache content for ${submoduleName}:`, (error as Error).message); + return null; + } + } + + /** + * Save content to cache + */ + async set(submoduleName: string, content: string, meta: Omit): Promise { + const metaPath = this.getMetaPath(submoduleName); + const contentPath = this.getContentPath(submoduleName); + + const fullMeta: CacheMetadata = { + ...meta, + generatedAt: new Date().toISOString(), + }; + + // Ensure cache directory exists + await this.deps.fsMkdir(this.cacheDir, { recursive: true }); + + // Write files + await this.deps.fsWriteFile(metaPath, JSON.stringify(fullMeta, null, 2), 'utf-8'); + await this.deps.fsWriteFile(contentPath, content, 'utf-8'); + + logger.trace(`Cache saved for ${submoduleName}`); + } + + /** + * Invalidate cache for a submodule + */ + async invalidate(submoduleName: string): Promise { + const metaPath = this.getMetaPath(submoduleName); + const contentPath = this.getContentPath(submoduleName); + + await this.deps.fsRm(metaPath, { force: true }); + await this.deps.fsRm(contentPath, { force: true }); + + logger.trace(`Cache invalidated for ${submoduleName}`); + } + + /** + * Invalidate all caches + */ + async invalidateAll(): Promise { + try { + await this.deps.fsRm(this.cacheDir, { recursive: true, force: true }); + logger.trace('All caches invalidated'); + } catch (error) { + logger.trace('Failed to invalidate all caches:', (error as Error).message); + } + } + + /** + * List all cached submodules + */ + async listCached(): Promise { + try { + const files = await fs.readdir(this.cacheDir); + const metaFiles = files.filter((f) => f.endsWith('.meta.json')); + return metaFiles.map((f) => f.replace('.meta.json', '')); + } catch { + return []; + } + } +} diff --git a/src/mcp/cache/cacheTypes.ts 
b/src/mcp/cache/cacheTypes.ts new file mode 100644 index 000000000..60f9b91cd --- /dev/null +++ b/src/mcp/cache/cacheTypes.ts @@ -0,0 +1,74 @@ +/** + * Types for Monorepo Submodule Caching + */ + +/** + * Metadata stored alongside cached content + */ +export interface CacheMetadata { + /** Submodule name */ + submodule: string; + + /** When the cache was generated */ + generatedAt: string; + + /** Git commit hash when cache was generated */ + gitCommit: string; + + /** Number of files in the submodule */ + fileCount: number; + + /** Total token count of the content */ + tokenCount: number; + + /** List of dependencies (other submodule names) */ + dependencies: string[]; + + /** Repomix version used to generate */ + repomixVersion: string; + + /** Whether compression was enabled */ + compressed: boolean; + + /** Whether this is a git submodule */ + isGitSubmodule?: boolean; +} + +/** + * Cached content with metadata + */ +export interface CachedContent { + /** The packed content */ + content: string; + + /** Cache metadata */ + meta: CacheMetadata; +} + +/** + * Result of cache check + */ +export interface CacheCheckResult { + /** Whether cache exists */ + exists: boolean; + + /** Whether cache is valid (not expired) */ + valid: boolean; + + /** Metadata if cache exists */ + meta?: CacheMetadata; + + /** Reason if cache is invalid */ + invalidReason?: string; +} + +/** + * Options for cache operations + */ +export interface CacheOptions { + /** Force regenerate even if cache exists */ + forceRegenerate?: boolean; + + /** Whether to use compression */ + compress?: boolean; +} diff --git a/src/mcp/config/monorepoConfigLoader.ts b/src/mcp/config/monorepoConfigLoader.ts new file mode 100644 index 000000000..b6cbde6f3 --- /dev/null +++ b/src/mcp/config/monorepoConfigLoader.ts @@ -0,0 +1,113 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { z } from 'zod'; +import { logger } from '../../shared/logger.js'; + +/** + * Schema for submodule 
configuration + */ +const SubmoduleConfigSchema = z.object({ + path: z.string().describe('Path to the submodule relative to project root'), + description: z.string().optional().describe('Human-readable description'), + dependencies: z.array(z.string()).default([]).describe('List of dependency submodule names'), + includePatterns: z.array(z.string()).optional().describe('Glob patterns to include'), + ignorePatterns: z.array(z.string()).optional().describe('Glob patterns to ignore'), + isGitSubmodule: z.boolean().default(false).describe('Whether this is a git submodule'), +}); + +/** + * Schema for cache configuration + */ +const CacheConfigSchema = z.object({ + directory: z.string().default('.repomix-cache').describe('Cache directory path'), + enabled: z.boolean().default(true).describe('Whether caching is enabled'), +}); + +/** + * Schema for repomix options + */ +const RepomixConfigSchema = z.object({ + compress: z.boolean().default(true).describe('Enable Tree-sitter compression'), + style: z.enum(['xml', 'markdown', 'json', 'plain']).default('xml').describe('Output format'), + removeComments: z.boolean().default(false).describe('Remove comments from code'), + showLineNumbers: z.boolean().default(true).describe('Show line numbers in output'), +}); + +/** + * Schema for monorepo configuration + */ +export const MonorepoConfigSchema = z.object({ + submodules: z.record(z.string(), SubmoduleConfigSchema).describe('Map of submodule name to configuration'), + cache: CacheConfigSchema.default({ + directory: '.repomix-cache', + enabled: true, + }).describe('Cache configuration'), + repomix: RepomixConfigSchema.default({ + compress: true, + style: 'xml', + removeComments: false, + showLineNumbers: true, + }).describe('Repomix options'), +}); + +/** + * Type for submodule configuration + */ +export type SubmoduleConfig = z.infer; + +/** + * Type for monorepo configuration + */ +export type MonorepoConfig = z.infer; + +/** + * Configuration file name + */ +export const 
MONOREPO_CONFIG_FILE = '.repomix-monorepo.json'; + +/** + * Load monorepo configuration from file + * @param rootDir Project root directory + * @returns Configuration or null if not found + */ +export async function loadMonorepoConfig(rootDir: string = process.cwd()): Promise { + const configPath = path.join(rootDir, MONOREPO_CONFIG_FILE); + + try { + const content = await fs.readFile(configPath, 'utf-8'); + const parsed = JSON.parse(content); + return MonorepoConfigSchema.parse(parsed); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + logger.trace(`Monorepo config not found at ${configPath}`); + return null; + } + logger.trace(`Failed to load monorepo config: ${(error as Error).message}`); + return null; + } +} + +/** + * Save monorepo configuration to file + * @param config Configuration to save + * @param rootDir Project root directory + */ +export async function saveMonorepoConfig(config: MonorepoConfig, rootDir: string = process.cwd()): Promise { + const configPath = path.join(rootDir, MONOREPO_CONFIG_FILE); + await fs.writeFile(configPath, JSON.stringify(config, null, 2), 'utf-8'); + logger.trace(`Monorepo config saved to ${configPath}`); +} + +/** + * Get submodule by name from config + */ +export function getSubmodule(config: MonorepoConfig, name: string): SubmoduleConfig | null { + return config.submodules[name] ?? 
null; +} + +/** + * Get all submodule names from config + */ +export function getSubmoduleNames(config: MonorepoConfig): string[] { + return Object.keys(config.submodules); +} diff --git a/src/mcp/dependency/dependencyGraph.ts b/src/mcp/dependency/dependencyGraph.ts new file mode 100644 index 000000000..a72967f07 --- /dev/null +++ b/src/mcp/dependency/dependencyGraph.ts @@ -0,0 +1,280 @@ +import type { MonorepoConfig } from '../config/monorepoConfigLoader.js'; + +/** + * Result of cycle detection + */ +export interface CycleInfo { + /** Nodes forming the cycle */ + cycle: string[]; + /** Human-readable description */ + description: string; +} + +/** + * Manages dependency relationships between submodules + */ +export class DependencyGraph { + private graph: Map>; + private reverseGraph: Map>; + + constructor(config: MonorepoConfig) { + this.graph = new Map(); + this.reverseGraph = new Map(); + + // Build forward and reverse dependency graphs + for (const [name, submodule] of Object.entries(config.submodules)) { + this.graph.set(name, new Set(submodule.dependencies)); + + // Initialize reverse graph entry + if (!this.reverseGraph.has(name)) { + this.reverseGraph.set(name, new Set()); + } + + // Build reverse edges + for (const dep of submodule.dependencies) { + if (!this.reverseGraph.has(dep)) { + this.reverseGraph.set(dep, new Set()); + } + this.reverseGraph.get(dep)?.add(name); + } + } + } + + /** + * Get direct dependencies of a submodule + */ + getDirectDependencies(submoduleName: string): string[] { + const deps = this.graph.get(submoduleName); + return deps ? 
Array.from(deps) : []; + } + + /** + * Get all dependencies of a submodule (recursive) + * Uses topological order to ensure dependencies come before dependents + */ + getAllDependencies(submoduleName: string): string[] { + const visited = new Set(); + const result: string[] = []; + + const visit = (name: string): void => { + if (visited.has(name)) return; + visited.add(name); + + const deps = this.graph.get(name) || new Set(); + for (const dep of deps) { + visit(dep); + } + + // Add to result after visiting dependencies (topological order) + if (name !== submoduleName) { + result.push(name); + } + }; + + visit(submoduleName); + return result; + } + + /** + * Get submodules that depend on the given submodule (reverse dependencies) + */ + getDependents(submoduleName: string): string[] { + const dependents = this.reverseGraph.get(submoduleName); + return dependents ? Array.from(dependents) : []; + } + + /** + * Get all dependents recursively + */ + getAllDependents(submoduleName: string): string[] { + const visited = new Set(); + const result: string[] = []; + + const visit = (name: string): void => { + if (visited.has(name)) return; + visited.add(name); + + const dependents = this.reverseGraph.get(name) || new Set(); + for (const dep of dependents) { + visit(dep); + } + + if (name !== submoduleName) { + result.push(name); + } + }; + + visit(submoduleName); + return result; + } + + /** + * Detect cycles in the dependency graph + * Returns all cycles found + */ + detectCycles(): CycleInfo[] { + const cycles: CycleInfo[] = []; + const visited = new Set(); + const recursionStack = new Set(); + + const dfs = (node: string, path: string[]): void => { + visited.add(node); + recursionStack.add(node); + path.push(node); + + const deps = this.graph.get(node) || new Set(); + for (const dep of deps) { + if (!visited.has(dep)) { + dfs(dep, [...path]); + } else if (recursionStack.has(dep)) { + // Found a cycle + const cycleStart = path.indexOf(dep); + const cycle = 
[...path.slice(cycleStart), dep]; + cycles.push({ + cycle, + description: `Cycle detected: ${cycle.join(' -> ')}`, + }); + } + } + + recursionStack.delete(node); + }; + + for (const node of this.graph.keys()) { + if (!visited.has(node)) { + dfs(node, []); + } + } + + return cycles; + } + + /** + * Check if the graph has any cycles + */ + hasCycles(): boolean { + return this.detectCycles().length > 0; + } + + /** + * Get topologically sorted list of all submodules + * Returns null if graph has cycles + */ + topologicalSort(): string[] | null { + if (this.hasCycles()) { + return null; + } + + const visited = new Set(); + const result: string[] = []; + + const visit = (name: string): void => { + if (visited.has(name)) return; + visited.add(name); + + const deps = this.graph.get(name) || new Set(); + for (const dep of deps) { + visit(dep); + } + + result.push(name); + }; + + for (const node of this.graph.keys()) { + visit(node); + } + + return result; + } + + /** + * Get submodules with no dependencies (leaf nodes) + */ + getLeafNodes(): string[] { + const leaves: string[] = []; + for (const [name, deps] of this.graph.entries()) { + if (deps.size === 0) { + leaves.push(name); + } + } + return leaves; + } + + /** + * Get submodules with no dependents (root nodes) + */ + getRootNodes(): string[] { + const roots: string[] = []; + for (const [name, dependents] of this.reverseGraph.entries()) { + if (dependents.size === 0) { + roots.push(name); + } + } + return roots; + } + + /** + * Calculate the depth of a submodule in the dependency tree + * Depth 0 = no dependencies (leaf) + */ + getDepth(submoduleName: string): number { + const visited = new Set(); + + const calculateDepth = (name: string): number => { + if (visited.has(name)) return 0; + visited.add(name); + + const deps = this.graph.get(name) || new Set(); + if (deps.size === 0) return 0; + + let maxDepth = 0; + for (const dep of deps) { + maxDepth = Math.max(maxDepth, calculateDepth(dep) + 1); + } + return maxDepth; 
+ }; + + return calculateDepth(submoduleName); + } + + /** + * Get all submodule names in the graph + */ + getAllNodes(): string[] { + return Array.from(this.graph.keys()); + } + + /** + * Check if a submodule exists in the graph + */ + hasNode(name: string): boolean { + return this.graph.has(name); + } + + /** + * Get graph statistics + */ + getStats(): { + totalNodes: number; + totalEdges: number; + leafNodes: number; + rootNodes: number; + maxDepth: number; + } { + let totalEdges = 0; + let maxDepth = 0; + + for (const [name, deps] of this.graph.entries()) { + totalEdges += deps.size; + maxDepth = Math.max(maxDepth, this.getDepth(name)); + } + + return { + totalNodes: this.graph.size, + totalEdges, + leafNodes: this.getLeafNodes().length, + rootNodes: this.getRootNodes().length, + maxDepth, + }; + } +} diff --git a/src/mcp/detection/projectDetector.ts b/src/mcp/detection/projectDetector.ts new file mode 100644 index 000000000..b3643dda3 --- /dev/null +++ b/src/mcp/detection/projectDetector.ts @@ -0,0 +1,561 @@ +import { execFile } from 'node:child_process'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { promisify } from 'node:util'; +import { logger } from '../../shared/logger.js'; + +const execFileAsync = promisify(execFile); + +/** + * Supported project types + */ +export type ProjectType = 'rust' | 'typescript' | 'python' | 'go' | 'mixed' | 'generic'; + +/** + * Submodule type information + */ +export type SubmoduleType = 'rust' | 'typescript' | 'python' | 'go' | 'generic'; + +/** + * Information about a detected submodule + */ +export interface DetectedSubmodule { + name: string; + path: string; + type: SubmoduleType; + isGitSubmodule: boolean; + description?: string; + dependencies: string[]; +} + +/** + * Project detection result + */ +export interface ProjectDetectionResult { + projectType: ProjectType; + submodules: DetectedSubmodule[]; + gitSubmodulePaths: Set; +} + +/** + * Dependencies for ProjectDetector (for 
testability) + */ +export interface ProjectDetectorDeps { + execFileAsync: typeof execFileAsync; + fsReadFile: typeof fs.readFile; + fsAccess: typeof fs.access; + fsStat: typeof fs.stat; + fsReaddir: typeof fs.readdir; +} + +const defaultDeps: ProjectDetectorDeps = { + execFileAsync, + fsReadFile: fs.readFile, + fsAccess: fs.access, + fsStat: fs.stat, + fsReaddir: fs.readdir, +}; + +/** + * Detects project structure and submodules + */ +export class ProjectDetector { + private rootDir: string; + private deps: ProjectDetectorDeps; + + constructor(rootDir: string, deps: ProjectDetectorDeps = defaultDeps) { + this.rootDir = rootDir; + this.deps = deps; + } + + /** + * Detect project type and submodules + */ + async detect(): Promise { + // 1. Detect git submodules first + const gitSubmodulePaths = await this.detectGitSubmodules(); + + // 2. Detect project type + const projectType = await this.detectProjectType(); + + // 3. Detect submodules based on project type + let submodules: DetectedSubmodule[] = []; + + switch (projectType) { + case 'rust': + submodules = await this.detectRustCrates(); + break; + case 'typescript': + submodules = await this.detectTypeScriptPackages(); + break; + case 'python': + submodules = await this.detectPythonPackages(); + break; + case 'go': + submodules = await this.detectGoModules(); + break; + case 'mixed': + case 'generic': + submodules = await this.detectMixedProject(); + break; + } + + // 4. 
Mark git submodules + for (const submodule of submodules) { + submodule.isGitSubmodule = gitSubmodulePaths.has(submodule.path); + } + + return { projectType, submodules, gitSubmodulePaths }; + } + + /** + * Detect git submodules from .gitmodules + */ + private async detectGitSubmodules(): Promise> { + const submodules = new Set(); + + try { + const gitmodulesPath = path.join(this.rootDir, '.gitmodules'); + const content = await this.deps.fsReadFile(gitmodulesPath, 'utf-8'); + + // Parse [submodule "xxx"] path = xxx + const pathRegex = /path\s*=\s*(.+)/g; + let match: RegExpExecArray | null; + while (true) { + match = pathRegex.exec(content); + if (match === null) break; + submodules.add(match[1].trim()); + } + } catch { + // .gitmodules doesn't exist, no submodules + logger.trace('.gitmodules not found'); + } + + return submodules; + } + + /** + * Detect the main project type + */ + private async detectProjectType(): Promise { + const checks: Array<{ file: string; type: ProjectType }> = [ + { file: 'Cargo.toml', type: 'rust' }, + { file: 'package.json', type: 'typescript' }, + { file: 'pyproject.toml', type: 'python' }, + { file: 'go.mod', type: 'go' }, + ]; + + const found: ProjectType[] = []; + + for (const check of checks) { + if (await this.fileExists(path.join(this.rootDir, check.file))) { + found.push(check.type); + } + } + + if (found.length === 0) return 'generic'; + if (found.length === 1) return found[0]; + return 'mixed'; + } + + /** + * Detect Rust crates using cargo metadata + */ + private async detectRustCrates(): Promise { + try { + const { stdout } = await this.deps.execFileAsync('cargo', ['metadata', '--format-version=1', '--no-deps'], { + cwd: this.rootDir, + maxBuffer: 50 * 1024 * 1024, + }); + + const metadata = JSON.parse(stdout); + const workspaceRoot = metadata.workspace_root || this.rootDir; + const workspaceMembers = new Set(metadata.workspace_members || []); + const crates: DetectedSubmodule[] = []; + + // Build a map of package names for 
dependency resolution + const packageNames = new Set(); + for (const pkg of metadata.packages || []) { + packageNames.add(pkg.name); + } + + for (const pkg of metadata.packages || []) { + // Check if this package is a workspace member + const isWorkspaceMember = + workspaceMembers.size === 0 || + Array.from(workspaceMembers).some( + (member) => member.includes(pkg.name) || member.includes(path.dirname(pkg.manifest_path)), + ); + + if (!isWorkspaceMember) continue; + + const manifestDir = path.dirname(pkg.manifest_path); + const relativePath = path.relative(workspaceRoot, manifestDir); + + // Skip root package (empty path) + if (!relativePath) continue; + + // Extract workspace-internal dependencies + const dependencies: string[] = []; + for (const dep of pkg.dependencies || []) { + if (packageNames.has(dep.name) && dep.path) { + dependencies.push(dep.name); + } + } + + crates.push({ + name: pkg.name, + path: relativePath, + type: 'rust', + isGitSubmodule: false, + description: pkg.description, + dependencies, + }); + } + + return crates; + } catch (error) { + logger.trace('Failed to detect Rust crates:', (error as Error).message); + return []; + } + } + + /** + * Detect TypeScript/JavaScript packages from npm/pnpm/yarn workspaces + */ + private async detectTypeScriptPackages(): Promise { + try { + const packageJsonPath = path.join(this.rootDir, 'package.json'); + const content = await this.deps.fsReadFile(packageJsonPath, 'utf-8'); + const packageJson = JSON.parse(content); + + // Get workspace patterns + let workspacePatterns: string[] = []; + if (Array.isArray(packageJson.workspaces)) { + workspacePatterns = packageJson.workspaces; + } else if (packageJson.workspaces?.packages) { + workspacePatterns = packageJson.workspaces.packages; + } + + if (workspacePatterns.length === 0) { + // Check for pnpm-workspace.yaml + try { + const pnpmWorkspacePath = path.join(this.rootDir, 'pnpm-workspace.yaml'); + const pnpmContent = await this.deps.fsReadFile(pnpmWorkspacePath, 
'utf-8'); + // Simple YAML parsing for packages array + const packagesMatch = pnpmContent.match(/packages:\s*\n((?:\s+-\s+.+\n?)+)/); + if (packagesMatch) { + const items = packagesMatch[1].match(/-\s+(.+)/g); + if (items) { + workspacePatterns = items.map((item) => item.replace(/^-\s+/, '').trim().replace(/['"]/g, '')); + } + } + } catch { + // No pnpm-workspace.yaml + } + } + + if (workspacePatterns.length === 0) { + return []; + } + + const packages: DetectedSubmodule[] = []; + const packageNames = new Map(); // name -> path + + // Expand glob patterns and find packages + for (const pattern of workspacePatterns) { + const dirs = await this.expandWorkspacePattern(pattern); + + for (const dir of dirs) { + try { + const pkgJsonPath = path.join(this.rootDir, dir, 'package.json'); + const pkgContent = await this.deps.fsReadFile(pkgJsonPath, 'utf-8'); + const pkgJson = JSON.parse(pkgContent); + + if (pkgJson.name) { + packageNames.set(pkgJson.name, dir); + } + } catch { + // package.json doesn't exist + } + } + } + + // Second pass: resolve dependencies + for (const [name, pkgPath] of packageNames) { + try { + const pkgJsonPath = path.join(this.rootDir, pkgPath, 'package.json'); + const pkgContent = await this.deps.fsReadFile(pkgJsonPath, 'utf-8'); + const pkgJson = JSON.parse(pkgContent); + + const allDeps = { + ...pkgJson.dependencies, + ...pkgJson.devDependencies, + ...pkgJson.peerDependencies, + }; + + const workspaceDeps = Object.keys(allDeps).filter((dep) => packageNames.has(dep)); + + packages.push({ + name, + path: pkgPath, + type: 'typescript', + isGitSubmodule: false, + description: pkgJson.description, + dependencies: workspaceDeps, + }); + } catch { + // Skip if can't read package.json + } + } + + return packages; + } catch (error) { + logger.trace('Failed to detect TypeScript packages:', (error as Error).message); + return []; + } + } + + /** + * Expand workspace glob pattern to actual directories + */ + private async expandWorkspacePattern(pattern: 
string): Promise { + const dirs: string[] = []; + + // Handle simple patterns like "packages/*" or "apps/*" + if (pattern.endsWith('/*')) { + const baseDir = pattern.slice(0, -2); + const fullPath = path.join(this.rootDir, baseDir); + + try { + const entries = await this.deps.fsReaddir(fullPath, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory()) { + dirs.push(path.join(baseDir, entry.name)); + } + } + } catch { + // Directory doesn't exist + } + } else if (pattern.includes('*')) { + // More complex patterns - use fast-glob if available + try { + const fg = await import('fast-glob'); + const matches = await fg.default(pattern, { + cwd: this.rootDir, + onlyDirectories: true, + }); + dirs.push(...matches); + } catch { + // fast-glob not available, skip complex patterns + logger.trace(`Skipping complex pattern: ${pattern}`); + } + } else { + // Exact path + if (await this.directoryExists(path.join(this.rootDir, pattern))) { + dirs.push(pattern); + } + } + + return dirs; + } + + /** + * Detect Python packages (poetry workspaces, src layout, etc.) 
+ */ + private async detectPythonPackages(): Promise { + const packages: DetectedSubmodule[] = []; + + // Check for pyproject.toml with workspace config + try { + const pyprojectPath = path.join(this.rootDir, 'pyproject.toml'); + await this.deps.fsAccess(pyprojectPath); + + // Look for common Python monorepo patterns + const commonDirs = ['packages', 'libs', 'src', 'apps']; + + for (const dir of commonDirs) { + const fullPath = path.join(this.rootDir, dir); + if (await this.directoryExists(fullPath)) { + const entries = await this.deps.fsReaddir(fullPath, { withFileTypes: true }); + + for (const entry of entries) { + if (entry.isDirectory()) { + const subPath = path.join(dir, entry.name); + // Check if it has pyproject.toml or setup.py + const hasPyproject = await this.fileExists(path.join(this.rootDir, subPath, 'pyproject.toml')); + const hasSetupPy = await this.fileExists(path.join(this.rootDir, subPath, 'setup.py')); + + if (hasPyproject || hasSetupPy) { + packages.push({ + name: entry.name, + path: subPath, + type: 'python', + isGitSubmodule: false, + description: `Python package: ${entry.name}`, + dependencies: [], + }); + } + } + } + } + } + } catch { + // No pyproject.toml + } + + return packages; + } + + /** + * Detect Go modules (go.work workspaces) + */ + private async detectGoModules(): Promise { + const modules: DetectedSubmodule[] = []; + + try { + // Check for go.work file (Go 1.18+ workspaces) + const goWorkPath = path.join(this.rootDir, 'go.work'); + const content = await this.deps.fsReadFile(goWorkPath, 'utf-8'); + + // Parse use directives + const useRegex = /use\s+\(\s*([\s\S]*?)\s*\)|use\s+(\S+)/g; + let match: RegExpExecArray | null; + + while (true) { + match = useRegex.exec(content); + if (match === null) break; + + const paths = match[1] + ? 
match[1] + .split('\n') + .map((l) => l.trim()) + .filter((l) => l && !l.startsWith('//')) + : [match[2]]; + + for (const modulePath of paths) { + const cleanPath = modulePath.replace(/^\.\//, ''); + if (await this.directoryExists(path.join(this.rootDir, cleanPath))) { + // Read go.mod to get module name + try { + const goModPath = path.join(this.rootDir, cleanPath, 'go.mod'); + const goModContent = await this.deps.fsReadFile(goModPath, 'utf-8'); + const moduleMatch = goModContent.match(/module\s+(\S+)/); + const moduleName = moduleMatch ? moduleMatch[1] : path.basename(cleanPath); + + modules.push({ + name: moduleName, + path: cleanPath, + type: 'go', + isGitSubmodule: false, + description: `Go module: ${moduleName}`, + dependencies: [], + }); + } catch { + // No go.mod, skip + } + } + } + } + } catch { + // No go.work file, try to find go modules in common directories + const commonDirs = ['cmd', 'pkg', 'internal', 'apps', 'services']; + + for (const dir of commonDirs) { + const fullPath = path.join(this.rootDir, dir); + if (await this.directoryExists(fullPath)) { + const entries = await this.deps.fsReaddir(fullPath, { withFileTypes: true }); + + for (const entry of entries) { + if (entry.isDirectory()) { + const subPath = path.join(dir, entry.name); + const hasGoMod = await this.fileExists(path.join(this.rootDir, subPath, 'go.mod')); + + if (hasGoMod) { + modules.push({ + name: entry.name, + path: subPath, + type: 'go', + isGitSubmodule: false, + description: `Go module: ${entry.name}`, + dependencies: [], + }); + } + } + } + } + } + } + + return modules; + } + + /** + * Detect submodules in mixed or generic projects + */ + private async detectMixedProject(): Promise { + const submodules: DetectedSubmodule[] = []; + const commonDirs = ['packages', 'crates', 'libs', 'apps', 'services', 'modules', 'components']; + + for (const dir of commonDirs) { + const fullPath = path.join(this.rootDir, dir); + if (!(await this.directoryExists(fullPath))) continue; + + const 
entries = await this.deps.fsReaddir(fullPath, { withFileTypes: true }); + + for (const entry of entries) { + if (!entry.isDirectory()) continue; + + const subPath = path.join(dir, entry.name); + const type = await this.detectSubmoduleType(path.join(this.rootDir, subPath)); + + submodules.push({ + name: entry.name, + path: subPath, + type, + isGitSubmodule: false, + description: `${type} package: ${entry.name}`, + dependencies: [], + }); + } + } + + return submodules; + } + + /** + * Detect the type of a specific submodule directory + */ + private async detectSubmoduleType(dir: string): Promise { + if (await this.fileExists(path.join(dir, 'Cargo.toml'))) return 'rust'; + if (await this.fileExists(path.join(dir, 'package.json'))) return 'typescript'; + if (await this.fileExists(path.join(dir, 'pyproject.toml'))) return 'python'; + if (await this.fileExists(path.join(dir, 'setup.py'))) return 'python'; + if (await this.fileExists(path.join(dir, 'go.mod'))) return 'go'; + return 'generic'; + } + + /** + * Check if a file exists + */ + private async fileExists(filePath: string): Promise { + try { + await this.deps.fsAccess(filePath); + return true; + } catch { + return false; + } + } + + /** + * Check if a directory exists + */ + private async directoryExists(dirPath: string): Promise { + try { + const stat = await this.deps.fsStat(dirPath); + return stat.isDirectory(); + } catch { + return false; + } + } +} diff --git a/src/mcp/generation/justfileGenerator.ts b/src/mcp/generation/justfileGenerator.ts new file mode 100644 index 000000000..5b0a7d204 --- /dev/null +++ b/src/mcp/generation/justfileGenerator.ts @@ -0,0 +1,345 @@ +import type { MonorepoConfig } from '../config/monorepoConfigLoader.js'; +import type { DetectedSubmodule } from '../detection/projectDetector.js'; + +/** + * Options for justfile generation + */ +export interface JustfileGeneratorOptions { + /** Include comments in generated file */ + includeComments?: boolean; + /** Script directory (relative to 
project root) */ + scriptDir?: string; + /** Cache directory (from config) */ + cacheDir?: string; +} + +const defaultOptions: Required = { + includeComments: true, + scriptDir: 'scripts', + cacheDir: '.repomix-cache', +}; + +/** + * Generates justfile content for monorepo cache management + */ +export class JustfileGenerator { + private submodules: DetectedSubmodule[]; + private options: Required; + + constructor(config: MonorepoConfig, submodules: DetectedSubmodule[], options: JustfileGeneratorOptions = {}) { + this.submodules = submodules; + this.options = { ...defaultOptions, ...options }; + if (config.cache?.directory) { + this.options.cacheDir = config.cache.directory; + } + } + + /** + * Generate the complete justfile content + */ + generate(): string { + const lines: string[] = []; + + // Header + if (this.options.includeComments) { + lines.push('# Repomix Monorepo Cache Management'); + lines.push('# Auto-generated by repomix init_monorepo_config'); + lines.push('# Do not edit manually - regenerate with the init_monorepo_config tool'); + lines.push(''); + } + + // Variables + lines.push(`cache_dir := "${this.options.cacheDir}"`); + lines.push(`script_dir := "${this.options.scriptDir}"`); + lines.push(''); + + // Default recipe (list all recipes) + lines.push('# List all available recipes'); + lines.push('default:'); + lines.push(' @just --list'); + lines.push(''); + + // Global cache operations + lines.push(...this.generateGlobalRecipes()); + + // Per-submodule recipes + lines.push(...this.generateSubmoduleRecipes()); + + // Utility recipes + lines.push(...this.generateUtilityRecipes()); + + return lines.join('\n'); + } + + /** + * Generate global cache management recipes + */ + private generateGlobalRecipes(): string[] { + const lines: string[] = []; + + if (this.options.includeComments) { + lines.push('# ═══════════════════════════════════════════════════════════'); + lines.push('# Global Cache Operations'); + lines.push('# 
═══════════════════════════════════════════════════════════'); + lines.push(''); + } + + // Update all caches + lines.push('# Regenerate cache for all submodules'); + lines.push('update-all-caches:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --all --regenerate'); + lines.push(''); + + // Invalidate all caches + lines.push('# Invalidate all cached content (force regeneration on next access)'); + lines.push('invalidate-all-caches:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --all --invalidate'); + lines.push(''); + + // Update only changed caches + lines.push('# Update only submodules that have changed since last cache'); + lines.push('update-changed-caches:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --changed-only'); + lines.push(''); + + // Show cache status + lines.push('# Show cache status for all submodules'); + lines.push('cache-status:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --status'); + lines.push(''); + + // Clean cache directory + lines.push('# Remove all cached files'); + lines.push('clean-cache:'); + lines.push(' rm -rf {{cache_dir}}'); + lines.push(' @echo "Cache directory cleaned"'); + lines.push(''); + + return lines; + } + + /** + * Generate per-submodule recipes + */ + private generateSubmoduleRecipes(): string[] { + const lines: string[] = []; + + if (this.submodules.length === 0) { + return lines; + } + + if (this.options.includeComments) { + lines.push('# ═══════════════════════════════════════════════════════════'); + lines.push('# Individual Submodule Operations'); + lines.push('# ═══════════════════════════════════════════════════════════'); + lines.push(''); + } + + for (const submodule of this.submodules) { + const safeName = this.sanitizeName(submodule.name); + const description = submodule.description || `${submodule.type} package at ${submodule.path}`; + + // Update recipe + lines.push(`# Update cache for ${submodule.name} (${description})`); + lines.push(`update-${safeName}:`); + 
lines.push(` node {{script_dir}}/repomix-cache.mjs --module=${submodule.name} --regenerate`); + lines.push(''); + + // Invalidate recipe + lines.push(`# Invalidate cache for ${submodule.name}`); + lines.push(`invalidate-${safeName}:`); + lines.push(` node {{script_dir}}/repomix-cache.mjs --module=${submodule.name} --invalidate`); + lines.push(''); + } + + // Generic update/invalidate with parameter + lines.push('# Update cache for a specific module by name'); + lines.push('update-module module:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --module={{module}} --regenerate'); + lines.push(''); + + lines.push('# Invalidate cache for a specific module by name'); + lines.push('invalidate-module module:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --module={{module}} --invalidate'); + lines.push(''); + + return lines; + } + + /** + * Generate utility recipes + */ + private generateUtilityRecipes(): string[] { + const lines: string[] = []; + + if (this.options.includeComments) { + lines.push('# ═══════════════════════════════════════════════════════════'); + lines.push('# Utility Recipes'); + lines.push('# ═══════════════════════════════════════════════════════════'); + lines.push(''); + } + + // List submodules + lines.push('# List all configured submodules'); + lines.push('list-submodules:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --list'); + lines.push(''); + + // Show dependencies + lines.push('# Show dependency graph'); + lines.push('show-dependencies:'); + lines.push(' node {{script_dir}}/repomix-cache.mjs --dependencies'); + lines.push(''); + + // Initialize/reinitialize config + lines.push('# Regenerate monorepo configuration (auto-detect submodules)'); + lines.push('init-config:'); + lines.push(' @echo "Use the init_monorepo_config MCP tool to regenerate configuration"'); + lines.push(''); + + // Install git hooks + lines.push('# Install git post-commit hook for automatic cache invalidation'); + lines.push('install-hooks:'); + 
lines.push(' #!/usr/bin/env bash'); + lines.push(' mkdir -p .git/hooks'); + lines.push(" cat > .git/hooks/post-commit << 'HOOK'"); + lines.push(' #!/bin/bash'); + lines.push(' # Repomix cache invalidation hook'); + lines.push(' (node {{script_dir}}/repomix-cache.mjs --changed-only &) 2>/dev/null'); + lines.push(' HOOK'); + lines.push(' chmod +x .git/hooks/post-commit'); + lines.push(' @echo "Git hook installed"'); + lines.push(''); + + return lines; + } + + /** + * Sanitize a name for use in just recipes + * Converts to lowercase, replaces non-alphanumeric with dashes + */ + private sanitizeName(name: string): string { + return name + .toLowerCase() + .replace(/[^a-z0-9]/g, '-') + .replace(/-+/g, '-') + .replace(/^-|-$/g, ''); + } +} + +/** + * Generate the cache management script content + */ +export function generateCacheScript(): string { + return `#!/usr/bin/env node +/** + * Repomix Monorepo Cache Management Script + * Auto-generated by repomix init_monorepo_config + */ + +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { parseArgs } from 'node:util'; + +const CONFIG_FILE = '.repomix-monorepo.json'; + +async function main() { + const { values } = parseArgs({ + options: { + all: { type: 'boolean', default: false }, + module: { type: 'string' }, + 'changed-only': { type: 'boolean', default: false }, + regenerate: { type: 'boolean', default: false }, + invalidate: { type: 'boolean', default: false }, + status: { type: 'boolean', default: false }, + list: { type: 'boolean', default: false }, + dependencies: { type: 'boolean', default: false }, + }, + }); + + // Load config + let config; + try { + const content = await fs.readFile(CONFIG_FILE, 'utf-8'); + config = JSON.parse(content); + } catch (error) { + console.error('Error: Could not load', CONFIG_FILE); + console.error('Run the init_monorepo_config MCP tool first.'); + process.exit(1); + } + + const cacheDir = config.cache?.directory || '.repomix-cache'; + + if (values.list) { + 
console.log('Configured submodules:'); + for (const [name, sub] of Object.entries(config.submodules)) { + const submodule = sub; + console.log(\` - \${name}: \${submodule.path}\${submodule.isGitSubmodule ? ' (git submodule)' : ''}\`); + } + return; + } + + if (values.dependencies) { + console.log('Dependency graph:'); + for (const [name, sub] of Object.entries(config.submodules)) { + const submodule = sub; + const deps = submodule.dependencies || []; + console.log(\` \${name}: \${deps.length > 0 ? deps.join(', ') : '(no dependencies)'}\`); + } + return; + } + + if (values.status) { + console.log('Cache status:'); + console.log(''); + console.log('Submodule'.padEnd(30), 'Status'.padEnd(15), 'Last Updated'); + console.log('-'.repeat(70)); + + for (const [name] of Object.entries(config.submodules)) { + const metaPath = path.join(cacheDir, \`\${name}.meta.json\`); + try { + const metaContent = await fs.readFile(metaPath, 'utf-8'); + const meta = JSON.parse(metaContent); + const lastUpdated = new Date(meta.generatedAt).toLocaleString(); + console.log(name.padEnd(30), 'Cached'.padEnd(15), lastUpdated); + } catch { + console.log(name.padEnd(30), 'Not cached'.padEnd(15), '-'); + } + } + return; + } + + // Determine which submodules to process + let submodulesToProcess = []; + if (values.module) { + if (!config.submodules[values.module]) { + console.error(\`Error: Submodule "\${values.module}" not found in configuration\`); + process.exit(1); + } + submodulesToProcess = [values.module]; + } else if (values.all || values['changed-only']) { + submodulesToProcess = Object.keys(config.submodules); + } else { + console.error('Error: Specify --all, --changed-only, or --module='); + process.exit(1); + } + + // Process submodules + for (const name of submodulesToProcess) { + if (values.invalidate) { + const metaPath = path.join(cacheDir, \`\${name}.meta.json\`); + const contentPath = path.join(cacheDir, \`\${name}.xml\`); + await fs.rm(metaPath, { force: true }); + await 
fs.rm(contentPath, { force: true }); + console.log(\`Invalidated: \${name}\`); + } else if (values.regenerate) { + console.log(\`Regenerating: \${name} (use MCP tool for actual regeneration)\`); + // Note: Actual regeneration happens through MCP tool + // This script just manages cache files + } + } +} + +main().catch(console.error); +`; +} diff --git a/src/mcp/mcpServer.ts b/src/mcp/mcpServer.ts index d198a405b..6a86cca37 100644 --- a/src/mcp/mcpServer.ts +++ b/src/mcp/mcpServer.ts @@ -7,7 +7,11 @@ import { registerAttachPackedOutputTool } from './tools/attachPackedOutputTool.j import { registerFileSystemReadDirectoryTool } from './tools/fileSystemReadDirectoryTool.js'; import { registerFileSystemReadFileTool } from './tools/fileSystemReadFileTool.js'; import { registerGenerateSkillTool } from './tools/generateSkillTool.js'; +import { registerGetSubmoduleContextTool } from './tools/getSubmoduleContextTool.js'; import { registerGrepRepomixOutputTool } from './tools/grepRepomixOutputTool.js'; +import { registerInitMonorepoConfigTool } from './tools/initMonorepoConfigTool.js'; +import { registerInvalidateSubmoduleCacheTool } from './tools/invalidateSubmoduleCacheTool.js'; +import { registerListSubmodulesTool } from './tools/listSubmodulesTool.js'; import { registerPackCodebaseTool } from './tools/packCodebaseTool.js'; import { registerPackRemoteRepositoryTool } from './tools/packRemoteRepositoryTool.js'; import { registerReadRepomixOutputTool } from './tools/readRepomixOutputTool.js'; @@ -21,6 +25,9 @@ const MCP_SERVER_INSTRUCTIONS = 'use generate_skill to create Claude Agent Skills from codebases, ' + 'use attach_packed_output to work with existing packed outputs, ' + 'then read_repomix_output and grep_repomix_output to analyze it. 
' + + 'For monorepos, use init_monorepo_config to auto-detect and configure submodules, ' + + 'list_submodules to see available submodules, get_submodule_context to load specific submodule content with caching, ' + + 'and invalidate_submodule_cache to force cache refresh. ' + 'Perfect for code reviews, documentation generation, bug investigation, GitHub repository analysis, and understanding large codebases. ' + 'Includes security scanning and supports compression for token efficiency.'; @@ -48,6 +55,12 @@ export const createMcpServer = async () => { registerFileSystemReadFileTool(mcpServer); registerFileSystemReadDirectoryTool(mcpServer); + // Register monorepo tools + registerInitMonorepoConfigTool(mcpServer); + registerListSubmodulesTool(mcpServer); + registerGetSubmoduleContextTool(mcpServer); + registerInvalidateSubmoduleCacheTool(mcpServer); + return mcpServer; }; diff --git a/src/mcp/tools/getSubmoduleContextTool.ts b/src/mcp/tools/getSubmoduleContextTool.ts new file mode 100644 index 000000000..12ee0e113 --- /dev/null +++ b/src/mcp/tools/getSubmoduleContextTool.ts @@ -0,0 +1,226 @@ +import path from 'node:path'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { z } from 'zod'; +import { runCli } from '../../cli/cliRun.js'; +import type { CliOptions } from '../../cli/types.js'; +import { defaultFilePathMap } from '../../config/configSchema.js'; +import { CacheManager } from '../cache/cacheManager.js'; +import { getSubmodule, loadMonorepoConfig } from '../config/monorepoConfigLoader.js'; +import { DependencyGraph } from '../dependency/dependencyGraph.js'; +import { + buildMcpToolErrorResponse, + buildMcpToolSuccessResponse, + convertErrorToJson, + createToolWorkspace, +} from './mcpToolRuntime.js'; + +const getSubmoduleContextInputSchema = z.object({ + submoduleName: z.string().describe('Name of the submodule to load (as defined in 
.repomix-monorepo.json)'), + includeDependencies: z.boolean().default(false).describe('Whether to include dependency submodules in the response'), + forceRegenerate: z.boolean().default(false).describe('Force regenerate cache even if valid cache exists'), + compress: z.boolean().default(true).describe('Enable Tree-sitter compression to reduce token usage (~70% reduction)'), + projectRoot: z.string().optional().describe('Project root directory (defaults to current working directory)'), +}); + +const getSubmoduleContextOutputSchema = z.object({ + submodule: z.string().describe('Name of the requested submodule'), + content: z.string().describe('Packed content of the submodule'), + dependencies: z + .record(z.string(), z.string()) + .optional() + .describe('Map of dependency submodule names to their content'), + metadata: z.object({ + tokenCount: z.number().describe('Estimated total token count'), + loadedSubmodules: z.array(z.string()).describe('List of submodules that were loaded'), + cacheStatus: z.array( + z.object({ + submodule: z.string(), + fromCache: z.boolean(), + }), + ), + }), +}); + +/** + * Register the get_submodule_context MCP tool + */ +export const registerGetSubmoduleContextTool = (mcpServer: McpServer): void => { + mcpServer.registerTool( + 'get_submodule_context', + { + title: 'Get Submodule Context', + description: + 'Get packed content for a specific submodule in a monorepo with caching support. Requires a .repomix-monorepo.json configuration file. Use list_submodules to see available submodules.', + inputSchema: getSubmoduleContextInputSchema, + outputSchema: getSubmoduleContextOutputSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: false, + }, + }, + async ({ submoduleName, includeDependencies, forceRegenerate, compress, projectRoot }): Promise => { + try { + const rootDir = projectRoot || process.cwd(); + + // 1. 
Load configuration + const config = await loadMonorepoConfig(rootDir); + if (!config) { + return buildMcpToolErrorResponse({ + errorMessage: + '.repomix-monorepo.json not found. Please run init_monorepo_config first to generate configuration.', + }); + } + + const submodule = getSubmodule(config, submoduleName); + if (!submodule) { + return buildMcpToolErrorResponse({ + errorMessage: `Submodule "${submoduleName}" not found in configuration. Use list_submodules to see available submodules.`, + }); + } + + // 2. Determine submodules to load + const submodulesToLoad = [submoduleName]; + if (includeDependencies) { + const depGraph = new DependencyGraph(config); + const deps = depGraph.getAllDependencies(submoduleName); + submodulesToLoad.push(...deps); + } + + // 3. Initialize cache manager + const cacheDir = path.join(rootDir, config.cache.directory); + const cacheManager = new CacheManager(cacheDir, rootDir); + + // 4. Load or generate content for each submodule + const contents: Record = {}; + const cacheStatus: Array<{ submodule: string; fromCache: boolean }> = []; + + for (const name of submodulesToLoad) { + const subConfig = config.submodules[name]; + if (!subConfig) { + continue; // Skip if dependency not found in config + } + + // Check cache first + if (config.cache.enabled && !forceRegenerate) { + const cached = await cacheManager.get(name, subConfig.path, subConfig.isGitSubmodule); + if (cached) { + contents[name] = cached.content; + cacheStatus.push({ submodule: name, fromCache: true }); + continue; + } + } + + // Generate new content + const content = await generateSubmoduleContent(rootDir, subConfig.path, { + compress: compress ?? 
config.repomix.compress, + style: config.repomix.style, + includePatterns: subConfig.includePatterns?.join(','), + ignorePatterns: subConfig.ignorePatterns?.join(','), + showLineNumbers: config.repomix.showLineNumbers, + removeComments: config.repomix.removeComments, + }); + + contents[name] = content.output; + cacheStatus.push({ submodule: name, fromCache: false }); + + // Save to cache + if (config.cache.enabled) { + const gitCommit = await cacheManager.getSubmoduleGitCommit(subConfig.path, subConfig.isGitSubmodule); + await cacheManager.set(name, content.output, { + submodule: name, + gitCommit, + fileCount: content.fileCount, + tokenCount: content.tokenCount, + dependencies: subConfig.dependencies, + repomixVersion: '1.0.0', // TODO: Get from package.json + compressed: compress, + isGitSubmodule: subConfig.isGitSubmodule, + }); + } + } + + // 5. Calculate total tokens + const totalTokens = Object.values(contents).reduce((sum, content) => sum + estimateTokens(content), 0); + + // 6. Build response + const mainContent = contents[submoduleName]; + const dependencyContents = includeDependencies + ? 
Object.fromEntries(Object.entries(contents).filter(([k]) => k !== submoduleName)) + : undefined; + + return buildMcpToolSuccessResponse({ + submodule: submoduleName, + content: mainContent, + dependencies: dependencyContents, + metadata: { + tokenCount: totalTokens, + loadedSubmodules: submodulesToLoad, + cacheStatus, + }, + }); + } catch (error) { + return buildMcpToolErrorResponse(convertErrorToJson(error)); + } + }, + ); +}; + +/** + * Generate packed content for a submodule + */ +async function generateSubmoduleContent( + rootDir: string, + submodulePath: string, + options: { + compress?: boolean; + style?: string; + includePatterns?: string; + ignorePatterns?: string; + showLineNumbers?: boolean; + removeComments?: boolean; + }, +): Promise<{ output: string; fileCount: number; tokenCount: number }> { + const tempDir = await createToolWorkspace(); + const styleKey = (options.style || 'xml') as keyof typeof defaultFilePathMap; + const outputFileName = defaultFilePathMap[styleKey]; + const outputFilePath = path.join(tempDir, outputFileName); + + const fullPath = path.join(rootDir, submodulePath); + + const cliOptions: CliOptions = { + compress: options.compress, + include: options.includePatterns, + ignore: options.ignorePatterns, + output: outputFilePath, + style: styleKey, + securityCheck: true, + quiet: true, + outputShowLineNumbers: options.showLineNumbers, + removeComments: options.removeComments, + }; + + const result = await runCli(['.'], fullPath, cliOptions); + if (!result) { + throw new Error(`Failed to pack submodule: ${submodulePath}`); + } + + const { packResult } = result; + const output = await import('node:fs/promises').then((fs) => fs.readFile(outputFilePath, 'utf-8')); + + return { + output, + fileCount: packResult.totalFiles, + tokenCount: packResult.totalTokens, + }; +} + +/** + * Estimate token count from content length + * Rough estimate: 4 characters = 1 token + */ +function estimateTokens(content: string): number { + return 
Math.ceil(content.length / 4); +} diff --git a/src/mcp/tools/initMonorepoConfigTool.ts b/src/mcp/tools/initMonorepoConfigTool.ts new file mode 100644 index 000000000..0a661b480 --- /dev/null +++ b/src/mcp/tools/initMonorepoConfigTool.ts @@ -0,0 +1,208 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { z } from 'zod'; +import type { MonorepoConfig, SubmoduleConfig } from '../config/monorepoConfigLoader.js'; +import { MONOREPO_CONFIG_FILE, saveMonorepoConfig } from '../config/monorepoConfigLoader.js'; +import { ProjectDetector } from '../detection/projectDetector.js'; +import { generateCacheScript, JustfileGenerator } from '../generation/justfileGenerator.js'; +import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, convertErrorToJson } from './mcpToolRuntime.js'; + +const initMonorepoConfigInputSchema = z.object({ + projectRoot: z.string().optional().describe('Project root directory (defaults to current working directory)'), + detectGitSubmodules: z.boolean().default(true).describe('Detect and mark git submodules'), + generateJustfile: z.boolean().default(true).describe('Generate justfile for cache management'), + generateScript: z.boolean().default(true).describe('Generate cache management script'), + cacheDirectory: z.string().default('.repomix-cache').describe('Cache directory path'), + compress: z.boolean().default(true).describe('Enable compression by default'), + style: z.enum(['xml', 'markdown', 'json', 'plain']).default('xml').describe('Default output format'), +}); + +const initMonorepoConfigOutputSchema = z.object({ + projectType: z.string().describe('Detected project type (rust, typescript, python, go, mixed, generic)'), + submodules: z.array( + z.object({ + name: z.string(), + path: z.string(), + type: z.string(), + isGitSubmodule: z.boolean(), + }), + ), + configPath: 
z.string().describe('Path to generated configuration file'), + justfilePath: z.string().optional().describe('Path to generated justfile (if requested)'), + scriptPath: z.string().optional().describe('Path to generated script (if requested)'), + message: z.string().describe('Human-readable summary'), +}); + +/** + * Register the init_monorepo_config MCP tool + */ +export const registerInitMonorepoConfigTool = (mcpServer: McpServer): void => { + mcpServer.registerTool( + 'init_monorepo_config', + { + title: 'Initialize Monorepo Configuration', + description: + 'Automatically detect project structure and generate .repomix-monorepo.json configuration. Supports Rust workspaces, npm/pnpm/yarn workspaces, Go workspaces, and Python monorepos. Optionally generates justfile and cache management scripts.', + inputSchema: initMonorepoConfigInputSchema, + outputSchema: initMonorepoConfigOutputSchema, + annotations: { + readOnlyHint: false, + destructiveHint: false, + idempotentHint: false, + openWorldHint: false, + }, + }, + async ({ + projectRoot, + detectGitSubmodules, + generateJustfile, + generateScript, + cacheDirectory, + compress, + style, + }): Promise => { + try { + const rootDir = projectRoot || process.cwd(); + + // 1. Detect project structure + const detector = new ProjectDetector(rootDir); + const detection = await detector.detect(); + + if (detection.submodules.length === 0) { + return buildMcpToolErrorResponse({ + errorMessage: + 'No submodules detected. This tool works best with Rust workspaces, npm/pnpm workspaces, Go workspaces, or projects with standard directory structures (packages/, crates/, libs/, apps/, etc.).', + }); + } + + // 2. Build configuration + const submodules: Record = {}; + for (const sub of detection.submodules) { + submodules[sub.name] = { + path: sub.path, + description: sub.description, + dependencies: sub.dependencies, + isGitSubmodule: detectGitSubmodules ? 
sub.isGitSubmodule : false, + }; + } + + const config: MonorepoConfig = { + submodules, + cache: { + directory: cacheDirectory, + enabled: true, + }, + repomix: { + compress, + style, + removeComments: false, + showLineNumbers: true, + }, + }; + + // 3. Save configuration file + await saveMonorepoConfig(config, rootDir); + const configPath = path.join(rootDir, MONOREPO_CONFIG_FILE); + + // 4. Generate justfile (if requested) + let justfilePath: string | undefined; + if (generateJustfile) { + const generator = new JustfileGenerator(config, detection.submodules, { + cacheDir: cacheDirectory, + scriptDir: 'scripts', + }); + const justfileContent = generator.generate(); + + // Check if justfile already exists + const justfileFullPath = path.join(rootDir, 'justfile'); + let existingContent = ''; + try { + existingContent = await fs.readFile(justfileFullPath, 'utf-8'); + } catch { + // File doesn't exist + } + + if (existingContent) { + // Append to existing justfile if repomix section doesn't exist + if (!existingContent.includes('# Repomix Monorepo Cache Management')) { + await fs.writeFile(justfileFullPath, `${existingContent}\n\n${justfileContent}`, 'utf-8'); + justfilePath = justfileFullPath; + } + } else { + await fs.writeFile(justfileFullPath, justfileContent, 'utf-8'); + justfilePath = justfileFullPath; + } + } + + // 5. Generate cache script (if requested) + let scriptPath: string | undefined; + if (generateScript) { + const scriptsDir = path.join(rootDir, 'scripts'); + await fs.mkdir(scriptsDir, { recursive: true }); + + const scriptContent = generateCacheScript(); + scriptPath = path.join(scriptsDir, 'repomix-cache.mjs'); + await fs.writeFile(scriptPath, scriptContent, 'utf-8'); + } + + // 6. Update .gitignore to exclude cache directory + await updateGitignore(rootDir, cacheDirectory); + + // 7. 
Build response + const submoduleList = detection.submodules.map((sub) => ({ + name: sub.name, + path: sub.path, + type: sub.type, + isGitSubmodule: sub.isGitSubmodule, + })); + + const gitSubmoduleCount = submoduleList.filter((s) => s.isGitSubmodule).length; + let message = `Detected ${detection.projectType} project with ${submoduleList.length} submodule(s)`; + if (gitSubmoduleCount > 0) { + message += ` (${gitSubmoduleCount} git submodule(s))`; + } + message += `. Configuration saved to ${MONOREPO_CONFIG_FILE}.`; + if (justfilePath) { + message += ' Justfile updated.'; + } + if (scriptPath) { + message += ' Cache script generated.'; + } + + return buildMcpToolSuccessResponse({ + projectType: detection.projectType, + submodules: submoduleList, + configPath, + justfilePath, + scriptPath, + message, + }); + } catch (error) { + return buildMcpToolErrorResponse(convertErrorToJson(error)); + } + }, + ); +}; + +/** + * Update .gitignore to exclude cache directory + */ +async function updateGitignore(rootDir: string, cacheDirectory: string): Promise { + const gitignorePath = path.join(rootDir, '.gitignore'); + let content = ''; + + try { + content = await fs.readFile(gitignorePath, 'utf-8'); + } catch { + // File doesn't exist, will create + } + + // Check if cache directory is already in .gitignore + const cachePattern = cacheDirectory.startsWith('/') ? 
cacheDirectory : `/${cacheDirectory}`; + if (!content.includes(cacheDirectory) && !content.includes(cachePattern)) { + const addition = `\n# Repomix monorepo cache\n${cachePattern}/\n`; + await fs.writeFile(gitignorePath, content + addition, 'utf-8'); + } +} diff --git a/src/mcp/tools/invalidateSubmoduleCacheTool.ts b/src/mcp/tools/invalidateSubmoduleCacheTool.ts new file mode 100644 index 000000000..f17edbaba --- /dev/null +++ b/src/mcp/tools/invalidateSubmoduleCacheTool.ts @@ -0,0 +1,97 @@ +import path from 'node:path'; +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { z } from 'zod'; +import { CacheManager } from '../cache/cacheManager.js'; +import { getSubmoduleNames, loadMonorepoConfig } from '../config/monorepoConfigLoader.js'; +import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, convertErrorToJson } from './mcpToolRuntime.js'; + +const invalidateSubmoduleCacheInputSchema = z.object({ + submodules: z + .array(z.string()) + .optional() + .describe('List of submodule names to invalidate. If not provided, invalidates all caches.'), + projectRoot: z.string().optional().describe('Project root directory (defaults to current working directory)'), +}); + +const invalidateSubmoduleCacheOutputSchema = z.object({ + invalidated: z.array(z.string()).describe('List of submodules whose cache was invalidated'), + skipped: z.array(z.string()).optional().describe('List of submodules that were skipped (not found)'), + message: z.string().describe('Human-readable summary'), +}); + +/** + * Register the invalidate_submodule_cache MCP tool + */ +export const registerInvalidateSubmoduleCacheTool = (mcpServer: McpServer): void => { + mcpServer.registerTool( + 'invalidate_submodule_cache', + { + title: 'Invalidate Submodule Cache', + description: + 'Invalidate cached content for one or more submodules. Forces regeneration on next access. 
Use after making changes to submodule code.', + inputSchema: invalidateSubmoduleCacheInputSchema, + outputSchema: invalidateSubmoduleCacheOutputSchema, + annotations: { + readOnlyHint: false, + destructiveHint: false, + idempotentHint: true, + openWorldHint: false, + }, + }, + async ({ submodules, projectRoot }): Promise => { + try { + const rootDir = projectRoot || process.cwd(); + + // Load configuration + const config = await loadMonorepoConfig(rootDir); + if (!config) { + return buildMcpToolErrorResponse({ + errorMessage: + '.repomix-monorepo.json not found. Please run init_monorepo_config first to generate configuration.', + }); + } + + const cacheDir = path.join(rootDir, config.cache.directory); + const cacheManager = new CacheManager(cacheDir, rootDir); + + // Determine which submodules to invalidate + const allSubmodules = getSubmoduleNames(config); + const toInvalidate = submodules && submodules.length > 0 ? submodules : allSubmodules; + + const invalidated: string[] = []; + const skipped: string[] = []; + + for (const name of toInvalidate) { + if (allSubmodules.includes(name)) { + await cacheManager.invalidate(name); + invalidated.push(name); + } else { + skipped.push(name); + } + } + + // Build message + let message = ''; + if (invalidated.length > 0) { + message = `Invalidated cache for ${invalidated.length} submodule(s): ${invalidated.join(', ')}`; + } + if (skipped.length > 0) { + message += message ? '. ' : ''; + message += `Skipped ${skipped.length} unknown submodule(s): ${skipped.join(', ')}`; + } + if (invalidated.length === 0 && skipped.length === 0) { + message = 'No submodules to invalidate'; + } + + return buildMcpToolSuccessResponse({ + invalidated, + skipped: skipped.length > 0 ? 
skipped : undefined, + message, + }); + } catch (error) { + return buildMcpToolErrorResponse(convertErrorToJson(error)); + } + }, + ); +}; diff --git a/src/mcp/tools/listSubmodulesTool.ts b/src/mcp/tools/listSubmodulesTool.ts new file mode 100644 index 000000000..606423312 --- /dev/null +++ b/src/mcp/tools/listSubmodulesTool.ts @@ -0,0 +1,127 @@ +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import { z } from 'zod'; +import { getSubmoduleNames, loadMonorepoConfig } from '../config/monorepoConfigLoader.js'; +import { DependencyGraph } from '../dependency/dependencyGraph.js'; +import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, convertErrorToJson } from './mcpToolRuntime.js'; + +const listSubmodulesInputSchema = z.object({ + showDependencies: z.boolean().default(false).describe('Include dependency relationships in the response'), + showStats: z.boolean().default(false).describe('Include dependency graph statistics'), + projectRoot: z.string().optional().describe('Project root directory (defaults to current working directory)'), +}); + +const listSubmodulesOutputSchema = z.object({ + submodules: z.array( + z.object({ + name: z.string().describe('Submodule name'), + path: z.string().describe('Path relative to project root'), + description: z.string().optional().describe('Human-readable description'), + isGitSubmodule: z.boolean().describe('Whether this is a git submodule'), + dependencies: z.array(z.string()).optional().describe('Direct dependencies'), + dependents: z.array(z.string()).optional().describe('Submodules that depend on this'), + }), + ), + stats: z + .object({ + totalNodes: z.number(), + totalEdges: z.number(), + leafNodes: z.number(), + rootNodes: z.number(), + maxDepth: z.number(), + }) + .optional(), + cycles: z.array(z.string()).optional().describe('Detected dependency cycles (if any)'), +}); + +/** + * Register the list_submodules MCP 
tool + */ +export const registerListSubmodulesTool = (mcpServer: McpServer): void => { + mcpServer.registerTool( + 'list_submodules', + { + title: 'List Submodules', + description: + 'List all configured submodules in the monorepo. Shows names, paths, and optionally dependency relationships. Requires a .repomix-monorepo.json configuration file.', + inputSchema: listSubmodulesInputSchema, + outputSchema: listSubmodulesOutputSchema, + annotations: { + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: false, + }, + }, + async ({ showDependencies, showStats, projectRoot }): Promise => { + try { + const rootDir = projectRoot || process.cwd(); + + // Load configuration + const config = await loadMonorepoConfig(rootDir); + if (!config) { + return buildMcpToolErrorResponse({ + errorMessage: + '.repomix-monorepo.json not found. Please run init_monorepo_config first to generate configuration.', + }); + } + + const submoduleNames = getSubmoduleNames(config); + + // Build dependency graph if needed + let depGraph: DependencyGraph | null = null; + if (showDependencies || showStats) { + depGraph = new DependencyGraph(config); + } + + // Build submodule list + const submodules = submoduleNames.map((name) => { + const sub = config.submodules[name]; + const result: { + name: string; + path: string; + description?: string; + isGitSubmodule: boolean; + dependencies?: string[]; + dependents?: string[]; + } = { + name, + path: sub.path, + description: sub.description, + isGitSubmodule: sub.isGitSubmodule, + }; + + if (showDependencies && depGraph) { + result.dependencies = depGraph.getDirectDependencies(name); + result.dependents = depGraph.getDependents(name); + } + + return result; + }); + + // Build response + const response: { + submodules: typeof submodules; + stats?: ReturnType; + cycles?: string[]; + } = { + submodules, + }; + + if (showStats && depGraph) { + response.stats = depGraph.getStats(); + + // Check for cycles + const cycles = 
depGraph.detectCycles(); + if (cycles.length > 0) { + response.cycles = cycles.map((c) => c.description); + } + } + + return buildMcpToolSuccessResponse(response); + } catch (error) { + return buildMcpToolErrorResponse(convertErrorToJson(error)); + } + }, + ); +}; diff --git a/tests/mcp/cache/cacheManager.test.ts b/tests/mcp/cache/cacheManager.test.ts new file mode 100644 index 000000000..2519adacb --- /dev/null +++ b/tests/mcp/cache/cacheManager.test.ts @@ -0,0 +1,161 @@ +import { describe, expect, it, vi, beforeEach } from 'vitest'; +import { CacheManager, type CacheManagerDeps } from '../../../src/mcp/cache/cacheManager.js'; +import type { CacheMetadata } from '../../../src/mcp/cache/cacheTypes.js'; + +describe('CacheManager', () => { + const mockDeps: CacheManagerDeps = { + execFileAsync: vi.fn(), + fsReadFile: vi.fn(), + fsWriteFile: vi.fn(), + fsMkdir: vi.fn(), + fsRm: vi.fn(), + fsAccess: vi.fn(), + fsStat: vi.fn(), + }; + + beforeEach(() => { + vi.resetAllMocks(); + }); + + describe('check', () => { + it('should return not exists when cache files are missing', async () => { + (mockDeps.fsAccess as ReturnType).mockRejectedValue(new Error('ENOENT')); + + const manager = new CacheManager('/cache', '/root', mockDeps); + const result = await manager.check('test-module', 'path/to/module', false); + + expect(result.exists).toBe(false); + expect(result.valid).toBe(false); + expect(result.invalidReason).toBe('Cache files not found'); + }); + + it('should return valid when cache exists and commit matches', async () => { + (mockDeps.fsAccess as ReturnType).mockResolvedValue(undefined); + + const metadata: CacheMetadata = { + submodule: 'test-module', + generatedAt: new Date().toISOString(), + gitCommit: 'abc123', + fileCount: 10, + tokenCount: 1000, + dependencies: [], + repomixVersion: '1.0.0', + compressed: true, + }; + + (mockDeps.fsReadFile as ReturnType).mockResolvedValue(JSON.stringify(metadata)); + (mockDeps.execFileAsync as ReturnType).mockResolvedValue({ 
stdout: 'abc123\n', stderr: '' }); + + const manager = new CacheManager('/cache', '/root', mockDeps); + const result = await manager.check('test-module', 'path/to/module', false); + + expect(result.exists).toBe(true); + expect(result.valid).toBe(true); + expect(result.meta).toEqual(metadata); + }); + + it('should return invalid when commit has changed', async () => { + (mockDeps.fsAccess as ReturnType).mockResolvedValue(undefined); + + const metadata: CacheMetadata = { + submodule: 'test-module', + generatedAt: new Date().toISOString(), + gitCommit: 'old-commit', + fileCount: 10, + tokenCount: 1000, + dependencies: [], + repomixVersion: '1.0.0', + compressed: true, + }; + + (mockDeps.fsReadFile as ReturnType).mockResolvedValue(JSON.stringify(metadata)); + (mockDeps.execFileAsync as ReturnType) + .mockResolvedValueOnce({ stdout: 'new-commit\n', stderr: '' }) // getCurrentCommit + .mockResolvedValueOnce({ stdout: 'file1.ts\nfile2.ts\n', stderr: '' }); // git diff + + const manager = new CacheManager('/cache', '/root', mockDeps); + const result = await manager.check('test-module', 'path/to/module', false); + + expect(result.exists).toBe(true); + expect(result.valid).toBe(false); + expect(result.invalidReason).toBe('Content has changed since cache was created'); + }); + }); + + describe('set', () => { + it('should write cache files', async () => { + (mockDeps.fsMkdir as ReturnType).mockResolvedValue(undefined); + (mockDeps.fsWriteFile as ReturnType).mockResolvedValue(undefined); + + const manager = new CacheManager('/cache', '/root', mockDeps); + await manager.set('test-module', 'test', { + submodule: 'test-module', + gitCommit: 'abc123', + fileCount: 10, + tokenCount: 1000, + dependencies: [], + repomixVersion: '1.0.0', + compressed: true, + }); + + expect(mockDeps.fsMkdir).toHaveBeenCalledWith('/cache', { recursive: true }); + expect(mockDeps.fsWriteFile).toHaveBeenCalledTimes(2); // meta + content + }); + }); + + describe('invalidate', () => { + it('should remove 
cache files', async () => { + (mockDeps.fsRm as ReturnType).mockResolvedValue(undefined); + + const manager = new CacheManager('/cache', '/root', mockDeps); + await manager.invalidate('test-module'); + + expect(mockDeps.fsRm).toHaveBeenCalledWith('/cache/test-module.meta.json', { force: true }); + expect(mockDeps.fsRm).toHaveBeenCalledWith('/cache/test-module.xml', { force: true }); + }); + }); + + describe('invalidateAll', () => { + it('should remove entire cache directory', async () => { + (mockDeps.fsRm as ReturnType).mockResolvedValue(undefined); + + const manager = new CacheManager('/cache', '/root', mockDeps); + await manager.invalidateAll(); + + expect(mockDeps.fsRm).toHaveBeenCalledWith('/cache', { recursive: true, force: true }); + }); + }); + + describe('getSubmoduleGitCommit', () => { + it('should get commit for regular directory from main repo', async () => { + (mockDeps.execFileAsync as ReturnType).mockResolvedValue({ + stdout: 'main-repo-commit\n', + stderr: '', + }); + + const manager = new CacheManager('/cache', '/root', mockDeps); + const commit = await manager.getSubmoduleGitCommit('path/to/module', false); + + expect(commit).toBe('main-repo-commit'); + expect(mockDeps.execFileAsync).toHaveBeenCalledWith('git', ['-C', '/root', 'rev-parse', 'HEAD']); + }); + + it('should get commit for git submodule from submodule directory', async () => { + (mockDeps.execFileAsync as ReturnType).mockResolvedValue({ + stdout: 'submodule-commit\n', + stderr: '', + }); + + const manager = new CacheManager('/cache', '/root', mockDeps); + const commit = await manager.getSubmoduleGitCommit('path/to/submodule', true); + + expect(commit).toBe('submodule-commit'); + expect(mockDeps.execFileAsync).toHaveBeenCalledWith('git', [ + '-C', + '/root/path/to/submodule', + 'rev-parse', + 'HEAD', + ]); + }); + }); +}); diff --git a/tests/mcp/config/monorepoConfigLoader.test.ts b/tests/mcp/config/monorepoConfigLoader.test.ts new file mode 100644 index 000000000..0d1555376 --- 
/dev/null +++ b/tests/mcp/config/monorepoConfigLoader.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, it } from 'vitest'; +import { + getSubmodule, + getSubmoduleNames, + loadMonorepoConfig, + MONOREPO_CONFIG_FILE, + type MonorepoConfig, +} from '../../../src/mcp/config/monorepoConfigLoader.js'; + +describe('monorepoConfigLoader', () => { + describe('loadMonorepoConfig', () => { + it('should return null when config file does not exist', async () => { + const result = await loadMonorepoConfig('/nonexistent/path'); + expect(result).toBeNull(); + }); + }); + + describe('getSubmodule', () => { + it('should return submodule config by name', () => { + const config: MonorepoConfig = { + submodules: { + 'crate-foo': { + path: 'crates/foo', + description: 'Test crate', + dependencies: ['crate-bar'], + isGitSubmodule: false, + }, + 'crate-bar': { + path: 'crates/bar', + dependencies: [], + isGitSubmodule: false, + }, + }, + cache: { directory: '.repomix-cache', enabled: true }, + repomix: { compress: true, style: 'xml', removeComments: false, showLineNumbers: true }, + }; + + const submodule = getSubmodule(config, 'crate-foo'); + expect(submodule).not.toBeNull(); + expect(submodule?.path).toBe('crates/foo'); + expect(submodule?.description).toBe('Test crate'); + }); + + it('should return null for unknown submodule', () => { + const config: MonorepoConfig = { + submodules: {}, + cache: { directory: '.repomix-cache', enabled: true }, + repomix: { compress: true, style: 'xml', removeComments: false, showLineNumbers: true }, + }; + + expect(getSubmodule(config, 'unknown')).toBeNull(); + }); + }); + + describe('getSubmoduleNames', () => { + it('should return all submodule names', () => { + const config: MonorepoConfig = { + submodules: { + 'crate-a': { path: 'crates/a', dependencies: [], isGitSubmodule: false }, + 'crate-b': { path: 'crates/b', dependencies: [], isGitSubmodule: false }, + 'crate-c': { path: 'crates/c', dependencies: [], isGitSubmodule: false }, + }, + cache: 
{ directory: '.repomix-cache', enabled: true }, + repomix: { compress: true, style: 'xml', removeComments: false, showLineNumbers: true }, + }; + + const names = getSubmoduleNames(config); + expect(names).toEqual(['crate-a', 'crate-b', 'crate-c']); + }); + + it('should return empty array for empty config', () => { + const config: MonorepoConfig = { + submodules: {}, + cache: { directory: '.repomix-cache', enabled: true }, + repomix: { compress: true, style: 'xml', removeComments: false, showLineNumbers: true }, + }; + + expect(getSubmoduleNames(config)).toEqual([]); + }); + }); + + describe('MONOREPO_CONFIG_FILE', () => { + it('should have correct filename', () => { + expect(MONOREPO_CONFIG_FILE).toBe('.repomix-monorepo.json'); + }); + }); +}); diff --git a/tests/mcp/dependency/dependencyGraph.test.ts b/tests/mcp/dependency/dependencyGraph.test.ts new file mode 100644 index 000000000..9246b3994 --- /dev/null +++ b/tests/mcp/dependency/dependencyGraph.test.ts @@ -0,0 +1,193 @@ +import { describe, expect, it } from 'vitest'; +import type { MonorepoConfig } from '../../../src/mcp/config/monorepoConfigLoader.js'; +import { DependencyGraph } from '../../../src/mcp/dependency/dependencyGraph.js'; + +describe('DependencyGraph', () => { + const createConfig = (submodules: Record): MonorepoConfig => ({ + submodules: Object.fromEntries( + Object.entries(submodules).map(([name, { path, dependencies }]) => [ + name, + { path, dependencies, isGitSubmodule: false }, + ]), + ), + cache: { directory: '.repomix-cache', enabled: true }, + repomix: { compress: true, style: 'xml', removeComments: false, showLineNumbers: true }, + }); + + describe('getDirectDependencies', () => { + it('should return direct dependencies', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b', 'crate-c'] }, + 'crate-b': { path: 'crates/b', dependencies: [] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new 
DependencyGraph(config); + expect(graph.getDirectDependencies('crate-a')).toEqual(['crate-b', 'crate-c']); + expect(graph.getDirectDependencies('crate-b')).toEqual([]); + }); + + it('should return empty array for unknown submodule', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + expect(graph.getDirectDependencies('unknown')).toEqual([]); + }); + }); + + describe('getAllDependencies', () => { + it('should return all transitive dependencies', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + const deps = graph.getAllDependencies('crate-a'); + + // Should include both b and c + expect(deps).toContain('crate-b'); + expect(deps).toContain('crate-c'); + expect(deps).not.toContain('crate-a'); + }); + + it('should handle diamond dependencies', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b', 'crate-c'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-d'] }, + 'crate-c': { path: 'crates/c', dependencies: ['crate-d'] }, + 'crate-d': { path: 'crates/d', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + const deps = graph.getAllDependencies('crate-a'); + + expect(deps).toContain('crate-b'); + expect(deps).toContain('crate-c'); + expect(deps).toContain('crate-d'); + expect(deps.length).toBe(3); + }); + }); + + describe('getDependents', () => { + it('should return submodules that depend on the given one', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-c'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + const 
dependents = graph.getDependents('crate-c'); + + expect(dependents).toContain('crate-a'); + expect(dependents).toContain('crate-b'); + }); + }); + + describe('detectCycles', () => { + it('should detect no cycles in acyclic graph', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + expect(graph.detectCycles()).toEqual([]); + expect(graph.hasCycles()).toBe(false); + }); + + it('should detect direct cycle', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-a'] }, + }); + + const graph = new DependencyGraph(config); + expect(graph.hasCycles()).toBe(true); + expect(graph.detectCycles().length).toBeGreaterThan(0); + }); + + it('should detect indirect cycle', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: ['crate-a'] }, + }); + + const graph = new DependencyGraph(config); + expect(graph.hasCycles()).toBe(true); + }); + }); + + describe('topologicalSort', () => { + it('should return sorted list for acyclic graph', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + const sorted = graph.topologicalSort(); + + expect(sorted).not.toBeNull(); + if (sorted) { + // c should come before b, b should come before a + expect(sorted.indexOf('crate-c')).toBeLessThan(sorted.indexOf('crate-b')); + expect(sorted.indexOf('crate-b')).toBeLessThan(sorted.indexOf('crate-a')); + } + }); + + 
it('should return null for cyclic graph', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-a'] }, + }); + + const graph = new DependencyGraph(config); + expect(graph.topologicalSort()).toBeNull(); + }); + }); + + describe('getLeafNodes and getRootNodes', () => { + it('should identify leaf and root nodes', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + + // crate-c has no dependencies, so it's a leaf + expect(graph.getLeafNodes()).toContain('crate-c'); + + // crate-a has no dependents, so it's a root + expect(graph.getRootNodes()).toContain('crate-a'); + }); + }); + + describe('getStats', () => { + it('should return correct statistics', () => { + const config = createConfig({ + 'crate-a': { path: 'crates/a', dependencies: ['crate-b', 'crate-c'] }, + 'crate-b': { path: 'crates/b', dependencies: ['crate-c'] }, + 'crate-c': { path: 'crates/c', dependencies: [] }, + }); + + const graph = new DependencyGraph(config); + const stats = graph.getStats(); + + expect(stats.totalNodes).toBe(3); + expect(stats.totalEdges).toBe(3); + expect(stats.leafNodes).toBe(1); // crate-c + expect(stats.rootNodes).toBe(1); // crate-a + expect(stats.maxDepth).toBe(2); // a -> b -> c + }); + }); +}); From 19146937172725929b7a41416ad60424822f0b3d Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 6 Jan 2026 05:10:51 -0800 Subject: [PATCH 2/6] fix(mcp): Add path traversal protection for monorepo tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add security hardening to prevent path traversal attacks: - Create pathValidator.ts with validation utilities: - validatePathWithinRoot(): prevents escaping project root - 
validateProjectRoot(): normalizes and validates project root - sanitizeSubmoduleName(): sanitizes names for safe file paths - validateSubmodulePath(): validates submodule paths - Apply path validation to all MCP tools: - get_submodule_context: validate projectRoot and submodule paths - list_submodules: validate projectRoot - invalidate_submodule_cache: validate projectRoot - init_monorepo_config: validate projectRoot - Add Zod schema constraints: - submoduleName: min(1), max(255) - projectRoot: max(4096) - submodules array: max(100) items - cacheDirectory: min(1), max(255) - Add 20 unit tests for path validation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/mcp/cache/cacheManager.ts | 13 +- src/mcp/security/pathValidator.ts | 100 ++++++++++++++ src/mcp/tools/getSubmoduleContextTool.ts | 19 ++- src/mcp/tools/initMonorepoConfigTool.ts | 12 +- src/mcp/tools/invalidateSubmoduleCacheTool.ts | 13 +- src/mcp/tools/listSubmodulesTool.ts | 10 +- tests/mcp/cache/cacheManager.test.ts | 2 +- tests/mcp/security/pathValidator.test.ts | 126 ++++++++++++++++++ 8 files changed, 278 insertions(+), 17 deletions(-) create mode 100644 src/mcp/security/pathValidator.ts create mode 100644 tests/mcp/security/pathValidator.test.ts diff --git a/src/mcp/cache/cacheManager.ts b/src/mcp/cache/cacheManager.ts index c7c0849d1..1ba994145 100644 --- a/src/mcp/cache/cacheManager.ts +++ b/src/mcp/cache/cacheManager.ts @@ -3,6 +3,7 @@ import fs from 'node:fs/promises'; import path from 'node:path'; import { promisify } from 'node:util'; import { logger } from '../../shared/logger.js'; +import { sanitizeSubmoduleName, validatePathWithinRoot } from '../security/pathValidator.js'; import type { CacheCheckResult, CachedContent, CacheMetadata } from './cacheTypes.js'; const execFileAsync = promisify(execFile); @@ -48,14 +49,16 @@ export class CacheManager { * Get the full path for a submodule's cache file */ private getContentPath(submoduleName: 
string): string { - return path.join(this.cacheDir, `${submoduleName}.xml`); + const safeName = sanitizeSubmoduleName(submoduleName); + return path.join(this.cacheDir, `${safeName}.xml`); } /** * Get the full path for a submodule's metadata file */ private getMetaPath(submoduleName: string): string { - return path.join(this.cacheDir, `${submoduleName}.meta.json`); + const safeName = sanitizeSubmoduleName(submoduleName); + return path.join(this.cacheDir, `${safeName}.meta.json`); } /** @@ -87,7 +90,8 @@ export class CacheManager { try { if (isGitSubmodule) { // Git submodule: read its own HEAD - const fullPath = path.join(this.rootDir, submodulePath); + // Validate path stays within root to prevent path traversal + const fullPath = validatePathWithinRoot(submodulePath, this.rootDir); const result = await this.deps.execFileAsync('git', ['-C', fullPath, 'rev-parse', 'HEAD']); return result.stdout.trim(); } @@ -117,7 +121,8 @@ export class CacheManager { // Further check: are there actual file changes in this submodule? if (isGitSubmodule) { - const fullPath = path.join(this.rootDir, submodulePath); + // Validate path stays within root to prevent path traversal + const fullPath = validatePathWithinRoot(submodulePath, this.rootDir); const result = await this.deps.execFileAsync('git', [ '-C', fullPath, diff --git a/src/mcp/security/pathValidator.ts b/src/mcp/security/pathValidator.ts new file mode 100644 index 000000000..dbc0a79b6 --- /dev/null +++ b/src/mcp/security/pathValidator.ts @@ -0,0 +1,100 @@ +import path from 'node:path'; + +/** + * Path validation error + */ +export class PathSecurityError extends Error { + constructor(message: string) { + super(message); + this.name = 'PathSecurityError'; + } +} + +/** + * Validate that a resolved path stays within the allowed root directory. + * Prevents path traversal attacks using ../ or absolute paths. 
+ * + * @param userPath - The user-provided path (may be relative or absolute) + * @param rootDir - The root directory that paths must stay within + * @returns The resolved absolute path + * @throws PathSecurityError if path escapes the root directory + */ +export function validatePathWithinRoot(userPath: string, rootDir: string): string { + // Resolve both paths to absolute, normalized forms + const resolvedRoot = path.resolve(rootDir); + const resolvedPath = path.resolve(resolvedRoot, userPath); + + // Check that the resolved path is within the root directory + const relative = path.relative(resolvedRoot, resolvedPath); + + // Path escapes if: + // 1. It starts with '..' (goes above root) + // 2. It's an absolute path that doesn't share the root prefix + if (relative.startsWith('..') || path.isAbsolute(relative)) { + throw new PathSecurityError(`Path traversal detected: "${userPath}" escapes the project root "${rootDir}"`); + } + + return resolvedPath; +} + +/** + * Validate and normalize a project root directory. + * Ensures the path is absolute and normalized. + * + * @param projectRoot - The user-provided project root (or undefined for cwd) + * @returns Resolved absolute path to project root + */ +export function validateProjectRoot(projectRoot?: string): string { + // Default to current working directory if not provided + const root = projectRoot || process.cwd(); + + // Resolve to absolute path and normalize + return path.resolve(root); +} + +/** + * Sanitize a submodule name for use in file paths. + * Only allows alphanumeric characters, hyphens, underscores, and dots. 
+ * + * @param name - The submodule name to sanitize + * @returns Sanitized name safe for use in file paths + * @throws PathSecurityError if name contains only invalid characters + */ +export function sanitizeSubmoduleName(name: string): string { + // Remove any path separators and invalid characters + const sanitized = name + .replace(/[/\\]/g, '-') // Replace path separators with hyphens + .replace(/[^a-zA-Z0-9._-]/g, '') // Keep only safe characters + .replace(/\.{2,}/g, '.') // Collapse multiple dots + .replace(/^\.+|\.+$/g, ''); // Remove leading/trailing dots + + if (!sanitized) { + throw new PathSecurityError(`Invalid submodule name: "${name}" contains no valid characters`); + } + + // Limit length to prevent filesystem issues + if (sanitized.length > 255) { + throw new PathSecurityError(`Submodule name too long: "${name}" exceeds 255 characters`); + } + + return sanitized; +} + +/** + * Validate a submodule path from configuration. + * Ensures the path is relative and doesn't escape the project root. 
+ * + * @param submodulePath - The submodule path from config + * @param rootDir - The project root directory + * @returns The validated and resolved absolute path + * @throws PathSecurityError if path is invalid or escapes root + */ +export function validateSubmodulePath(submodulePath: string, rootDir: string): string { + // Reject absolute paths + if (path.isAbsolute(submodulePath)) { + throw new PathSecurityError(`Submodule path must be relative, got absolute path: "${submodulePath}"`); + } + + // Validate path stays within root + return validatePathWithinRoot(submodulePath, rootDir); +} diff --git a/src/mcp/tools/getSubmoduleContextTool.ts b/src/mcp/tools/getSubmoduleContextTool.ts index 12ee0e113..722bf2f4b 100644 --- a/src/mcp/tools/getSubmoduleContextTool.ts +++ b/src/mcp/tools/getSubmoduleContextTool.ts @@ -8,6 +8,7 @@ import { defaultFilePathMap } from '../../config/configSchema.js'; import { CacheManager } from '../cache/cacheManager.js'; import { getSubmodule, loadMonorepoConfig } from '../config/monorepoConfigLoader.js'; import { DependencyGraph } from '../dependency/dependencyGraph.js'; +import { validateProjectRoot, validateSubmodulePath } from '../security/pathValidator.js'; import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, @@ -16,11 +17,19 @@ import { } from './mcpToolRuntime.js'; const getSubmoduleContextInputSchema = z.object({ - submoduleName: z.string().describe('Name of the submodule to load (as defined in .repomix-monorepo.json)'), + submoduleName: z + .string() + .min(1) + .max(255) + .describe('Name of the submodule to load (as defined in .repomix-monorepo.json)'), includeDependencies: z.boolean().default(false).describe('Whether to include dependency submodules in the response'), forceRegenerate: z.boolean().default(false).describe('Force regenerate cache even if valid cache exists'), compress: z.boolean().default(true).describe('Enable Tree-sitter compression to reduce token usage (~70% reduction)'), - projectRoot: 
z.string().optional().describe('Project root directory (defaults to current working directory)'), + projectRoot: z + .string() + .max(4096) + .optional() + .describe('Project root directory (defaults to current working directory)'), }); const getSubmoduleContextOutputSchema = z.object({ @@ -63,7 +72,8 @@ export const registerGetSubmoduleContextTool = (mcpServer: McpServer): void => { }, async ({ submoduleName, includeDependencies, forceRegenerate, compress, projectRoot }): Promise => { try { - const rootDir = projectRoot || process.cwd(); + // Validate and normalize project root to prevent path traversal + const rootDir = validateProjectRoot(projectRoot); // 1. Load configuration const config = await loadMonorepoConfig(rootDir); @@ -188,7 +198,8 @@ async function generateSubmoduleContent( const outputFileName = defaultFilePathMap[styleKey]; const outputFilePath = path.join(tempDir, outputFileName); - const fullPath = path.join(rootDir, submodulePath); + // Validate submodule path to prevent path traversal + const fullPath = validateSubmodulePath(submodulePath, rootDir); const cliOptions: CliOptions = { compress: options.compress, diff --git a/src/mcp/tools/initMonorepoConfigTool.ts b/src/mcp/tools/initMonorepoConfigTool.ts index 0a661b480..4496d4029 100644 --- a/src/mcp/tools/initMonorepoConfigTool.ts +++ b/src/mcp/tools/initMonorepoConfigTool.ts @@ -7,14 +7,19 @@ import type { MonorepoConfig, SubmoduleConfig } from '../config/monorepoConfigLo import { MONOREPO_CONFIG_FILE, saveMonorepoConfig } from '../config/monorepoConfigLoader.js'; import { ProjectDetector } from '../detection/projectDetector.js'; import { generateCacheScript, JustfileGenerator } from '../generation/justfileGenerator.js'; +import { validateProjectRoot } from '../security/pathValidator.js'; import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, convertErrorToJson } from './mcpToolRuntime.js'; const initMonorepoConfigInputSchema = z.object({ - projectRoot: 
z.string().optional().describe('Project root directory (defaults to current working directory)'), + projectRoot: z + .string() + .max(4096) + .optional() + .describe('Project root directory (defaults to current working directory)'), detectGitSubmodules: z.boolean().default(true).describe('Detect and mark git submodules'), generateJustfile: z.boolean().default(true).describe('Generate justfile for cache management'), generateScript: z.boolean().default(true).describe('Generate cache management script'), - cacheDirectory: z.string().default('.repomix-cache').describe('Cache directory path'), + cacheDirectory: z.string().min(1).max(255).default('.repomix-cache').describe('Cache directory path'), compress: z.boolean().default(true).describe('Enable compression by default'), style: z.enum(['xml', 'markdown', 'json', 'plain']).default('xml').describe('Default output format'), }); @@ -64,7 +69,8 @@ export const registerInitMonorepoConfigTool = (mcpServer: McpServer): void => { style, }): Promise<CallToolResult> => { try { - const rootDir = projectRoot || process.cwd(); + // Validate and normalize project root to prevent path traversal + const rootDir = validateProjectRoot(projectRoot); // 1. 
Detect project structure const detector = new ProjectDetector(rootDir); diff --git a/src/mcp/tools/invalidateSubmoduleCacheTool.ts b/src/mcp/tools/invalidateSubmoduleCacheTool.ts index f17edbaba..bf394ce2b 100644 --- a/src/mcp/tools/invalidateSubmoduleCacheTool.ts +++ b/src/mcp/tools/invalidateSubmoduleCacheTool.ts @@ -4,14 +4,20 @@ import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; import { z } from 'zod'; import { CacheManager } from '../cache/cacheManager.js'; import { getSubmoduleNames, loadMonorepoConfig } from '../config/monorepoConfigLoader.js'; +import { validateProjectRoot } from '../security/pathValidator.js'; import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, convertErrorToJson } from './mcpToolRuntime.js'; const invalidateSubmoduleCacheInputSchema = z.object({ submodules: z - .array(z.string()) + .array(z.string().min(1).max(255)) + .max(100) .optional() .describe('List of submodule names to invalidate. If not provided, invalidates all caches.'), - projectRoot: z.string().optional().describe('Project root directory (defaults to current working directory)'), + projectRoot: z + .string() + .max(4096) + .optional() + .describe('Project root directory (defaults to current working directory)'), }); const invalidateSubmoduleCacheOutputSchema = z.object({ @@ -41,7 +47,8 @@ export const registerInvalidateSubmoduleCacheTool = (mcpServer: McpServer): void }, async ({ submodules, projectRoot }): Promise<CallToolResult> => { try { - const rootDir = projectRoot || process.cwd(); + // Validate and normalize project root to prevent path traversal + const rootDir = validateProjectRoot(projectRoot); // Load configuration const config = await loadMonorepoConfig(rootDir); diff --git a/src/mcp/tools/listSubmodulesTool.ts b/src/mcp/tools/listSubmodulesTool.ts index 606423312..e48fa2d86 100644 --- a/src/mcp/tools/listSubmodulesTool.ts +++ b/src/mcp/tools/listSubmodulesTool.ts @@ -3,12 +3,17 @@ import type { CallToolResult } from 
'@modelcontextprotocol/sdk/types.js'; import { z } from 'zod'; import { getSubmoduleNames, loadMonorepoConfig } from '../config/monorepoConfigLoader.js'; import { DependencyGraph } from '../dependency/dependencyGraph.js'; +import { validateProjectRoot } from '../security/pathValidator.js'; import { buildMcpToolErrorResponse, buildMcpToolSuccessResponse, convertErrorToJson } from './mcpToolRuntime.js'; const listSubmodulesInputSchema = z.object({ showDependencies: z.boolean().default(false).describe('Include dependency relationships in the response'), showStats: z.boolean().default(false).describe('Include dependency graph statistics'), - projectRoot: z.string().optional().describe('Project root directory (defaults to current working directory)'), + projectRoot: z + .string() + .max(4096) + .optional() + .describe('Project root directory (defaults to current working directory)'), }); const listSubmodulesOutputSchema = z.object({ @@ -55,7 +60,8 @@ export const registerListSubmodulesTool = (mcpServer: McpServer): void => { }, async ({ showDependencies, showStats, projectRoot }): Promise<CallToolResult> => { try { - const rootDir = projectRoot || process.cwd(); + // Validate and normalize project root to prevent path traversal + const rootDir = validateProjectRoot(projectRoot); // Load configuration const config = await loadMonorepoConfig(rootDir); diff --git a/tests/mcp/cache/cacheManager.test.ts b/tests/mcp/cache/cacheManager.test.ts index 2519adacb..dfe561613 100644 --- a/tests/mcp/cache/cacheManager.test.ts +++ b/tests/mcp/cache/cacheManager.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { CacheManager, type CacheManagerDeps } from '../../../src/mcp/cache/cacheManager.js'; import type { CacheMetadata } from '../../../src/mcp/cache/cacheTypes.js'; diff --git a/tests/mcp/security/pathValidator.test.ts b/tests/mcp/security/pathValidator.test.ts new file mode 100644 index 
000000000..634f9c7ef --- /dev/null +++ b/tests/mcp/security/pathValidator.test.ts @@ -0,0 +1,126 @@ +import { describe, expect, it } from 'vitest'; +import { + PathSecurityError, + sanitizeSubmoduleName, + validatePathWithinRoot, + validateProjectRoot, + validateSubmodulePath, +} from '../../../src/mcp/security/pathValidator.js'; + +describe('pathValidator', () => { + describe('validatePathWithinRoot', () => { + it('should allow valid relative paths', () => { + const result = validatePathWithinRoot('crates/foo', '/project'); + expect(result).toBe('/project/crates/foo'); + }); + + it('should normalize paths with redundant segments', () => { + const result = validatePathWithinRoot('crates/../crates/foo', '/project'); + expect(result).toBe('/project/crates/foo'); + }); + + it('should reject paths that escape root with ../', () => { + expect(() => { + validatePathWithinRoot('../../../etc/passwd', '/project'); + }).toThrow(PathSecurityError); + }); + + it('should reject paths that start with ..', () => { + expect(() => { + validatePathWithinRoot('..', '/project'); + }).toThrow(PathSecurityError); + }); + + it('should allow deeply nested paths within root', () => { + const result = validatePathWithinRoot('a/b/c/d/e/f', '/project'); + expect(result).toBe('/project/a/b/c/d/e/f'); + }); + }); + + describe('validateProjectRoot', () => { + it('should return cwd when no projectRoot provided', () => { + const result = validateProjectRoot(); + expect(result).toBe(process.cwd()); + }); + + it('should return cwd when undefined provided', () => { + const result = validateProjectRoot(undefined); + expect(result).toBe(process.cwd()); + }); + + it('should normalize and resolve relative paths', () => { + const result = validateProjectRoot('./subdir'); + expect(result).toContain('subdir'); + expect(result.startsWith('/')).toBe(true); // Should be absolute + }); + }); + + describe('sanitizeSubmoduleName', () => { + it('should pass through valid names', () => { + 
expect(sanitizeSubmoduleName('crate-foo')).toBe('crate-foo'); + expect(sanitizeSubmoduleName('crate_bar')).toBe('crate_bar'); + expect(sanitizeSubmoduleName('my.package')).toBe('my.package'); + }); + + it('should replace path separators with hyphens', () => { + expect(sanitizeSubmoduleName('crates/foo')).toBe('crates-foo'); + expect(sanitizeSubmoduleName('crates\\bar')).toBe('crates-bar'); + }); + + it('should remove invalid characters', () => { + expect(sanitizeSubmoduleName('crate@foo#bar')).toBe('cratefoobar'); + }); + + it('should collapse multiple dots', () => { + expect(sanitizeSubmoduleName('name..test')).toBe('name.test'); + }); + + it('should remove leading/trailing dots', () => { + expect(sanitizeSubmoduleName('.hidden.')).toBe('hidden'); + }); + + it('should throw for names with only invalid characters', () => { + expect(() => { + sanitizeSubmoduleName('@#$%'); + }).toThrow(PathSecurityError); + }); + + it('should throw for empty names', () => { + expect(() => { + sanitizeSubmoduleName(''); + }).toThrow(PathSecurityError); + }); + + it('should throw for names exceeding 255 characters', () => { + const longName = 'a'.repeat(300); + expect(() => { + sanitizeSubmoduleName(longName); + }).toThrow(PathSecurityError); + }); + }); + + describe('validateSubmodulePath', () => { + it('should allow valid relative paths', () => { + const result = validateSubmodulePath('crates/foo', '/project'); + expect(result).toBe('/project/crates/foo'); + }); + + it('should reject absolute paths', () => { + expect(() => { + validateSubmodulePath('/etc/passwd', '/project'); + }).toThrow(PathSecurityError); + }); + + it('should reject paths with traversal', () => { + expect(() => { + validateSubmodulePath('../outside', '/project'); + }).toThrow(PathSecurityError); + }); + + it('should reject paths that eventually escape', () => { + expect(() => { + validateSubmodulePath('crates/../../outside', '/project'); + }).toThrow(PathSecurityError); + }); + }); +}); From 
e400553d5b02269feab02a892fef102045bd0489 Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 6 Jan 2026 10:00:55 -0800 Subject: [PATCH 3/6] feat(cli): Add file lock to prevent concurrent repomix execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [MOTIVATION] Multiple repomix processes running simultaneously in the same directory can cause data races and corrupt output files. Users need protection against accidental concurrent execution. [IMPLEMENTATION DETAILS] - Add src/core/lock/fileLock.ts with PID-based lock file mechanism - Lock is acquired before task execution in defaultAction.ts - Stale lock detection (checks if holding process is still alive) - Lock automatically released on completion or error (via finally block) - Skip locking for stdout mode (no file conflict possible) - 30-minute timeout as fallback for zombie processes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/cli/actions/defaultAction.ts | 21 ++ src/core/lock/fileLock.ts | 174 ++++++++++++++++ src/core/lock/index.ts | 2 + tests/cli/actions/defaultAction.test.ts | 5 + .../defaultAction.tokenCountTree.test.ts | 5 + tests/core/lock/fileLock.test.ts | 191 ++++++++++++++++++ 6 files changed, 398 insertions(+) create mode 100644 src/core/lock/fileLock.ts create mode 100644 src/core/lock/index.ts create mode 100644 tests/core/lock/fileLock.test.ts diff --git a/src/cli/actions/defaultAction.ts b/src/cli/actions/defaultAction.ts index 0fe69e1fd..f6745183d 100644 --- a/src/cli/actions/defaultAction.ts +++ b/src/cli/actions/defaultAction.ts @@ -8,6 +8,7 @@ import { repomixConfigCliSchema, } from '../../config/configSchema.js'; import { readFilePathsFromStdin } from '../../core/file/fileStdin.js'; +import { acquireLock, FileLockError, releaseLock } from '../../core/lock/index.js'; import type { PackResult } from '../../core/packager.js'; import { generateDefaultSkillName } from 
'../../core/skill/skillUtils.js'; import { RepomixError, rethrowValidationErrorIfZodError } from '../../shared/errorHandle.js'; @@ -104,6 +105,21 @@ export const runDefaultAction = async ( logger.trace(`Read ${stdinFilePaths.length} file paths from stdin in main process`); } + // Acquire lock for the working directory (skip for stdout mode) + const isStdoutMode = config.output.stdout === true; + let lockPath: string | undefined; + + if (!isStdoutMode) { + try { + lockPath = await acquireLock(cwd); + } catch (error) { + if (error instanceof FileLockError) { + throw new RepomixError(error.message); + } + throw error; + } + } + // Create worker task runner const taskRunner = initTaskRunner({ numOfTasks: 1, @@ -137,6 +153,11 @@ export const runDefaultAction = async ( } finally { // Always cleanup worker pool await taskRunner.cleanup(); + + // Release lock if acquired + if (lockPath) { + await releaseLock(lockPath); + } } }; diff --git a/src/core/lock/fileLock.ts b/src/core/lock/fileLock.ts new file mode 100644 index 000000000..601f627a0 --- /dev/null +++ b/src/core/lock/fileLock.ts @@ -0,0 +1,174 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { logger } from '../../shared/logger.js'; + +const LOCK_FILENAME = '.repomix.lock'; +const STALE_THRESHOLD_MS = 30 * 60 * 1000; // 30 minutes + +export interface LockInfo { + pid: number; + startTime: number; + cwd: string; +} + +export class FileLockError extends Error { + constructor( + message: string, + public readonly lockPath: string, + public readonly existingLock?: LockInfo, + ) { + super(message); + this.name = 'FileLockError'; + } +} + +/** + * Check if a process with given PID is still running. + * Uses a platform-agnostic approach by trying to send signal 0. + */ +const isProcessRunning = (pid: number): boolean => { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +}; + +/** + * Read and parse lock file contents. 
+ */ +const readLockFile = async (lockPath: string): Promise<LockInfo | null> => { + try { + const content = await fs.readFile(lockPath, 'utf-8'); + const parsed = JSON.parse(content) as LockInfo; + + if (typeof parsed.pid !== 'number' || typeof parsed.startTime !== 'number') { + return null; + } + + return parsed; + } catch { + return null; + } +}; + +/** + * Check if a lock is stale (process dead or lock too old). + */ +const isLockStale = (lockInfo: LockInfo): boolean => { + // Check if process is still running + if (!isProcessRunning(lockInfo.pid)) { + logger.debug(`Lock is stale: process ${lockInfo.pid} is not running`); + return true; + } + + // Check if lock is too old (fallback for zombie processes) + const age = Date.now() - lockInfo.startTime; + if (age > STALE_THRESHOLD_MS) { + logger.debug(`Lock is stale: lock age ${age}ms exceeds threshold ${STALE_THRESHOLD_MS}ms`); + return true; + } + + return false; +}; + +/** + * Acquire a file lock for the specified directory. + * + * @param targetDir The directory to lock (where .repomix.lock will be created) + * @returns The path to the lock file (for cleanup) + * @throws FileLockError if lock cannot be acquired + */ +export const acquireLock = async (targetDir: string): Promise<string> => { + const lockPath = path.join(targetDir, LOCK_FILENAME); + const lockInfo: LockInfo = { + pid: process.pid, + startTime: Date.now(), + cwd: process.cwd(), + }; + + // Check for existing lock + const existingLock = await readLockFile(lockPath); + + if (existingLock) { + if (isLockStale(existingLock)) { + // Remove stale lock + logger.debug(`Removing stale lock file: ${lockPath}`); + try { + await fs.unlink(lockPath); + } catch { + // Ignore errors when removing stale lock + } + } else { + // Lock is held by another active process + throw new FileLockError( + `Another repomix process (PID: ${existingLock.pid}) is already running in this directory. 
` + `If you believe this is an error, remove the lock file: ${lockPath}`, + lockPath, + existingLock, + ); + } + } + + // Try to create lock file atomically + try { + const fileHandle = await fs.open(lockPath, 'wx'); + await fileHandle.writeFile(JSON.stringify(lockInfo, null, 2)); + await fileHandle.close(); + logger.debug(`Acquired lock: ${lockPath}`); + return lockPath; + } catch (error) { + if (error instanceof Error && 'code' in error && error.code === 'EEXIST') { + // Race condition: another process created the lock between our check and create + const raceLock = await readLockFile(lockPath); + throw new FileLockError( + `Another repomix process acquired the lock. If you believe this is an error, remove the lock file: ${lockPath}`, + lockPath, + raceLock ?? undefined, + ); + } + throw error; + } +}; + +/** + * Release a file lock. + * + * @param lockPath The path to the lock file to remove + */ +export const releaseLock = async (lockPath: string): Promise<void> => { + try { + // Verify we own the lock before releasing + const lockInfo = await readLockFile(lockPath); + if (lockInfo && lockInfo.pid === process.pid) { + await fs.unlink(lockPath); + logger.debug(`Released lock: ${lockPath}`); + } else if (lockInfo) { + logger.warn(`Lock file owned by different process (PID: ${lockInfo.pid}), not releasing`); + } + } catch (error) { + if (error instanceof Error && 'code' in error && error.code === 'ENOENT') { + // Lock file already removed, that's fine + logger.debug(`Lock file already removed: ${lockPath}`); + return; + } + logger.warn(`Failed to release lock: ${lockPath}`, error); + } +}; + +/** + * Execute a function while holding a lock on the target directory. 
+ * + * @param targetDir The directory to lock + * @param fn The function to execute while holding the lock + * @returns The result of the function + */ +export const withLock = async <T>(targetDir: string, fn: () => Promise<T>): Promise<T> => { + const lockPath = await acquireLock(targetDir); + try { + return await fn(); + } finally { + await releaseLock(lockPath); + } +}; diff --git a/src/core/lock/index.ts b/src/core/lock/index.ts new file mode 100644 index 000000000..52b30f801 --- /dev/null +++ b/src/core/lock/index.ts @@ -0,0 +1,2 @@ +export type { LockInfo } from './fileLock.js'; +export { acquireLock, FileLockError, releaseLock, withLock } from './fileLock.js'; diff --git a/tests/cli/actions/defaultAction.test.ts b/tests/cli/actions/defaultAction.test.ts index 4d5852a98..a0cd5a3a6 100644 --- a/tests/cli/actions/defaultAction.test.ts +++ b/tests/cli/actions/defaultAction.test.ts @@ -17,6 +17,11 @@ vi.mock('../../../src/core/file/packageJsonParse'); vi.mock('../../../src/core/file/fileStdin'); vi.mock('../../../src/shared/logger'); vi.mock('../../../src/shared/processConcurrency'); +vi.mock('../../../src/core/lock/index', () => ({ + acquireLock: vi.fn().mockResolvedValue('/mock/.repomix.lock'), + releaseLock: vi.fn().mockResolvedValue(undefined), + FileLockError: class FileLockError extends Error {}, +})); const mockSpinner = { start: vi.fn() as MockedFunction<() => void>, diff --git a/tests/cli/actions/defaultAction.tokenCountTree.test.ts b/tests/cli/actions/defaultAction.tokenCountTree.test.ts index 23f6d91d2..f24db8c38 100644 --- a/tests/cli/actions/defaultAction.tokenCountTree.test.ts +++ b/tests/cli/actions/defaultAction.tokenCountTree.test.ts @@ -13,6 +13,11 @@ vi.mock('../../../src/shared/processConcurrency.js'); vi.mock('../../../src/cli/actions/migrationAction.js', () => ({ runMigrationAction: vi.fn(), })); +vi.mock('../../../src/core/lock/index', () => ({ + acquireLock: vi.fn().mockResolvedValue('/mock/.repomix.lock'), + releaseLock: 
vi.fn().mockResolvedValue(undefined), + FileLockError: class FileLockError extends Error {}, +})); describe('defaultAction with tokenCountTree', () => { const mockLoadFileConfig = configLoad.loadFileConfig as Mock; diff --git a/tests/core/lock/fileLock.test.ts b/tests/core/lock/fileLock.test.ts new file mode 100644 index 000000000..9602edab4 --- /dev/null +++ b/tests/core/lock/fileLock.test.ts @@ -0,0 +1,191 @@ +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { acquireLock, FileLockError, releaseLock, withLock } from '../../../src/core/lock/fileLock.js'; + +describe('fileLock', () => { + let testDir: string; + + beforeEach(async () => { + // Create a unique temp directory for each test + testDir = await fs.mkdtemp(path.join(os.tmpdir(), 'repomix-lock-test-')); + }); + + afterEach(async () => { + // Clean up temp directory + try { + await fs.rm(testDir, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + }); + + describe('acquireLock', () => { + it('should create a lock file in the target directory', async () => { + const lockPath = await acquireLock(testDir); + + expect(lockPath).toBe(path.join(testDir, '.repomix.lock')); + + const lockExists = await fs + .access(lockPath) + .then(() => true) + .catch(() => false); + expect(lockExists).toBe(true); + + // Clean up + await releaseLock(lockPath); + }); + + it('should store PID and timestamp in lock file', async () => { + const lockPath = await acquireLock(testDir); + + const content = await fs.readFile(lockPath, 'utf-8'); + const lockInfo = JSON.parse(content); + + expect(lockInfo.pid).toBe(process.pid); + expect(typeof lockInfo.startTime).toBe('number'); + expect(lockInfo.startTime).toBeLessThanOrEqual(Date.now()); + + await releaseLock(lockPath); + }); + + it('should throw FileLockError if lock already exists from same process', async () => { + const lockPath = await 
acquireLock(testDir); + + await expect(acquireLock(testDir)).rejects.toThrow(FileLockError); + + await releaseLock(lockPath); + }); + + it('should remove stale lock from non-existent process', async () => { + // Create a fake stale lock with a non-existent PID + const lockPath = path.join(testDir, '.repomix.lock'); + const staleLockInfo = { + pid: 999999999, // Very unlikely to exist + startTime: Date.now(), + cwd: '/some/path', + }; + await fs.writeFile(lockPath, JSON.stringify(staleLockInfo)); + + // Should succeed because the stale lock should be removed + const newLockPath = await acquireLock(testDir); + expect(newLockPath).toBe(lockPath); + + // Verify the lock now has our PID + const content = await fs.readFile(lockPath, 'utf-8'); + const lockInfo = JSON.parse(content); + expect(lockInfo.pid).toBe(process.pid); + + await releaseLock(newLockPath); + }); + }); + + describe('releaseLock', () => { + it('should remove the lock file', async () => { + const lockPath = await acquireLock(testDir); + + await releaseLock(lockPath); + + const lockExists = await fs + .access(lockPath) + .then(() => true) + .catch(() => false); + expect(lockExists).toBe(false); + }); + + it('should not throw if lock file does not exist', async () => { + const fakeLockPath = path.join(testDir, '.repomix.lock'); + + await expect(releaseLock(fakeLockPath)).resolves.not.toThrow(); + }); + + it('should not remove lock owned by different process', async () => { + // Create a lock with a different PID (simulating another process) + const lockPath = path.join(testDir, '.repomix.lock'); + const otherLockInfo = { + pid: process.pid + 1, // Different PID + startTime: Date.now(), + cwd: '/some/path', + }; + await fs.writeFile(lockPath, JSON.stringify(otherLockInfo)); + + // releaseLock should not remove it + await releaseLock(lockPath); + + // Lock should still exist + const lockExists = await fs + .access(lockPath) + .then(() => true) + .catch(() => false); + expect(lockExists).toBe(true); + }); + 
}); + + describe('withLock', () => { + it('should execute function while holding lock', async () => { + let lockExistedDuringExecution = false; + + await withLock(testDir, async () => { + const lockPath = path.join(testDir, '.repomix.lock'); + lockExistedDuringExecution = await fs + .access(lockPath) + .then(() => true) + .catch(() => false); + }); + + expect(lockExistedDuringExecution).toBe(true); + + // Lock should be released after + const lockPath = path.join(testDir, '.repomix.lock'); + const lockExists = await fs + .access(lockPath) + .then(() => true) + .catch(() => false); + expect(lockExists).toBe(false); + }); + + it('should release lock even if function throws', async () => { + await expect( + withLock(testDir, async () => { + throw new Error('Test error'); + }), + ).rejects.toThrow('Test error'); + + // Lock should still be released + const lockPath = path.join(testDir, '.repomix.lock'); + const lockExists = await fs + .access(lockPath) + .then(() => true) + .catch(() => false); + expect(lockExists).toBe(false); + }); + + it('should return the result of the function', async () => { + const result = await withLock(testDir, async () => { + return 'test result'; + }); + + expect(result).toBe('test result'); + }); + }); + + describe('FileLockError', () => { + it('should include lock path and existing lock info', async () => { + const lockPath = await acquireLock(testDir); + + try { + await acquireLock(testDir); + expect.fail('Should have thrown'); + } catch (error) { + expect(error).toBeInstanceOf(FileLockError); + const lockError = error as FileLockError; + expect(lockError.lockPath).toBe(path.join(testDir, '.repomix.lock')); + expect(lockError.existingLock).toBeDefined(); + expect(lockError.existingLock?.pid).toBe(process.pid); + } + + await releaseLock(lockPath); + }); + }); +}); From c6127542435bf66d63aea626cbf9bcd2c69ce98f Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Thu, 8 Jan 2026 06:01:32 -0800 Subject: [PATCH 4/6] chore(config): Update 
.gitignore patterns --- .gitignore | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 7a9c50be6..45e69a127 100644 --- a/.gitignore +++ b/.gitignore @@ -24,11 +24,7 @@ coverage/ *.temp # Repomix output -repomix-output.txt -repomix-output.xml -repomix-output.md -repomix-output.json -repomix-output.* +**/repomix-output.* # ESLint cache .eslintcache @@ -43,10 +39,12 @@ repomix-output.* .aider* # repomix runner +**/*.repomix.lock .repomix/ # repomix references .claude/skills/repomix-reference-*/ +.claude/rules # Agent /.mcp.json From 9f5ea93f3accdd3f7bd1ca2af4fdfbc99570f75e Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Thu, 8 Jan 2026 05:55:04 -0800 Subject: [PATCH 5/6] fix(mcp): add submodule count limit to prevent resource exhaustion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [MOTIVATION] SEC-005: MonorepoConfigSchema had no size limit on submodules dictionary. Malicious configuration files could define thousands of submodules, causing memory exhaustion or excessive processing time. 
[IMPLEMENTATION DETAILS] - Added MAX_SUBMODULES constant (1000) to define maximum allowed submodules - Added .refine() validation to submodules field using Zod - Provides clear error message when limit is exceeded - Maintains backward compatibility for normal usage 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- src/mcp/config/monorepoConfigLoader.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mcp/config/monorepoConfigLoader.ts b/src/mcp/config/monorepoConfigLoader.ts index b6cbde6f3..6bc5cc4ae 100644 --- a/src/mcp/config/monorepoConfigLoader.ts +++ b/src/mcp/config/monorepoConfigLoader.ts @@ -33,11 +33,21 @@ const RepomixConfigSchema = z.object({ showLineNumbers: z.boolean().default(true).describe('Show line numbers in output'), }); +/** + * Maximum allowed submodules in monorepo configuration + */ +const MAX_SUBMODULES = 1000; + /** * Schema for monorepo configuration */ export const MonorepoConfigSchema = z.object({ - submodules: z.record(z.string(), SubmoduleConfigSchema).describe('Map of submodule name to configuration'), + submodules: z + .record(z.string(), SubmoduleConfigSchema) + .refine((obj) => Object.keys(obj).length <= MAX_SUBMODULES, { + message: `Too many submodules defined (maximum: ${MAX_SUBMODULES})`, + }) + .describe('Map of submodule name to configuration'), cache: CacheConfigSchema.default({ directory: '.repomix-cache', enabled: true, From 1ffd4b3b0d3b4f91973701830776c14a8ecb135c Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Thu, 8 Jan 2026 05:56:19 -0800 Subject: [PATCH 6/6] fix(mcp): add schema validation for cache metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Zod schema validation when reading cache metadata to prevent security issues from malformed or malicious cache files. [MOTIVATION] Cache metadata files are read from disk and parsed as JSON without validation. 
This creates a security vulnerability where malformed or malicious cache files could cause runtime errors or unexpected behavior. [IMPLEMENTATION DETAILS] - Created CacheMetadataSchema in cacheTypes.ts using Zod - Schema matches all fields in CacheMetadata interface - Added validation for numeric fields (fileCount, tokenCount) to be non-negative integers - Updated readMeta() method to use CacheMetadataSchema.parse() - Zod validation throws meaningful errors on invalid data Fixes security issue SEC-003 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- src/mcp/cache/cacheManager.ts | 4 +++- src/mcp/cache/cacheTypes.ts | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/mcp/cache/cacheManager.ts b/src/mcp/cache/cacheManager.ts index 1ba994145..6a42928f4 100644 --- a/src/mcp/cache/cacheManager.ts +++ b/src/mcp/cache/cacheManager.ts @@ -5,6 +5,7 @@ import { promisify } from 'node:util'; import { logger } from '../../shared/logger.js'; import { sanitizeSubmoduleName, validatePathWithinRoot } from '../security/pathValidator.js'; import type { CacheCheckResult, CachedContent, CacheMetadata } from './cacheTypes.js'; +import { CacheMetadataSchema } from './cacheTypes.js'; const execFileAsync = promisify(execFile); @@ -78,7 +79,8 @@ export class CacheManager { */ private async readMeta(metaPath: string): Promise<CacheMetadata> { const content = await this.deps.fsReadFile(metaPath, 'utf-8'); - return JSON.parse(content) as CacheMetadata; + const parsed = JSON.parse(content); + return CacheMetadataSchema.parse(parsed); } /** diff --git a/src/mcp/cache/cacheTypes.ts b/src/mcp/cache/cacheTypes.ts index 60f9b91cd..a1bb2405f 100644 --- a/src/mcp/cache/cacheTypes.ts +++ b/src/mcp/cache/cacheTypes.ts @@ -2,6 +2,23 @@ * Types for Monorepo Submodule Caching */ +import { z } from 'zod'; + +/** + * Schema for cache metadata validation + */ +export const CacheMetadataSchema = z.object({ + submodule: 
z.string().describe('Submodule name'), + generatedAt: z.string().describe('When the cache was generated'), + gitCommit: z.string().describe('Git commit hash when cache was generated'), + fileCount: z.number().int().nonnegative().describe('Number of files in the submodule'), + tokenCount: z.number().int().nonnegative().describe('Total token count of the content'), + dependencies: z.array(z.string()).describe('List of dependencies (other submodule names)'), + repomixVersion: z.string().describe('Repomix version used to generate'), + compressed: z.boolean().describe('Whether compression was enabled'), + isGitSubmodule: z.boolean().optional().describe('Whether this is a git submodule'), +}); + /** * Metadata stored alongside cached content */