diff --git a/benchmarks/memory/.gitignore b/benchmarks/memory/.gitignore new file mode 100644 index 000000000..b5105602d --- /dev/null +++ b/benchmarks/memory/.gitignore @@ -0,0 +1,24 @@ +# Dependencies +node_modules/ + +# Build output +dist/ + +# Test outputs +memory-test-output.txt +test-output-*.txt + +# Test results +memory-history-*.json +memory-test-results-*.json + +# npm +package-lock.json +npm-debug.log* + +# TypeScript +*.tsbuildinfo + +# OS files +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md new file mode 100644 index 000000000..96b6db2bf --- /dev/null +++ b/benchmarks/memory/README.md @@ -0,0 +1,33 @@ +# Memory Benchmarks + +Memory usage monitoring tools for repomix. + +## Setup + +```bash +cd benchmarks/memory +npm install +``` + +## Quick Start + +```bash +# Quick memory leak check +npm run memory:check + +# Detailed analysis +npm run memory:analyze +``` + +## Available Scripts + +- `npm run memory:check` - Fast leak detection (default 100 iterations) +- `npm run memory:watch` - Continuous monitoring +- `npm run memory:analyze` - Comprehensive analysis with reports + +## Understanding Results + +- **Heap Memory**: JavaScript objects (should stabilize) +- **RSS Memory**: Total process memory (watch for growth > 100%) + +Look for consistent upward trends that indicate memory leaks. \ No newline at end of file diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json new file mode 100644 index 000000000..bc21e3ee5 --- /dev/null +++ b/benchmarks/memory/package.json @@ -0,0 +1,32 @@ +{ + "name": "@repomix/memory-benchmarks", + "version": "1.0.0", + "private": true, + "type": "module", + "description": "Memory usage benchmarks and leak detection for repomix", + "scripts": { + "build": "tsc", + "build:repomix": "cd ../.. 
&& node --run build", + "build:all": "node --run build:repomix && node --run build", + "clean": "rm -rf dist", + "memory:check": "node --run build:all && node --expose-gc dist/simple-memory-test.js", + "memory:watch": "node --run build:all && node --expose-gc dist/simple-memory-test.js continuous", + "memory:analyze": "node --run build:all && node --expose-gc dist/memory-leak-test.js 200 500" + }, + "dependencies": { + "repomix": "file:../.." + }, + "devDependencies": { + "@types/node": "^20.0.0", + "typescript": "^5.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "keywords": [ + "benchmark", + "memory", + "performance", + "leak-detection" + ] +} diff --git a/benchmarks/memory/src/memory-leak-test.ts b/benchmarks/memory/src/memory-leak-test.ts new file mode 100644 index 000000000..0cf0e799d --- /dev/null +++ b/benchmarks/memory/src/memory-leak-test.ts @@ -0,0 +1,285 @@ +#!/usr/bin/env node + +/** + * Comprehensive memory leak test for runCli + * Tests multiple configurations and generates detailed reports + */ + +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { runCli } from 'repomix'; +import type { MemoryHistory, MemoryTestSummary, MemoryUsage, TestConfig } from './types.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const projectRoot = path.resolve(__dirname, '..'); + +// Configuration +const DEFAULT_ITERATIONS = 500; +const DEFAULT_DELAY = 100; +const MEMORY_LOG_INTERVAL = 10; +const FORCE_GC_INTERVAL = 20; + +// Test configurations +const TEST_CONFIGS: TestConfig[] = [ + { + name: 'Local Directory (src/)', + args: ['.'], + cwd: projectRoot, + options: { + include: 'src/**/*.ts', + output: path.join(__dirname, '../test-output-1.txt'), + style: 'plain', + quiet: true, + }, + }, + { + name: 'Local Directory with compression', + args: ['.'], + cwd: projectRoot, + options: { + include: 'src/**/*.ts', + output: path.join(__dirname, '../test-output-2.txt'), + style: 
'xml', + compress: true, + quiet: true, + }, + }, + { + name: 'Complex patterns', + args: ['.'], + cwd: projectRoot, + options: { + include: 'src/**/*.{ts,js}', + ignore: '**/*.test.ts,**/*.d.ts', + output: path.join(__dirname, '../test-output-3.txt'), + style: 'markdown', + quiet: true, + }, + }, +]; + +// Memory tracking +const memoryHistory: MemoryHistory[] = []; + +const iterations = Number.parseInt(process.argv[2]) || DEFAULT_ITERATIONS; +const delay = Number.parseInt(process.argv[3]) || DEFAULT_DELAY; + +console.log('๐Ÿงช Comprehensive Memory Leak Test'); +console.log(`๐Ÿ“‹ Configuration: ${iterations} iterations, ${delay}ms delay`); +console.log(`๐ŸŽฏ Test Configurations: ${TEST_CONFIGS.length} different configs`); +console.log('๐Ÿ›‘ Press Ctrl+C to stop\n'); + +function getMemoryUsage(): MemoryUsage { + const usage = process.memoryUsage(); + const heapUsed = Math.round((usage.heapUsed / 1024 / 1024) * 100) / 100; + const heapTotal = Math.round((usage.heapTotal / 1024 / 1024) * 100) / 100; + const external = Math.round((usage.external / 1024 / 1024) * 100) / 100; + const rss = Math.round((usage.rss / 1024 / 1024) * 100) / 100; + const heapUsagePercent = Math.round((usage.heapUsed / usage.heapTotal) * 100 * 100) / 100; + + return { + heapUsed, + heapTotal, + external, + rss, + heapUsagePercent, + }; +} + +function forceGC(): void { + if (global.gc) { + global.gc(); + console.log('๐Ÿ—‘๏ธ Forced garbage collection'); + } +} + +function logMemoryUsage(iteration: number, configName: string, error: Error | null = null): void { + const usage = getMemoryUsage(); + const timestamp = new Date().toISOString(); + + memoryHistory.push({ + iteration, + configName, + timestamp, + ...usage, + error: !!error, + }); + + const statusIcon = error ? 'โŒ' : 'โœ…'; + const errorText = error ? 
` (ERROR: ${error.message})` : ''; + + console.log( + `${statusIcon} Iteration ${iteration}: ${configName} - ` + + `Heap: ${usage.heapUsed}MB/${usage.heapTotal}MB (${usage.heapUsagePercent}%), ` + + `RSS: ${usage.rss}MB${errorText}`, + ); +} + +async function cleanupFiles(): Promise<void> { + const filesToClean = TEST_CONFIGS.map((config) => config.options.output); + + for (const file of filesToClean) { + try { + await fs.unlink(file); + } catch (error) { + if (error instanceof Error && 'code' in error && error.code !== 'ENOENT') { + console.warn(`Failed to cleanup ${file}:`, error.message); + } + } + } +} + +function analyzeMemoryTrends(): void { + if (memoryHistory.length < 10) return; + + const recent = memoryHistory.slice(-10); + const initial = memoryHistory.slice(0, 10); + + const avgRecentHeap = recent.reduce((sum, entry) => sum + entry.heapUsed, 0) / recent.length; + const avgInitialHeap = initial.reduce((sum, entry) => sum + entry.heapUsed, 0) / initial.length; + const avgRecentRSS = recent.reduce((sum, entry) => sum + entry.rss, 0) / recent.length; + const avgInitialRSS = initial.reduce((sum, entry) => sum + entry.rss, 0) / initial.length; + + const heapGrowth = ((avgRecentHeap - avgInitialHeap) / avgInitialHeap) * 100; + const rssGrowth = ((avgRecentRSS - avgInitialRSS) / avgInitialRSS) * 100; + + console.log('\n📊 Memory Trend Analysis:'); + console.log( + ` Heap Growth: ${heapGrowth.toFixed(2)}% (${avgInitialHeap.toFixed(2)}MB → ${avgRecentHeap.toFixed(2)}MB)`, + ); + console.log(` RSS Growth: ${rssGrowth.toFixed(2)}% (${avgInitialRSS.toFixed(2)}MB → ${avgRecentRSS.toFixed(2)}MB)`); + + if (heapGrowth > 50 || rssGrowth > 50) { + console.log('⚠️ WARNING: Significant memory growth detected - possible memory leak!'); + } +} + +async function saveMemoryHistory(): Promise<void> { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + const filename = path.join(__dirname, '..', `memory-test-results-${timestamp}.json`); + + const summary: 
MemoryTestSummary = { + testInfo: { + iterations: memoryHistory.length, + configurations: TEST_CONFIGS.length, + startTime: memoryHistory[0]?.timestamp || '', + endTime: memoryHistory[memoryHistory.length - 1]?.timestamp || '', + }, + memoryHistory, + analysis: { + peakHeapUsage: Math.max(...memoryHistory.map((h) => h.heapUsed)), + peakRSSUsage: Math.max(...memoryHistory.map((h) => h.rss)), + errorCount: memoryHistory.filter((h) => h.error).length, + averageHeapUsage: memoryHistory.reduce((sum, h) => sum + h.heapUsed, 0) / memoryHistory.length, + averageRSSUsage: memoryHistory.reduce((sum, h) => sum + h.rss, 0) / memoryHistory.length, + }, + }; + + try { + await fs.writeFile(filename, JSON.stringify(summary, null, 2)); + console.log(`\n💾 Memory test results saved to: ${filename}`); + } catch (error) { + console.error('Failed to save memory history:', error instanceof Error ? error.message : String(error)); + } +} + +async function runMemoryLeakTest(): Promise<void> { + // Log initial memory usage + console.log('📊 Initial Memory Usage:'); + logMemoryUsage(0, 'Initial', null); + + console.log('\n🚀 Starting test iterations...\n'); + + for (let i = 1; i <= iterations; i++) { + const config = TEST_CONFIGS[(i - 1) % TEST_CONFIGS.length]; + let error: Error | null = null; + + try { + // Run the CLI with current configuration + await runCli(config.args, config.cwd, config.options); + + // Clean up output files after each run + await cleanupFiles(); + } catch (err) { + error = err instanceof Error ? 
err : new Error(String(err)); + } + + // Log memory usage at specified intervals or on error + if (i % MEMORY_LOG_INTERVAL === 0 || error) { + logMemoryUsage(i, config.name, error); + } + + // Force garbage collection at specified intervals + if (i % FORCE_GC_INTERVAL === 0) { + forceGC(); + } + + // Analyze trends periodically + if (i % (MEMORY_LOG_INTERVAL * 2) === 0 && i > 20) { + analyzeMemoryTrends(); + } + + // Add delay between iterations + if (delay > 0) { + await new Promise((resolve) => setTimeout(resolve, delay)); + } + } + + console.log('\nโœ… Memory leak test completed!'); + + // Final analysis + console.log('\n๐Ÿ“Š Final Memory Analysis:'); + const finalUsage = getMemoryUsage(); + const initialUsage = memoryHistory[0]; + + if (initialUsage) { + console.log(`Initial: Heap ${initialUsage.heapUsed}MB, RSS ${initialUsage.rss}MB`); + console.log(`Final: Heap ${finalUsage.heapUsed}MB, RSS ${finalUsage.rss}MB`); + console.log( + `Growth: Heap ${(((finalUsage.heapUsed - initialUsage.heapUsed) / initialUsage.heapUsed) * 100).toFixed(2)}%, RSS ${(((finalUsage.rss - initialUsage.rss) / initialUsage.rss) * 100).toFixed(2)}%`, + ); + } + + // Save results + await saveMemoryHistory(); + + // Final cleanup + await cleanupFiles(); + + console.log('\n๐ŸŽ‰ Test completed successfully!'); +} + +// Handle process termination +process.on('SIGINT', async () => { + console.log('\n\nโš ๏ธ Test interrupted by user'); + await saveMemoryHistory(); + await cleanupFiles(); + process.exit(0); +}); + +process.on('uncaughtException', async (error) => { + console.error('\nโŒ Uncaught exception:', error); + await saveMemoryHistory(); + await cleanupFiles(); + process.exit(1); +}); + +// Validate arguments +if (Number.isNaN(iterations) || iterations <= 0) { + console.error('โŒ Invalid iterations count. Must be a positive number.'); + process.exit(1); +} + +if (Number.isNaN(delay) || delay < 0) { + console.error('โŒ Invalid delay. 
Must be a non-negative number.'); + process.exit(1); +} + +// Run the test +runMemoryLeakTest().catch(async (error) => { + console.error('\nโŒ Test failed:', error); + await saveMemoryHistory(); + await cleanupFiles(); + process.exit(1); +}); diff --git a/benchmarks/memory/src/simple-memory-test.ts b/benchmarks/memory/src/simple-memory-test.ts new file mode 100644 index 000000000..8e035cbf1 --- /dev/null +++ b/benchmarks/memory/src/simple-memory-test.ts @@ -0,0 +1,111 @@ +#!/usr/bin/env node + +/** + * Simple memory leak test for runCli + * Tests local directory processing in a loop + * Runs continuously until stopped with Ctrl+C + */ + +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { runCli } from 'repomix'; +import type { MemoryUsage } from './types.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const projectRoot = path.resolve(__dirname, '..'); + +const CONTINUOUS = process.argv[2] === 'continuous' || process.argv[2] === '-c'; +const ITERATIONS = CONTINUOUS ? 
Number.POSITIVE_INFINITY : Number.parseInt(process.argv[2]) || 100; +const DELAY = Number.parseInt(process.argv[3]) || 200; + +if (CONTINUOUS) { + console.log(`🧪 Continuous Memory Test: Running until stopped (Ctrl+C), ${DELAY}ms delay`); +} else { + console.log(`🧪 Simple Memory Test: ${ITERATIONS} iterations, ${DELAY}ms delay`); +} + +function getMemoryMB(): Pick<MemoryUsage, 'heapUsed' | 'rss'> { + const usage = process.memoryUsage(); + return { + heapUsed: Math.round((usage.heapUsed / 1024 / 1024) * 100) / 100, + rss: Math.round((usage.rss / 1024 / 1024) * 100) / 100, + }; +} + +async function cleanup(): Promise<void> { + try { + await fs.unlink(path.join(__dirname, '../memory-test-output.txt')); + } catch (error) { + // Ignore if file doesn't exist + } +} + +async function runTest(): Promise<void> { + const initialMemory = getMemoryMB(); + console.log(`📊 Initial: Heap ${initialMemory.heapUsed}MB, RSS ${initialMemory.rss}MB`); + + for (let i = 1; i <= ITERATIONS; i++) { + try { + // Run repomix on the src directory from project root + await runCli(['.'], projectRoot, { + include: 'src/**/*.ts', + output: path.join(__dirname, '../memory-test-output.txt'), + style: 'plain', + quiet: true, + }); + + // Clean up output file + await cleanup(); + + // Log memory every 5 iterations + if (i % 5 === 0) { + const current = getMemoryMB(); + const heapGrowth = (((current.heapUsed - initialMemory.heapUsed) / initialMemory.heapUsed) * 100).toFixed(1); + const rssGrowth = (((current.rss - initialMemory.rss) / initialMemory.rss) * 100).toFixed(1); + + console.log( + `✅ Iteration ${i}: Heap ${current.heapUsed}MB (+${heapGrowth}%), RSS ${current.rss}MB (+${rssGrowth}%)`, + ); + + // Force garbage collection if available + if (global.gc) { + global.gc(); + } + } + + // Delay between iterations + await new Promise((resolve) => setTimeout(resolve, DELAY)); + } catch (error) { + console.error(`❌ Iteration ${i} failed:`, error instanceof Error ? 
error.message : String(error)); + } + } + + if (!CONTINUOUS) { + const finalMemory = getMemoryMB(); + const heapGrowth = (((finalMemory.heapUsed - initialMemory.heapUsed) / initialMemory.heapUsed) * 100).toFixed(1); + const rssGrowth = (((finalMemory.rss - initialMemory.rss) / initialMemory.rss) * 100).toFixed(1); + + console.log('\n๐Ÿ“Š Final Results:'); + console.log(` Initial: Heap ${initialMemory.heapUsed}MB, RSS ${initialMemory.rss}MB`); + console.log(` Final: Heap ${finalMemory.heapUsed}MB, RSS ${finalMemory.rss}MB`); + console.log(` Growth: Heap +${heapGrowth}%, RSS +${rssGrowth}%`); + + if (Number.parseFloat(heapGrowth) > 100 || Number.parseFloat(rssGrowth) > 100) { + console.log('โš ๏ธ WARNING: Significant memory growth detected!'); + } else { + console.log('โœ… Memory usage appears stable'); + } + } +} + +// Handle graceful shutdown for continuous mode +process.on('SIGINT', () => { + console.log('\n\n๐Ÿ›‘ Test interrupted by user'); + const current = getMemoryMB(); + console.log(`๐Ÿ“Š Final Memory: Heap ${current.heapUsed}MB, RSS ${current.rss}MB`); + process.exit(0); +}); + +// Main execution +runTest().catch(console.error); diff --git a/benchmarks/memory/src/types.ts b/benchmarks/memory/src/types.ts new file mode 100644 index 000000000..242316916 --- /dev/null +++ b/benchmarks/memory/src/types.ts @@ -0,0 +1,50 @@ +export interface MemoryUsage { + heapUsed: number; + heapTotal: number; + external: number; + rss: number; + heapUsagePercent: number; +} + +export interface MemoryHistory { + iteration: number; + configName: string; + timestamp: string; + heapUsed: number; + heapTotal: number; + external: number; + rss: number; + heapUsagePercent: number; + error: boolean; +} + +export interface TestConfig { + name: string; + args: string[]; + cwd: string; + options: { + include?: string; + ignore?: string; + output: string; + style: 'plain' | 'xml' | 'markdown'; + compress?: boolean; + quiet: boolean; + }; +} + +export interface MemoryTestSummary { + 
testInfo: { + iterations: number; + configurations?: number; + startTime: string; + endTime: string; + }; + memoryHistory: MemoryHistory[]; + analysis: { + peakHeapUsage: number; + peakRSSUsage: number; + errorCount: number; + averageHeapUsage: number; + averageRSSUsage: number; + }; +} diff --git a/benchmarks/memory/tsconfig.json b/benchmarks/memory/tsconfig.json new file mode 100644 index 000000000..b383bc3cb --- /dev/null +++ b/benchmarks/memory/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "node", + "lib": ["ES2022"], + "outDir": "./dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "types": ["node"] + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} \ No newline at end of file diff --git a/package.json b/package.json index 513145c4b..4590e7424 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,8 @@ "repomix": "node --run build && node --trace-warnings bin/repomix.cjs", "repomix-src": "node --run repomix -- --include 'src,tests'", "repomix-website": "node --run repomix -- --include 'website'", + "memory-check": "node --run repomix -- --verbose | grep Memory", + "memory-check-one-file": "node --run repomix -- --verbose --include 'package.json' | grep Memory", "website": "docker compose -f website/compose.yml up --build", "website-generate-schema": "tsx website/client/scripts/generateSchema.ts", "npm-publish": "node --run npm-publish-check-branch && node --run lint && node --run test-coverage && node --run build && npm publish", diff --git a/repomix.config.json b/repomix.config.json index 3b1a318ac..b3fefb466 100644 --- a/repomix.config.json +++ b/repomix.config.json @@ -22,7 +22,7 @@ "git": { "sortByChanges": true, "sortByChangesMaxCommits": 100, - "includeDiffs": false 
+ "includeDiffs": true } }, "include": [], diff --git a/src/core/file/fileCollect.ts b/src/core/file/fileCollect.ts index fe0b15341..1e28b16cb 100644 --- a/src/core/file/fileCollect.ts +++ b/src/core/file/fileCollect.ts @@ -1,16 +1,11 @@ import pc from 'picocolors'; import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { logger } from '../../shared/logger.js'; -import { initWorker } from '../../shared/processConcurrency.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../shared/types.js'; import type { RawFile } from './fileTypes.js'; import type { FileCollectTask } from './workers/fileCollectWorker.js'; -const initTaskRunner = (numOfTasks: number) => { - const pool = initWorker(numOfTasks, new URL('./workers/fileCollectWorker.js', import.meta.url).href); - return (task: FileCollectTask) => pool.run(task); -}; - export const collectFiles = async ( filePaths: string[], rootDir: string, @@ -20,7 +15,10 @@ export const collectFiles = async ( initTaskRunner, }, ): Promise => { - const runTask = deps.initTaskRunner(filePaths.length); + const taskRunner = deps.initTaskRunner( + filePaths.length, + new URL('./workers/fileCollectWorker.js', import.meta.url).href, + ); const tasks = filePaths.map( (filePath) => ({ @@ -39,7 +37,7 @@ export const collectFiles = async ( const results = await Promise.all( tasks.map((task) => - runTask(task).then((result) => { + taskRunner.run(task).then((result) => { completedTasks++; progressCallback(`Collect file... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`); logger.trace(`Collect files... 
(${completedTasks}/${totalTasks}) ${task.filePath}`); @@ -56,5 +54,8 @@ export const collectFiles = async ( } catch (error) { logger.error('Error during file collection:', error); throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); } }; diff --git a/src/core/file/fileProcess.ts b/src/core/file/fileProcess.ts index 52aa5ac61..e5775c261 100644 --- a/src/core/file/fileProcess.ts +++ b/src/core/file/fileProcess.ts @@ -1,7 +1,7 @@ import pc from 'picocolors'; import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { logger } from '../../shared/logger.js'; -import { initWorker } from '../../shared/processConcurrency.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../shared/types.js'; import { type FileManipulator, getFileManipulator } from './fileManipulate.js'; import type { ProcessedFile, RawFile } from './fileTypes.js'; @@ -9,11 +9,6 @@ import type { FileProcessTask } from './workers/fileProcessWorker.js'; type GetFileManipulator = (filePath: string) => FileManipulator | null; -const initTaskRunner = (numOfTasks: number) => { - const pool = initWorker(numOfTasks, new URL('./workers/fileProcessWorker.js', import.meta.url).href); - return (task: FileProcessTask) => pool.run(task); -}; - export const processFiles = async ( rawFiles: RawFile[], config: RepomixConfigMerged, @@ -26,7 +21,10 @@ export const processFiles = async ( getFileManipulator, }, ): Promise => { - const runTask = deps.initTaskRunner(rawFiles.length); + const taskRunner = deps.initTaskRunner( + rawFiles.length, + new URL('./workers/fileProcessWorker.js', import.meta.url).href, + ); const tasks = rawFiles.map( (rawFile, index) => ({ @@ -44,7 +42,7 @@ export const processFiles = async ( const results = await Promise.all( tasks.map((task) => - runTask(task).then((result) => { + taskRunner.run(task).then((result) => { completedTasks++; progressCallback(`Processing file... 
(${completedTasks}/${totalTasks}) ${pc.dim(task.rawFile.path)}`); logger.trace(`Processing file... (${completedTasks}/${totalTasks}) ${task.rawFile.path}`); @@ -61,5 +59,8 @@ export const processFiles = async ( } catch (error) { logger.error('Error during file processing:', error); throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); } }; diff --git a/src/core/file/fileSearch.ts b/src/core/file/fileSearch.ts index 974822291..563208043 100644 --- a/src/core/file/fileSearch.ts +++ b/src/core/file/fileSearch.ts @@ -1,13 +1,13 @@ import type { Stats } from 'node:fs'; import fs from 'node:fs/promises'; import path from 'node:path'; -import { globby } from 'globby'; import { minimatch } from 'minimatch'; import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { defaultIgnoreList } from '../../config/defaultIgnore.js'; import { RepomixError } from '../../shared/errorHandle.js'; import { logger } from '../../shared/logger.js'; import { sortPaths } from './filePathSort.js'; +import { executeGlobbyInWorker } from './globbyExecute.js'; import { PermissionError, checkDirectoryPermissions } from './permissionCheck.js'; export interface FileSearchResult { @@ -191,7 +191,7 @@ export const searchFiles = async ( logger.trace('Include patterns with explicit files:', includePatterns); - const filePaths = await globby(includePatterns, { + const filePaths = await executeGlobbyInWorker(includePatterns, { cwd: rootDir, ignore: [...adjustedIgnorePatterns], ignoreFiles: [...ignoreFilePatterns], @@ -212,7 +212,7 @@ export const searchFiles = async ( let emptyDirPaths: string[] = []; if (config.output.includeEmptyDirectories) { - const directories = await globby(includePatterns, { + const directories = await executeGlobbyInWorker(includePatterns, { cwd: rootDir, ignore: [...adjustedIgnorePatterns], ignoreFiles: [...ignoreFilePatterns], diff --git a/src/core/file/globbyExecute.ts b/src/core/file/globbyExecute.ts new file mode 100644 
index 000000000..48e74f18d --- /dev/null +++ b/src/core/file/globbyExecute.ts @@ -0,0 +1,42 @@ +import type { Options } from 'globby'; +import { logger } from '../../shared/logger.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; +import type { GlobbyTask } from './workers/globbyWorker.js'; + +/** + * Execute globby in worker to isolate memory usage + */ +export const executeGlobbyInWorker = async ( + patterns: string[], + options: Options, + deps = { + initTaskRunner, + }, +): Promise<string[]> => { + const taskRunner = deps.initTaskRunner( + 1, + new URL('./workers/globbyWorker.js', import.meta.url).href, + ); + + try { + logger.trace('Starting globby in worker for memory isolation'); + const startTime = process.hrtime.bigint(); + + const result = await taskRunner.run({ + patterns, + options, + }); + + const endTime = process.hrtime.bigint(); + const duration = Number(endTime - startTime) / 1e6; + logger.trace(`Globby completed in worker in ${duration.toFixed(2)}ms`); + + return result; + } catch (error) { + logger.error('Error during globby execution:', error); + throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); + } +}; diff --git a/src/core/file/workers/globbyWorker.ts b/src/core/file/workers/globbyWorker.ts new file mode 100644 index 000000000..682e07d3e --- /dev/null +++ b/src/core/file/workers/globbyWorker.ts @@ -0,0 +1,10 @@ +import { type Options, globby } from 'globby'; + +export interface GlobbyTask { + patterns: string[]; + options: Options; +} + +export default async ({ patterns, options }: GlobbyTask): Promise<string[]> => { + return globby(patterns, options); +}; diff --git a/src/core/metrics/calculateAllFileMetrics.ts b/src/core/metrics/calculateAllFileMetrics.ts index c7ac3cffa..e15541bd2 100644 --- a/src/core/metrics/calculateAllFileMetrics.ts +++ b/src/core/metrics/calculateAllFileMetrics.ts @@ -1,17 +1,12 @@ import pc from 'picocolors'; import type { TiktokenEncoding } from 'tiktoken'; import { logger 
} from '../../shared/logger.js'; -import { initWorker } from '../../shared/processConcurrency.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../shared/types.js'; import type { ProcessedFile } from '../file/fileTypes.js'; import type { FileMetricsTask } from './workers/fileMetricsWorker.js'; import type { FileMetrics } from './workers/types.js'; -const initTaskRunner = (numOfTasks: number) => { - const pool = initWorker(numOfTasks, new URL('./workers/fileMetricsWorker.js', import.meta.url).href); - return (task: FileMetricsTask) => pool.run(task); -}; - export const calculateAllFileMetrics = async ( processedFiles: ProcessedFile[], tokenCounterEncoding: TiktokenEncoding, @@ -20,7 +15,10 @@ export const calculateAllFileMetrics = async ( initTaskRunner, }, ): Promise => { - const runTask = deps.initTaskRunner(processedFiles.length); + const taskRunner = deps.initTaskRunner( + processedFiles.length, + new URL('./workers/fileMetricsWorker.js', import.meta.url).href, + ); const tasks = processedFiles.map( (file, index) => ({ @@ -38,7 +36,7 @@ export const calculateAllFileMetrics = async ( let completedTasks = 0; const results = await Promise.all( tasks.map((task) => - runTask(task).then((result) => { + taskRunner.run(task).then((result) => { completedTasks++; progressCallback(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${pc.dim(task.file.path)}`); logger.trace(`Calculating metrics... 
(${completedTasks}/${task.totalFiles}) ${task.file.path}`); @@ -55,6 +53,9 @@ export const calculateAllFileMetrics = async ( } catch (error) { logger.error('Error during metrics calculation:', error); throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); } }; @@ -74,7 +75,10 @@ export const calculateSelectiveFileMetrics = async ( return []; } - const runTask = deps.initTaskRunner(filesToProcess.length); + const taskRunner = deps.initTaskRunner( + filesToProcess.length, + new URL('./workers/fileMetricsWorker.js', import.meta.url).href, + ); const tasks = filesToProcess.map( (file, index) => ({ @@ -92,7 +96,7 @@ export const calculateSelectiveFileMetrics = async ( let completedTasks = 0; const results = await Promise.all( tasks.map((task) => - runTask(task).then((result) => { + taskRunner.run(task).then((result) => { completedTasks++; progressCallback(`Calculating metrics... (${completedTasks}/${task.totalFiles}) ${pc.dim(task.file.path)}`); logger.trace(`Calculating metrics... 
(${completedTasks}/${task.totalFiles}) ${task.file.path}`); @@ -109,5 +113,8 @@ } catch (error) { logger.error('Error during selective metrics calculation:', error); throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); } }; diff --git a/src/core/metrics/calculateGitDiffMetrics.ts b/src/core/metrics/calculateGitDiffMetrics.ts new file mode 100644 index 000000000..994c72859 --- /dev/null +++ b/src/core/metrics/calculateGitDiffMetrics.ts @@ -0,0 +1,53 @@ +import type { RepomixConfigMerged } from '../../config/configSchema.js'; +import { logger } from '../../shared/logger.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; +import type { GitDiffResult } from '../git/gitDiffHandle.js'; +import type { GitDiffMetricsTask } from './workers/gitDiffMetricsWorker.js'; + +/** + * Calculate token count for git diffs if included + */ +export const calculateGitDiffMetrics = async ( + config: RepomixConfigMerged, + gitDiffResult: GitDiffResult | undefined, + deps = { + initTaskRunner, + }, +): Promise<number> => { + if (!config.output.git?.includeDiffs || !gitDiffResult) { + return 0; + } + + // Check if we have any diff content to process + if (!gitDiffResult.workTreeDiffContent && !gitDiffResult.stagedDiffContent) { + return 0; + } + + const taskRunner = deps.initTaskRunner( + 1, // Single task for git diff calculation + new URL('./workers/gitDiffMetricsWorker.js', import.meta.url).href, + ); + + try { + const startTime = process.hrtime.bigint(); + logger.trace('Starting git diff token calculation using worker'); + + const result = await taskRunner.run({ + workTreeDiffContent: gitDiffResult.workTreeDiffContent, + stagedDiffContent: gitDiffResult.stagedDiffContent, + encoding: config.tokenCount.encoding, + }); + + const endTime = process.hrtime.bigint(); + const duration = Number(endTime - startTime) / 1e6; + logger.trace(`Git diff token calculation completed in 
${duration.toFixed(2)}ms`); + + return result; + } catch (error) { + logger.error('Error during git diff token calculation:', error); + throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); + } +}; diff --git a/src/core/metrics/calculateMetrics.ts b/src/core/metrics/calculateMetrics.ts index 4040745ac..3512aa92a 100644 --- a/src/core/metrics/calculateMetrics.ts +++ b/src/core/metrics/calculateMetrics.ts @@ -14,7 +14,7 @@ export interface CalculateMetricsResult { gitDiffTokenCount: number; } -import { TokenCounter } from './TokenCounter.js'; +import { calculateGitDiffMetrics } from './calculateGitDiffMetrics.js'; export const calculateMetrics = async ( processedFiles: ProcessedFile[], @@ -26,27 +26,11 @@ export const calculateMetrics = async ( calculateAllFileMetrics, calculateSelectiveFileMetrics, calculateOutputMetrics, + calculateGitDiffMetrics, }, ): Promise => { progressCallback('Calculating metrics...'); - // Calculate token count for git diffs if included - let gitDiffTokenCount = 0; - if (config.output.git?.includeDiffs && gitDiffResult) { - const tokenCounter = new TokenCounter(config.tokenCount.encoding); - - const countPromises = []; - if (gitDiffResult.workTreeDiffContent) { - countPromises.push(Promise.resolve().then(() => tokenCounter.countTokens(gitDiffResult.workTreeDiffContent))); - } - if (gitDiffResult.stagedDiffContent) { - countPromises.push(Promise.resolve().then(() => tokenCounter.countTokens(gitDiffResult.stagedDiffContent))); - } - - gitDiffTokenCount = (await Promise.all(countPromises)).reduce((sum, count) => sum + count, 0); - tokenCounter.free(); - } - // For top files display optimization: calculate token counts only for top files by character count const topFilesLength = config.output.topFilesLength; const candidateFilesCount = Math.min(processedFiles.length, Math.max(topFilesLength * 10, topFilesLength)); @@ -58,9 +42,10 @@ export const calculateMetrics = async ( const topFilePaths = 
topFilesByChar.map((file) => file.path); - const [selectiveFileMetrics, totalTokens] = await Promise.all([ + const [selectiveFileMetrics, totalTokens, gitDiffTokenCount] = await Promise.all([ deps.calculateSelectiveFileMetrics(processedFiles, topFilePaths, config.tokenCount.encoding, progressCallback), deps.calculateOutputMetrics(output, config.tokenCount.encoding, config.output.filePath), + deps.calculateGitDiffMetrics(config, gitDiffResult), ]); const totalFiles = processedFiles.length; diff --git a/src/core/metrics/calculateOutputMetrics.ts b/src/core/metrics/calculateOutputMetrics.ts index b40efc52a..04bbf8930 100644 --- a/src/core/metrics/calculateOutputMetrics.ts +++ b/src/core/metrics/calculateOutputMetrics.ts @@ -1,16 +1,11 @@ import type { TiktokenEncoding } from 'tiktoken'; import { logger } from '../../shared/logger.js'; -import { initWorker } from '../../shared/processConcurrency.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; import type { OutputMetricsTask } from './workers/outputMetricsWorker.js'; const CHUNK_SIZE = 1000; const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1000KB -const initTaskRunner = (numOfTasks: number) => { - const pool = initWorker(numOfTasks, new URL('./workers/outputMetricsWorker.js', import.meta.url).href); - return (task: OutputMetricsTask) => pool.run(task); -}; - export const calculateOutputMetrics = async ( content: string, encoding: TiktokenEncoding, @@ -21,7 +16,10 @@ export const calculateOutputMetrics = async ( ): Promise => { const shouldRunInParallel = content.length > MIN_CONTENT_LENGTH_FOR_PARALLEL; const numOfTasks = shouldRunInParallel ? 
CHUNK_SIZE : 1; - const runTask = deps.initTaskRunner(numOfTasks); + const taskRunner = deps.initTaskRunner( + numOfTasks, + new URL('./workers/outputMetricsWorker.js', import.meta.url).href, + ); try { logger.trace(`Starting output token count for ${path || 'output'}`); @@ -41,7 +39,7 @@ export const calculateOutputMetrics = async ( // Process chunks in parallel const chunkResults = await Promise.all( chunks.map((chunk, index) => - runTask({ + taskRunner.run({ content: chunk, encoding, path: path ? `${path}-chunk-${index}` : undefined, @@ -53,7 +51,7 @@ export const calculateOutputMetrics = async ( result = chunkResults.reduce((sum, count) => sum + count, 0); } else { // Process small content directly - result = await runTask({ content, encoding, path }); + result = await taskRunner.run({ content, encoding, path }); } const endTime = process.hrtime.bigint(); @@ -64,5 +62,8 @@ export const calculateOutputMetrics = async ( } catch (error) { logger.error('Error during token count:', error); throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); } }; diff --git a/src/core/metrics/tokenCounterFactory.ts b/src/core/metrics/tokenCounterFactory.ts index 4f9ae1577..8f51f0ba5 100644 --- a/src/core/metrics/tokenCounterFactory.ts +++ b/src/core/metrics/tokenCounterFactory.ts @@ -1,4 +1,5 @@ import type { TiktokenEncoding } from 'tiktoken'; +import { logger } from '../../shared/logger.js'; import { TokenCounter } from './TokenCounter.js'; // Worker-level cache for TokenCounter instances by encoding @@ -21,9 +22,10 @@ export const getTokenCounter = (encoding: TiktokenEncoding): TokenCounter => { * Free all TokenCounter resources and clear the cache. * This should be called when the worker is terminating. 
*/ -export const freeTokenCounter = (): void => { - for (const tokenCounter of tokenCounters.values()) { +export const freeTokenCounters = (): void => { + for (const [encoding, tokenCounter] of tokenCounters.entries()) { tokenCounter.free(); + logger.debug(`Freed TokenCounter resources for encoding: ${encoding}`); } tokenCounters.clear(); }; diff --git a/src/core/metrics/workers/fileMetricsWorker.ts b/src/core/metrics/workers/fileMetricsWorker.ts index f18db7bee..399cb7c59 100644 --- a/src/core/metrics/workers/fileMetricsWorker.ts +++ b/src/core/metrics/workers/fileMetricsWorker.ts @@ -1,7 +1,7 @@ import type { TiktokenEncoding } from 'tiktoken'; import { logger, setLogLevelByWorkerData } from '../../../shared/logger.js'; import type { ProcessedFile } from '../../file/fileTypes.js'; -import { freeTokenCounter, getTokenCounter } from '../tokenCounterFactory.js'; +import { freeTokenCounters, getTokenCounter } from '../tokenCounterFactory.js'; import type { FileMetrics } from './types.js'; // Initialize logger configuration from workerData at module load time @@ -39,5 +39,5 @@ export const calculateIndividualFileMetrics = async ( // Cleanup when worker is terminated process.on('exit', () => { - freeTokenCounter(); + freeTokenCounters(); }); diff --git a/src/core/metrics/workers/gitDiffMetricsWorker.ts b/src/core/metrics/workers/gitDiffMetricsWorker.ts new file mode 100644 index 000000000..a285bc484 --- /dev/null +++ b/src/core/metrics/workers/gitDiffMetricsWorker.ts @@ -0,0 +1,42 @@ +import type { TiktokenEncoding } from 'tiktoken'; +import { logger, setLogLevelByWorkerData } from '../../../shared/logger.js'; +import { freeTokenCounters, getTokenCounter } from '../tokenCounterFactory.js'; + +// Initialize logger configuration from workerData at module load time +// This must be called before any logging operations in the worker +setLogLevelByWorkerData(); + +export interface GitDiffMetricsTask { + workTreeDiffContent?: string; + stagedDiffContent?: string; + encoding: 
TiktokenEncoding; +} + +export default async ({ workTreeDiffContent, stagedDiffContent, encoding }: GitDiffMetricsTask): Promise => { + const processStartAt = process.hrtime.bigint(); + + const tokenCounter = getTokenCounter(encoding); + + const countPromises = []; + if (workTreeDiffContent) { + countPromises.push(Promise.resolve().then(() => tokenCounter.countTokens(workTreeDiffContent))); + } + if (stagedDiffContent) { + countPromises.push(Promise.resolve().then(() => tokenCounter.countTokens(stagedDiffContent))); + } + + const results = await Promise.all(countPromises); + const totalTokens = results.reduce((sum, count) => sum + count, 0); + + const processEndAt = process.hrtime.bigint(); + logger.trace( + `Calculated git diff metrics. Tokens: ${totalTokens}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`, + ); + + return totalTokens; +}; + +// Cleanup when worker is terminated +process.on('exit', () => { + freeTokenCounters(); +}); diff --git a/src/core/metrics/workers/outputMetricsWorker.ts b/src/core/metrics/workers/outputMetricsWorker.ts index 22638ca73..738f6c68c 100644 --- a/src/core/metrics/workers/outputMetricsWorker.ts +++ b/src/core/metrics/workers/outputMetricsWorker.ts @@ -1,6 +1,6 @@ import type { TiktokenEncoding } from 'tiktoken'; import { logger, setLogLevelByWorkerData } from '../../../shared/logger.js'; -import { freeTokenCounter, getTokenCounter } from '../tokenCounterFactory.js'; +import { freeTokenCounters, getTokenCounter } from '../tokenCounterFactory.js'; // Initialize logger configuration from workerData at module load time // This must be called before any logging operations in the worker @@ -27,5 +27,5 @@ export default async ({ content, encoding, path }: OutputMetricsTask): Promise { - freeTokenCounter(); + freeTokenCounters(); }); diff --git a/src/core/packager.ts b/src/core/packager.ts index 103c0f15e..950bab017 100644 --- a/src/core/packager.ts +++ b/src/core/packager.ts @@ -1,5 +1,6 @@ import type { 
RepomixConfigMerged } from '../config/configSchema.js'; import { RepomixError } from '../shared/errorHandle.js'; +import { logMemoryUsage, withMemoryLogging } from '../shared/memoryUtils.js'; import type { RepomixProgressCallback } from '../shared/types.js'; import { collectFiles } from './file/fileCollect.js'; import { sortPaths } from './file/filePathSort.js'; @@ -52,57 +53,71 @@ export const pack = async ( ...overrideDeps, }; + logMemoryUsage('Pack - Start'); + progressCallback('Searching for files...'); - const filePathsByDir = await Promise.all( - rootDirs.map(async (rootDir) => ({ - rootDir, - filePaths: (await deps.searchFiles(rootDir, config, explicitFiles)).filePaths, - })), + const filePathsByDir = await withMemoryLogging('Search Files', async () => + Promise.all( + rootDirs.map(async (rootDir) => ({ + rootDir, + filePaths: (await deps.searchFiles(rootDir, config, explicitFiles)).filePaths, + })), + ), ); // Sort file paths progressCallback('Sorting files...'); const allFilePaths = filePathsByDir.flatMap(({ filePaths }) => filePaths); - const sortedFilePaths = await deps.sortPaths(allFilePaths); + const sortedFilePaths = deps.sortPaths(allFilePaths); // Regroup sorted file paths by rootDir const sortedFilePathsByDir = rootDirs.map((rootDir) => ({ rootDir, - filePaths: sortedFilePaths.filter((filePath) => + filePaths: sortedFilePaths.filter((filePath: string) => filePathsByDir.find((item) => item.rootDir === rootDir)?.filePaths.includes(filePath), ), })); progressCallback('Collecting files...'); - const rawFiles = ( - await Promise.all( - sortedFilePathsByDir.map(({ rootDir, filePaths }) => - deps.collectFiles(filePaths, rootDir, config, progressCallback), - ), - ) - ).reduce((acc: RawFile[], curr: RawFile[]) => acc.concat(...curr), []); + const rawFiles = await withMemoryLogging('Collect Files', async () => + ( + await Promise.all( + sortedFilePathsByDir.map(({ rootDir, filePaths }) => + deps.collectFiles(filePaths, rootDir, config, progressCallback), + 
), + ) + ).reduce((acc: RawFile[], curr: RawFile[]) => acc.concat(...curr), []), + ); // Get git diffs if enabled - run this before security check progressCallback('Getting git diffs...'); const gitDiffResult = await deps.getGitDiffs(rootDirs, config); // Run security check and get filtered safe files - const { safeFilePaths, safeRawFiles, suspiciousFilesResults, suspiciousGitDiffResults } = - await deps.validateFileSafety(rawFiles, progressCallback, config, gitDiffResult); + const { safeFilePaths, safeRawFiles, suspiciousFilesResults, suspiciousGitDiffResults } = await withMemoryLogging( + 'Security Check', + () => deps.validateFileSafety(rawFiles, progressCallback, config, gitDiffResult), + ); // Process files (remove comments, etc.) progressCallback('Processing files...'); - const processedFiles = await deps.processFiles(safeRawFiles, config, progressCallback); + const processedFiles = await withMemoryLogging('Process Files', () => + deps.processFiles(safeRawFiles, config, progressCallback), + ); progressCallback('Generating output...'); - const output = await deps.generateOutput(rootDirs, config, processedFiles, safeFilePaths, gitDiffResult); + const output = await withMemoryLogging('Generate Output', () => + deps.generateOutput(rootDirs, config, processedFiles, safeFilePaths, gitDiffResult), + ); progressCallback('Writing output file...'); - await deps.writeOutputToDisk(output, config); + await withMemoryLogging('Write Output', () => deps.writeOutputToDisk(output, config)); await deps.copyToClipboardIfEnabled(output, progressCallback, config); - const metrics = await deps.calculateMetrics(processedFiles, output, progressCallback, config, gitDiffResult); + const metrics = await withMemoryLogging('Calculate Metrics', () => + deps.calculateMetrics(processedFiles, output, progressCallback, config, gitDiffResult), + ); // Create a result object that includes metrics and security results const result = { @@ -113,5 +128,7 @@ export const pack = async ( safeFilePaths, 
}; + logMemoryUsage('Pack - End'); + return result; }; diff --git a/src/core/security/securityCheck.ts b/src/core/security/securityCheck.ts index 6cb888458..767b99514 100644 --- a/src/core/security/securityCheck.ts +++ b/src/core/security/securityCheck.ts @@ -1,6 +1,6 @@ import pc from 'picocolors'; import { logger } from '../../shared/logger.js'; -import { initWorker } from '../../shared/processConcurrency.js'; +import { initTaskRunner } from '../../shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../shared/types.js'; import type { RawFile } from '../file/fileTypes.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; @@ -12,11 +12,6 @@ export interface SuspiciousFileResult { type: SecurityCheckType; } -const initTaskRunner = (numOfTasks: number) => { - const pool = initWorker(numOfTasks, new URL('./workers/securityCheckWorker.js', import.meta.url).href); - return (task: SecurityCheckTask) => pool.run(task); -}; - export const runSecurityCheck = async ( rawFiles: RawFile[], progressCallback: RepomixProgressCallback = () => {}, @@ -46,7 +41,10 @@ export const runSecurityCheck = async ( } } - const runTask = deps.initTaskRunner(rawFiles.length + gitDiffTasks.length); + const taskRunner = deps.initTaskRunner( + rawFiles.length + gitDiffTasks.length, + new URL('./workers/securityCheckWorker.js', import.meta.url).href, + ); const fileTasks = rawFiles.map( (file) => ({ @@ -68,7 +66,7 @@ export const runSecurityCheck = async ( const results = await Promise.all( tasks.map((task) => - runTask(task).then((result) => { + taskRunner.run(task).then((result) => { completedTasks++; progressCallback(`Running security check... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`); logger.trace(`Running security check... 
(${completedTasks}/${totalTasks}) ${task.filePath}`); @@ -85,5 +83,8 @@ export const runSecurityCheck = async ( } catch (error) { logger.error('Error during security check:', error); throw error; + } finally { + // Always cleanup worker pool + await taskRunner.cleanup(); } }; diff --git a/src/shared/memoryUtils.ts b/src/shared/memoryUtils.ts new file mode 100644 index 000000000..fbebe8e50 --- /dev/null +++ b/src/shared/memoryUtils.ts @@ -0,0 +1,87 @@ +/** + * Memory utility functions for monitoring memory usage across the application + */ + +import { logger } from './logger.js'; + +export interface MemoryStats { + heapUsed: number; + heapTotal: number; + external: number; + rss: number; + heapUsagePercent: number; +} + +/** + * Convert bytes to MB with 2 decimal precision + */ +function bytesToMB(bytes: number): number { + return Math.round((bytes / 1024 / 1024) * 100) / 100; +} + +/** + * Get current memory usage statistics in MB + */ +export function getMemoryStats(): MemoryStats { + const usage = process.memoryUsage(); + + const heapUsed = bytesToMB(usage.heapUsed); + const heapTotal = bytesToMB(usage.heapTotal); + const external = bytesToMB(usage.external); + const rss = bytesToMB(usage.rss); + const heapUsagePercent = Math.round((heapUsed / heapTotal) * 10000) / 100; + + return { + heapUsed, + heapTotal, + external, + rss, + heapUsagePercent, + }; +} + +/** + * Log memory usage at trace level with a context message + */ +export function logMemoryUsage(context: string): void { + const stats = getMemoryStats(); + logger.trace( + `Memory [${context}] | Heap: ${stats.heapUsed}/${stats.heapTotal}MB (${stats.heapUsagePercent}%) | RSS: ${stats.rss}MB | Ext: ${stats.external}MB`, + ); +} + +/** + * Log memory usage difference between two points + */ +export function logMemoryDifference(context: string, before: MemoryStats, after: MemoryStats): void { + const heapDiff = after.heapUsed - before.heapUsed; + const rssDiff = after.rss - before.rss; + const externalDiff = 
after.external - before.external; + + const formatDiff = (diff: number) => `${diff >= 0 ? '+' : ''}${diff.toFixed(2)}`; + + logger.trace( + `Memory [${context} - Delta] | Heap: ${formatDiff(heapDiff)}MB | RSS: ${formatDiff(rssDiff)}MB | Ext: ${formatDiff(externalDiff)}MB`, + ); +} + +/** + * Execute a function and log memory usage before and after + */ +export async function withMemoryLogging(context: string, fn: () => Promise): Promise { + const before = getMemoryStats(); + logMemoryUsage(`${context} - Before`); + + try { + const result = await fn(); + const after = getMemoryStats(); + logMemoryUsage(`${context} - After`); + logMemoryDifference(context, before, after); + return result; + } catch (error) { + const after = getMemoryStats(); + logMemoryUsage(`${context} - After (Error)`); + logMemoryDifference(context, before, after); + throw error; + } +} diff --git a/src/shared/processConcurrency.ts b/src/shared/processConcurrency.ts index baa97699d..be6dc6ad0 100644 --- a/src/shared/processConcurrency.ts +++ b/src/shared/processConcurrency.ts @@ -23,7 +23,7 @@ export const getWorkerThreadCount = (numOfTasks: number): { minThreads: number; }; }; -export const initWorker = (numOfTasks: number, workerPath: string): Tinypool => { +export const createWorkerPool = (numOfTasks: number, workerPath: string): Tinypool => { const { minThreads, maxThreads } = getWorkerThreadCount(numOfTasks); logger.trace( @@ -34,6 +34,8 @@ export const initWorker = (numOfTasks: number, workerPath: string): Tinypool => const pool = new Tinypool({ filename: workerPath, + // Use child_process for better memory management + runtime: 'child_process', minThreads, maxThreads, idleTimeout: 5000, @@ -49,3 +51,37 @@ export const initWorker = (numOfTasks: number, workerPath: string): Tinypool => return pool; }; + +export const cleanupWorkerPool = async (pool: Tinypool): Promise => { + try { + logger.debug('Cleaning up worker pool...'); + + // Check if running in Bun runtime + const isBun = 
process.versions?.bun; + + if (isBun) { + // If running in Bun, we cannot use Tinypool's destroy method + logger.debug('Running in Bun environment, skipping Tinypool destroy method'); + } else { + // Standard Node.js cleanup + await pool.destroy(); + } + + logger.debug('Worker pool cleaned up successfully'); + } catch (error) { + logger.debug('Error during worker pool cleanup:', error); + } +}; + +export interface TaskRunner { + run: (task: T) => Promise; + cleanup: () => Promise; +} + +export const initTaskRunner = (numOfTasks: number, workerPath: string): TaskRunner => { + const pool = createWorkerPool(numOfTasks, workerPath); + return { + run: (task: T) => pool.run(task), + cleanup: () => cleanupWorkerPool(pool), + }; +}; diff --git a/tests/core/file/fileCollect.test.ts b/tests/core/file/fileCollect.test.ts index cd36006af..2b8e6e50b 100644 --- a/tests/core/file/fileCollect.test.ts +++ b/tests/core/file/fileCollect.test.ts @@ -21,9 +21,14 @@ vi.mock('jschardet'); vi.mock('iconv-lite'); vi.mock('../../../src/shared/logger'); -const mockInitTaskRunner = () => { - return async (task: FileCollectTask) => { - return await fileCollectWorker(task); +const mockInitTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + return (await fileCollectWorker(task as FileCollectTask)) as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; diff --git a/tests/core/file/fileProcess.test.ts b/tests/core/file/fileProcess.test.ts index 3476f3226..fa1f39236 100644 --- a/tests/core/file/fileProcess.test.ts +++ b/tests/core/file/fileProcess.test.ts @@ -19,9 +19,14 @@ const mockGetFileManipulator = (filePath: string): FileManipulator | null => { return null; }; -const mockInitTaskRunner = (numOfTasks: number) => { - return async (task: FileProcessTask) => { - return await fileProcessWorker(task); +const mockInitTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + return 
(await fileProcessWorker(task as FileProcessTask)) as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; diff --git a/tests/core/file/fileSearch.test.ts b/tests/core/file/fileSearch.test.ts index de6dad94e..d81faa43a 100644 --- a/tests/core/file/fileSearch.test.ts +++ b/tests/core/file/fileSearch.test.ts @@ -17,10 +17,14 @@ import { PermissionError } from '../../../src/core/file/permissionCheck.js'; import { RepomixError } from '../../../src/shared/errorHandle.js'; import { createMockConfig, isWindows } from '../../testing/testUtils.js'; +import { executeGlobbyInWorker } from '../../../src/core/file/globbyExecute.js'; import { checkDirectoryPermissions } from '../../../src/core/file/permissionCheck.js'; vi.mock('fs/promises'); vi.mock('globby'); +vi.mock('../../../src/core/file/globbyExecute.js', () => ({ + executeGlobbyInWorker: vi.fn(), +})); vi.mock('../../../src/core/file/permissionCheck.js', () => ({ checkDirectoryPermissions: vi.fn(), PermissionError: class extends Error { @@ -48,6 +52,8 @@ describe('fileSearch', () => { hasAllPermission: true, details: { read: true, write: true, execute: true }, }); + // Default mock for executeGlobbyInWorker + vi.mocked(executeGlobbyInWorker).mockResolvedValue([]); }); describe('getIgnoreFilePaths', () => { @@ -87,7 +93,7 @@ describe('fileSearch', () => { const mockFilePaths = ['src/file1.js', 'src/file2.js']; const mockEmptyDirs = ['src/empty', 'empty-root']; - vi.mocked(globby).mockImplementation(async (_, options) => { + vi.mocked(executeGlobbyInWorker).mockImplementation(async (_, options) => { if (options?.onlyDirectories) { return mockEmptyDirs; } @@ -111,7 +117,7 @@ describe('fileSearch', () => { const mockFilePaths = ['src/file1.js', 'src/file2.js']; - vi.mocked(globby).mockImplementation(async (_, options) => { + vi.mocked(executeGlobbyInWorker).mockImplementation(async (_, options) => { if (options?.onlyDirectories) { throw new Error('Should not search for directories when disabled'); 
} @@ -122,7 +128,7 @@ describe('fileSearch', () => { expect(result.filePaths).toEqual(mockFilePaths); expect(result.emptyDirPaths).toEqual([]); - expect(globby).toHaveBeenCalledTimes(1); + expect(executeGlobbyInWorker).toHaveBeenCalledTimes(1); }); }); @@ -252,12 +258,12 @@ node_modules }, }); - vi.mocked(globby).mockResolvedValue(['file1.js', 'file2.js']); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(['file1.js', 'file2.js']); vi.mocked(fs.access).mockResolvedValue(undefined); await searchFiles('/mock/root', mockConfig); - expect(globby).toHaveBeenCalledWith( + expect(executeGlobbyInWorker).toHaveBeenCalledWith( ['**/*.js'], expect.objectContaining({ cwd: '/mock/root', @@ -293,7 +299,7 @@ node_modules '/mock/root/subdir/.gitignore': 'ignored.js', }; - vi.mocked(globby).mockImplementation(async () => { + vi.mocked(executeGlobbyInWorker).mockImplementation(async () => { // Simulate filtering files based on .gitignore return mockFileStructure.filter((file) => { const relativePath = file.replace('root/', ''); @@ -334,7 +340,7 @@ node_modules 'root/subdir/ignored.js', ]; - vi.mocked(globby).mockResolvedValue(mockFileStructure); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(mockFileStructure); const result = await searchFiles('/mock/root', mockConfig); @@ -366,7 +372,7 @@ node_modules }); // Mock globby to return some test files - vi.mocked(globby).mockResolvedValue(['file1.js', 'file2.js']); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(['file1.js', 'file2.js']); const mockConfig = createMockConfig({ ignore: { @@ -379,8 +385,8 @@ node_modules const result = await searchFiles('/test/dir', mockConfig); // Check that globby was called with correct ignore patterns - const globbyCall = vi.mocked(globby).mock.calls[0]; - const ignorePatterns = globbyCall[1]?.ignore as string[]; + const executeGlobbyCall = vi.mocked(executeGlobbyInWorker).mock.calls[0]; + const ignorePatterns = executeGlobbyCall[1]?.ignore as string[]; // Verify .git file (not directory) 
is in ignore patterns expect(ignorePatterns).toContain('.git'); @@ -410,7 +416,7 @@ node_modules }); // Mock globby to return some test files - vi.mocked(globby).mockResolvedValue(['file1.js', 'file2.js']); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(['file1.js', 'file2.js']); const mockConfig = createMockConfig({ ignore: { @@ -423,8 +429,8 @@ node_modules const result = await searchFiles('/test/dir', mockConfig); // Check that globby was called with correct ignore patterns - const globbyCall = vi.mocked(globby).mock.calls[0]; - const ignorePatterns = globbyCall[1]?.ignore as string[]; + const executeGlobbyCall = vi.mocked(executeGlobbyInWorker).mock.calls[0]; + const ignorePatterns = executeGlobbyCall[1]?.ignore as string[]; // Verify .git/** is in ignore patterns for regular git repos expect(ignorePatterns).toContain('.git/**'); @@ -553,7 +559,7 @@ node_modules }); test('should succeed when target path is a valid directory', async () => { - vi.mocked(globby).mockResolvedValue(['test.js']); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(['test.js']); const mockConfig = createMockConfig(); @@ -581,7 +587,7 @@ node_modules ]; // Mock globby to return the expected filtered files - vi.mocked(globby).mockResolvedValue(['src/file1.ts', 'src/file3.ts']); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(['src/file1.ts', 'src/file3.ts']); const result = await searchFiles('/test', mockConfig, explicitFiles); @@ -602,7 +608,7 @@ node_modules const explicitFiles = ['/test/src/main.ts', '/test/tests/unit.test.ts', '/test/lib/utils.ts']; // Mock globby to return the expected filtered files - vi.mocked(globby).mockResolvedValue(['src/main.ts', 'lib/utils.ts']); + vi.mocked(executeGlobbyInWorker).mockResolvedValue(['src/main.ts', 'lib/utils.ts']); const result = await searchFiles('/test', mockConfig, explicitFiles); diff --git a/tests/core/metrics/calculateAllFileMetrics.test.ts b/tests/core/metrics/calculateAllFileMetrics.test.ts index cdb8f0813..29b52ad79 
100644 --- a/tests/core/metrics/calculateAllFileMetrics.test.ts +++ b/tests/core/metrics/calculateAllFileMetrics.test.ts @@ -12,9 +12,14 @@ vi.mock('../../shared/processConcurrency', () => ({ getProcessConcurrency: () => 1, })); -const mockInitTaskRunner = (numOfTasks: number) => { - return async (task: FileMetricsTask) => { - return await fileMetricsWorker(task); +const mockInitTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + return (await fileMetricsWorker(task as FileMetricsTask)) as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; diff --git a/tests/core/metrics/calculateMetrics.test.ts b/tests/core/metrics/calculateMetrics.test.ts index 350453b69..9f1aa33da 100644 --- a/tests/core/metrics/calculateMetrics.test.ts +++ b/tests/core/metrics/calculateMetrics.test.ts @@ -62,6 +62,7 @@ describe('calculateMetrics', () => { calculateAllFileMetrics, calculateSelectiveFileMetrics, calculateOutputMetrics: () => Promise.resolve(30), + calculateGitDiffMetrics: () => Promise.resolve(0), }); expect(progressCallback).toHaveBeenCalledWith('Calculating metrics...'); diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts b/tests/core/metrics/calculateOutputMetrics.test.ts index 62baf1f63..7f97e4c38 100644 --- a/tests/core/metrics/calculateOutputMetrics.test.ts +++ b/tests/core/metrics/calculateOutputMetrics.test.ts @@ -6,9 +6,14 @@ import { logger } from '../../../src/shared/logger.js'; vi.mock('../../../src/shared/logger'); -const mockInitTaskRunner = () => { - return async (task: OutputMetricsTask) => { - return await outputMetricsWorker(task); +const mockInitTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + return (await outputMetricsWorker(task as OutputMetricsTask)) as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; @@ -41,9 +46,14 @@ describe('calculateOutputMetrics', () => { const encoding = 
'o200k_base'; const mockError = new Error('Worker error'); - const mockErrorTaskRunner = () => { - return async () => { - throw mockError; + const mockErrorTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + throw mockError; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; @@ -86,11 +96,16 @@ describe('calculateOutputMetrics', () => { const path = 'large-file.txt'; let chunksProcessed = 0; - const mockParallelTaskRunner = () => { - return async (task: OutputMetricsTask) => { - chunksProcessed++; - // Return a fixed token count for each chunk - return 100; + const mockParallelTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + chunksProcessed++; + // Return a fixed token count for each chunk + return 100 as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; @@ -107,9 +122,14 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const mockError = new Error('Parallel processing error'); - const mockErrorTaskRunner = () => { - return async () => { - throw mockError; + const mockErrorTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + throw mockError; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; @@ -127,10 +147,16 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const processedChunks: string[] = []; - const mockChunkTrackingTaskRunner = () => { - return async (task: OutputMetricsTask) => { - processedChunks.push(task.content); - return task.content.length; + const mockChunkTrackingTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + const outputTask = task as OutputMetricsTask; + processedChunks.push(outputTask.content); + return outputTask.content.length as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; diff 
--git a/tests/core/metrics/diffTokenCount.test.ts b/tests/core/metrics/diffTokenCount.test.ts index f7687b293..11d7e5f4f 100644 --- a/tests/core/metrics/diffTokenCount.test.ts +++ b/tests/core/metrics/diffTokenCount.test.ts @@ -111,17 +111,15 @@ index 123..456 100644 calculateAllFileMetrics: mockCalculateAllFileMetrics, calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]), calculateOutputMetrics: mockCalculateOutputMetrics, + calculateGitDiffMetrics: vi.fn().mockResolvedValue(25), }, ); - // Check TokenCounter was instantiated with the correct encoding - expect(TokenCounter).toHaveBeenCalledWith('o200k_base'); - // Check token counting was called with the diff content expect(result).toHaveProperty('gitDiffTokenCount'); - // Our mock counts words as tokens - the sample diff should have multiple tokens - expect(result.gitDiffTokenCount).toBeGreaterThan(0); + // Mock returns 25 tokens for git diff content + expect(result.gitDiffTokenCount).toBe(25); }); test('should not calculate diff token count when diffs are disabled', async () => { @@ -194,10 +192,11 @@ index 123..456 100644 calculateAllFileMetrics: mockCalculateAllFileMetrics, calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]), calculateOutputMetrics: mockCalculateOutputMetrics, + calculateGitDiffMetrics: vi.fn().mockResolvedValue(0), }, ); - // TokenCounter should not be called for diff content + // Git diff should return 0 when disabled expect(result.gitDiffTokenCount).toBe(0); }); @@ -272,10 +271,11 @@ index 123..456 100644 calculateAllFileMetrics: mockCalculateAllFileMetrics, calculateSelectiveFileMetrics: vi.fn().mockResolvedValue([]), calculateOutputMetrics: mockCalculateOutputMetrics, + calculateGitDiffMetrics: vi.fn().mockResolvedValue(0), }, ); - // gitDiffTokenCount should not be set + // Git diff should return 0 when content is undefined expect(result.gitDiffTokenCount).toBe(0); }); }); diff --git a/tests/core/packager.test.ts b/tests/core/packager.test.ts index 0985e5876..a1820e13b 
100644 --- a/tests/core/packager.test.ts +++ b/tests/core/packager.test.ts @@ -41,7 +41,7 @@ describe('packager', () => { filePaths: mockFilePaths, emptyDirPaths: [], }), - sortPaths: vi.fn().mockImplementation((paths) => Promise.resolve(paths)), + sortPaths: vi.fn().mockImplementation((paths) => paths), collectFiles: vi.fn().mockResolvedValue(mockRawFiles), processFiles: vi.fn().mockReturnValue(mockProcessedFiles), validateFileSafety: vi.fn().mockResolvedValue({ diff --git a/tests/core/security/securityCheck.test.ts b/tests/core/security/securityCheck.test.ts index 651a4cfc1..3037819bb 100644 --- a/tests/core/security/securityCheck.test.ts +++ b/tests/core/security/securityCheck.test.ts @@ -17,6 +17,13 @@ vi.mock('../../../src/shared/processConcurrency', () => ({ return await securityCheckWorker(task); }), })), + cleanupWorkerPool: vi.fn(), + initTaskRunner: vi.fn(() => ({ + run: vi.fn().mockImplementation(async (task: SecurityCheckTask) => { + return await securityCheckWorker(task); + }), + cleanup: vi.fn(), + })), })); const mockFiles: RawFile[] = [ @@ -32,9 +39,14 @@ const mockFiles: RawFile[] = [ }, ]; -const mockInitTaskRunner = () => { - return async (task: SecurityCheckTask) => { - return await securityCheckWorker(task); +const mockInitTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + return (await securityCheckWorker(task as SecurityCheckTask)) as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; @@ -65,8 +77,13 @@ describe('runSecurityCheck', () => { it('should handle worker errors gracefully', async () => { const mockError = new Error('Worker error'); const mockErrorTaskRunner = () => { - return async () => { - throw mockError; + return { + run: async () => { + throw mockError; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; diff --git a/tests/integration-tests/packager.test.ts b/tests/integration-tests/packager.test.ts index a07b03b71..7aaaa7a21 
100644 --- a/tests/integration-tests/packager.test.ts +++ b/tests/integration-tests/packager.test.ts @@ -2,12 +2,18 @@ import fs from 'node:fs/promises'; import os from 'node:os'; import path from 'node:path'; import process from 'node:process'; -import { afterEach, beforeEach, describe, expect, test } from 'vitest'; +import { afterEach, beforeEach, describe, expect, test, vi } from 'vitest'; + +// Mock globby worker for integration tests to avoid worker file loading issues +vi.mock('../../src/core/file/globbyExecute.js', () => ({ + executeGlobbyInWorker: vi.fn(), +})); import { loadFileConfig, mergeConfigs } from '../../src/config/configLoad.js'; import type { RepomixConfigFile, RepomixConfigMerged, RepomixOutputStyle } from '../../src/config/configSchema.js'; import { collectFiles } from '../../src/core/file/fileCollect.js'; import { searchFiles } from '../../src/core/file/fileSearch.js'; import type { ProcessedFile } from '../../src/core/file/fileTypes.js'; +import { executeGlobbyInWorker } from '../../src/core/file/globbyExecute.js'; import type { FileCollectTask } from '../../src/core/file/workers/fileCollectWorker.js'; import fileCollectWorker from '../../src/core/file/workers/fileCollectWorker.js'; import fileProcessWorker from '../../src/core/file/workers/fileProcessWorker.js'; @@ -24,9 +30,14 @@ const fixturesDir = path.join(__dirname, 'fixtures', 'packager'); const inputsDir = path.join(fixturesDir, 'inputs'); const outputsDir = path.join(fixturesDir, 'outputs'); -const mockCollectFileInitTaskRunner = () => { - return async (task: FileCollectTask) => { - return await fileCollectWorker(task); +const mockCollectFileInitTaskRunner = (numOfTasks: number, workerPath: string) => { + return { + run: async (task: T) => { + return (await fileCollectWorker(task as FileCollectTask)) as R; + }, + cleanup: async () => { + // Mock cleanup - no-op for tests + }, }; }; @@ -63,6 +74,12 @@ describe.runIf(!isWindows)('packager integration', () => { beforeEach(async () => { 
// Create a temporary directory for each test tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'repomix-test-')); + + // Mock executeGlobbyInWorker to return the actual files in the test directory + vi.mocked(executeGlobbyInWorker).mockImplementation(async (patterns, options) => { + const { globby } = await import('globby'); + return globby(patterns, options); + }); }); afterEach(async () => { diff --git a/tests/shared/processConcurrency.test.ts b/tests/shared/processConcurrency.test.ts index bcd9fa075..6970c0b8f 100644 --- a/tests/shared/processConcurrency.test.ts +++ b/tests/shared/processConcurrency.test.ts @@ -1,7 +1,12 @@ import os from 'node:os'; import { Tinypool } from 'tinypool'; import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { getProcessConcurrency, getWorkerThreadCount, initWorker } from '../../src/shared/processConcurrency.js'; +import { + createWorkerPool, + getProcessConcurrency, + getWorkerThreadCount, + initTaskRunner, +} from '../../src/shared/processConcurrency.js'; vi.mock('node:os'); vi.mock('tinypool'); @@ -68,10 +73,11 @@ describe('processConcurrency', () => { it('should initialize Tinypool with correct configuration', () => { const workerPath = '/path/to/worker.js'; - const tinypool = initWorker(500, workerPath); + const tinypool = createWorkerPool(500, workerPath); expect(Tinypool).toHaveBeenCalledWith({ filename: workerPath, + runtime: 'child_process', minThreads: 1, maxThreads: 4, // Math.min(4, 500/100) = 4 idleTimeout: 5000, @@ -82,4 +88,27 @@ describe('processConcurrency', () => { expect(tinypool).toBeDefined(); }); }); + + describe('initTaskRunner', () => { + beforeEach(() => { + vi.mocked(os).availableParallelism = vi.fn().mockReturnValue(4); + vi.mocked(Tinypool).mockImplementation( + () => + ({ + run: vi.fn(), + destroy: vi.fn(), + }) as unknown as Tinypool, + ); + }); + + it('should return a TaskRunner with run and cleanup methods', () => { + const workerPath = '/path/to/worker.js'; + const taskRunner = 
initTaskRunner(100, workerPath); + + expect(taskRunner).toHaveProperty('run'); + expect(taskRunner).toHaveProperty('cleanup'); + expect(typeof taskRunner.run).toBe('function'); + expect(typeof taskRunner.cleanup).toBe('function'); + }); + }); }); diff --git a/website/server/src/index.ts b/website/server/src/index.ts index 2a95ec417..4f9fe3bff 100644 --- a/website/server/src/index.ts +++ b/website/server/src/index.ts @@ -9,7 +9,7 @@ import { processZipFile } from './processZipFile.js'; import { processRemoteRepo } from './remoteRepo.js'; import type { PackResult } from './types.js'; import { handlePackError } from './utils/errorHandler.js'; -import { cloudLogger, createErrorResponse, logError, logInfo } from './utils/logger.js'; +import { cloudLogger, createErrorResponse, logError, logInfo, logMemoryUsage } from './utils/logger.js'; import { getProcessConcurrency } from './utils/processConcurrency.js'; import { calculateLatency, formatLatencyForDisplay } from './utils/time.js'; @@ -20,6 +20,11 @@ logInfo('Server starting', { }, }); +// Log initial memory usage +logMemoryUsage('Server startup', { + processConcurrency: getProcessConcurrency(), +}); + const app = new Hono(); // Setup custom logger @@ -109,7 +114,7 @@ app.post( result = await processRemoteRepo(url, format, options, clientIp); } - // Log operation result + // Log operation result with memory usage logInfo('Pack operation completed', { requestId, format, @@ -123,6 +128,14 @@ app.post( }, }); + // Log memory usage after processing + logMemoryUsage('Pack operation memory usage', { + requestId, + repository: result.metadata.repository, + totalFiles: result.metadata.summary?.totalFiles, + totalCharacters: result.metadata.summary?.totalCharacters, + }); + return c.json(result); } catch (error) { // Handle errors diff --git a/website/server/src/processZipFile.ts b/website/server/src/processZipFile.ts index 7492f0081..78e891635 100644 --- a/website/server/src/processZipFile.ts +++ 
b/website/server/src/processZipFile.ts @@ -8,6 +8,7 @@ import type { PackOptions, PackResult } from './types.js'; import { generateCacheKey } from './utils/cache.js'; import { AppError } from './utils/errorHandler.js'; import { cleanupTempDirectory, copyOutputToCurrentDirectory, createTempDirectory } from './utils/fileUtils.js'; +import { logMemoryUsage } from './utils/logger.js'; import { cache, rateLimiter } from './utils/sharedInstance.js'; import { sanitizePattern, validateRequest } from './utils/validation.js'; @@ -88,6 +89,13 @@ export async function processZipFile( const tempDirPath = await createTempDirectory(); try { + // Log memory usage before processing + logMemoryUsage('ZIP file processing started', { + fileName: file.name, + fileSize: file.size, + format: validatedData.format, + }); + // Extract the ZIP file to the temporary directory with enhanced security checks await extractZipWithSecurity(file, tempDirPath); @@ -125,6 +133,14 @@ export async function processZipFile( // Save the result to cache await cache.set(cacheKey, packResultData); + // Log memory usage after processing + logMemoryUsage('ZIP file processing completed', { + fileName: file.name, + totalFiles: packResult.totalFiles, + totalCharacters: packResult.totalCharacters, + totalTokens: packResult.totalTokens, + }); + return packResultData; } catch (error) { console.error('Error processing uploaded file:', error); diff --git a/website/server/src/remoteRepo.ts b/website/server/src/remoteRepo.ts index 1e1f9c615..90b2140be 100644 --- a/website/server/src/remoteRepo.ts +++ b/website/server/src/remoteRepo.ts @@ -5,6 +5,7 @@ import { packRequestSchema } from './schemas/request.js'; import type { PackOptions, PackResult } from './types.js'; import { generateCacheKey } from './utils/cache.js'; import { AppError } from './utils/errorHandler.js'; +import { logMemoryUsage } from './utils/logger.js'; import { cache, rateLimiter } from './utils/sharedInstance.js'; import { sanitizePattern, 
validateRequest } from './utils/validation.js'; @@ -66,6 +67,12 @@ export async function processRemoteRepo( } as CliOptions; try { + // Log memory usage before processing + logMemoryUsage('Remote repository processing started', { + repository: repoUrl, + format: validatedData.format, + }); + // Execute remote action const result = await runCli(['.'], process.cwd(), cliOptions); if (!result) { @@ -102,6 +109,14 @@ export async function processRemoteRepo( // Save the result to cache await cache.set(cacheKey, packResultData); + // Log memory usage after processing + logMemoryUsage('Remote repository processing completed', { + repository: repoUrl, + totalFiles: packResult.totalFiles, + totalCharacters: packResult.totalCharacters, + totalTokens: packResult.totalTokens, + }); + return packResultData; } catch (error) { console.error('Error in remote action:', error); diff --git a/website/server/src/utils/logger.ts b/website/server/src/utils/logger.ts index 40ea1ae64..fe3b16845 100644 --- a/website/server/src/utils/logger.ts +++ b/website/server/src/utils/logger.ts @@ -1,6 +1,7 @@ import { LoggingWinston } from '@google-cloud/logging-winston'; import type { Context, Next } from 'hono'; import winston from 'winston'; +import { formatMemoryUsage, getMemoryMetrics, getMemoryUsage } from './memory.js'; import { getClientIP } from './network.js'; import { calculateLatency, formatLatencyForDisplay } from './time.js'; @@ -167,3 +168,17 @@ export function logError(message: string, error?: Error, context?: Record): void { + const memoryUsage = getMemoryUsage(); + const memoryMetrics = getMemoryMetrics(); + + logger.info({ + message: `${message} - Memory: ${formatMemoryUsage(memoryUsage)}`, + memory: memoryMetrics, + ...context, + }); +} diff --git a/website/server/src/utils/memory.ts b/website/server/src/utils/memory.ts new file mode 100644 index 000000000..848d04621 --- /dev/null +++ b/website/server/src/utils/memory.ts @@ -0,0 +1,59 @@ +/** + * Memory usage utility functions for 
tracking and logging memory consumption + */ + +export interface MemoryUsage { + /** Used heap size in MB */ + heapUsed: number; + /** Total heap size in MB */ + heapTotal: number; + /** External memory usage in MB */ + external: number; + /** RSS (Resident Set Size) in MB */ + rss: number; + /** Heap usage percentage */ + heapUsagePercent: number; +} + +/** + * Get current memory usage statistics + */ +export function getMemoryUsage(): MemoryUsage { + const memoryUsage = process.memoryUsage(); + + // Convert bytes to MB + const heapUsed = Math.round((memoryUsage.heapUsed / 1024 / 1024) * 100) / 100; + const heapTotal = Math.round((memoryUsage.heapTotal / 1024 / 1024) * 100) / 100; + const external = Math.round((memoryUsage.external / 1024 / 1024) * 100) / 100; + const rss = Math.round((memoryUsage.rss / 1024 / 1024) * 100) / 100; + + const heapUsagePercent = Math.round((heapUsed / heapTotal) * 100 * 100) / 100; + + return { + heapUsed, + heapTotal, + external, + rss, + heapUsagePercent, + }; +} + +/** + * Format memory usage for display + */ +export function formatMemoryUsage(usage: MemoryUsage): string { + return `${usage.heapUsed}MB/${usage.heapTotal}MB (${usage.heapUsagePercent}%) RSS: ${usage.rss}MB`; +} + +/** + * Get a simplified memory usage object for logging + */ +export function getMemoryMetrics(): Record { + const usage = getMemoryUsage(); + return { + heapUsedMB: usage.heapUsed, + heapTotalMB: usage.heapTotal, + rssMB: usage.rss, + heapUsagePercent: usage.heapUsagePercent, + }; +}