diff --git a/src/core/security/securityCheck.ts b/src/core/security/securityCheck.ts index 3fffed911..f1736a580 100644 --- a/src/core/security/securityCheck.ts +++ b/src/core/security/securityCheck.ts @@ -5,7 +5,9 @@ import type { RepomixProgressCallback } from '../../shared/types.js'; import type { RawFile } from '../file/fileTypes.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; import type { GitLogResult } from '../git/gitLogHandle.js'; -import type { SecurityCheckTask, SecurityCheckType } from './workers/securityCheckWorker.js'; +import type { SecurityCheckItem, SecurityCheckTask, SecurityCheckType } from './workers/securityCheckWorker.js'; + +export type { SecurityCheckType } from './workers/securityCheckWorker.js'; export interface SuspiciousFileResult { filePath: string; @@ -13,6 +15,13 @@ export interface SuspiciousFileResult { type: SecurityCheckType; } +// Batch size for grouping files into worker tasks to reduce IPC overhead. +// Each batch is sent as a single message to a worker thread, avoiding +// per-file round-trip costs that dominate when processing many files. +// A moderate batch size (50) reduces IPC round-trips by ~98% (990 → 20 for a typical repo) +// while keeping enough batches to utilize all available CPU cores. +const BATCH_SIZE = 50; + export const runSecurityCheck = async ( rawFiles: RawFile[], progressCallback: RepomixProgressCallback = () => {}, @@ -22,13 +31,13 @@ export const runSecurityCheck = async ( initTaskRunner, }, ): Promise => { - const gitDiffTasks: SecurityCheckTask[] = []; - const gitLogTasks: SecurityCheckTask[] = []; + const gitDiffItems: SecurityCheckItem[] = []; + const gitLogItems: SecurityCheckItem[] = []; // Add Git diff content for security checking if available if (gitDiffResult) { if (gitDiffResult.workTreeDiffContent) { - gitDiffTasks.push({ + gitDiffItems.push({ filePath: 'Working tree changes', content: gitDiffResult.workTreeDiffContent, type: 'gitDiff', @@ -36,7 +45,7 @@ export const runSecurityCheck = async ( } if (gitDiffResult.stagedDiffContent) { - gitDiffTasks.push({ + gitDiffItems.push({ filePath: 'Staged changes', content: gitDiffResult.stagedDiffContent, type: 'gitDiff', @@ -47,7 +56,7 @@ export const runSecurityCheck = async ( // Add Git log content for security checking if available if (gitLogResult) { if (gitLogResult.logContent) { - gitLogTasks.push({ + gitLogItems.push({ filePath: 'Git log history', content: gitLogResult.logContent, type: 'gitLog', @@ -55,51 +64,60 @@ export const runSecurityCheck = async ( } } - const taskRunner = deps.initTaskRunner({ - numOfTasks: rawFiles.length + gitDiffTasks.length + gitLogTasks.length, + const fileItems: SecurityCheckItem[] = rawFiles.map((file) => ({ + filePath: file.path, + content: file.content, + type: 'file', + })); + + // Combine all items, then split into batches + const allItems = [...fileItems, ...gitDiffItems, ...gitLogItems]; + const totalItems = allItems.length; + + // NOTE: numOfTasks uses totalItems (not batches.length) intentionally. + // getWorkerThreadCount uses Math.ceil(numOfTasks / TASKS_PER_THREAD) to size the pool, + // where TASKS_PER_THREAD=100 is calibrated for fine-grained tasks. + // Passing batches.length (e.g. 2) would yield maxThreads=1, forcing sequential execution. + const taskRunner = deps.initTaskRunner({ + numOfTasks: totalItems, workerType: 'securityCheck', runtime: 'worker_threads', }); - const fileTasks = rawFiles.map( - (file) => - ({ - filePath: file.path, - content: file.content, - type: 'file', - }) satisfies SecurityCheckTask, - ); - - // Combine file tasks, Git diff tasks, and Git log tasks - const tasks = [...fileTasks, ...gitDiffTasks, ...gitLogTasks]; + + // Split items into batches to reduce IPC round-trips + const batches: SecurityCheckItem[][] = []; + for (let i = 0; i < allItems.length; i += BATCH_SIZE) { + batches.push(allItems.slice(i, i + BATCH_SIZE)); + } try { - logger.trace(`Starting security check for ${tasks.length} files/content`); + logger.trace(`Starting security check for ${totalItems} files/content in ${batches.length} batches`); const startTime = process.hrtime.bigint(); - let completedTasks = 0; - const totalTasks = tasks.length; - - const results = await Promise.all( - tasks.map((task) => - taskRunner.run(task).then((result) => { - completedTasks++; - progressCallback(`Running security check... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`); - logger.trace(`Running security check... (${completedTasks}/${totalTasks}) ${task.filePath}`); - return result; - }), - ), + let completedItems = 0; + + const batchResults = await Promise.all( + batches.map(async (batch) => { + const results = await taskRunner.run({ items: batch }); + + completedItems += batch.length; + const lastItem = batch[batch.length - 1]; + progressCallback(`Running security check... (${completedItems}/${totalItems}) ${pc.dim(lastItem.filePath)}`); + logger.trace(`Running security check... (${completedItems}/${totalItems}) ${lastItem.filePath}`); + + return results; + }), ); const endTime = process.hrtime.bigint(); const duration = Number(endTime - startTime) / 1e6; logger.trace(`Security check completed in ${duration.toFixed(2)}ms`); - return results.filter((result): result is SuspiciousFileResult => result !== null); + return batchResults.flat().filter((result): result is SuspiciousFileResult => result !== null); } catch (error) { logger.error('Error during security check:', error); throw error; } finally { - // Always cleanup worker pool await taskRunner.cleanup(); } }; diff --git a/src/core/security/workers/securityCheckWorker.ts b/src/core/security/workers/securityCheckWorker.ts index 53edeaa64..656278460 100644 --- a/src/core/security/workers/securityCheckWorker.ts +++ b/src/core/security/workers/securityCheckWorker.ts @@ -10,12 +10,16 @@ setLogLevelByWorkerData(); // Security check type to distinguish between regular files, git diffs, and git logs export type SecurityCheckType = 'file' | 'gitDiff' | 'gitLog'; -export interface SecurityCheckTask { +export interface SecurityCheckItem { filePath: string; content: string; type: SecurityCheckType; } +export interface SecurityCheckTask { + items: SecurityCheckItem[]; +} + export interface SuspiciousFileResult { filePath: string; messages: string[]; @@ -34,21 +38,24 @@ export const createSecretLintConfig = (): SecretLintCoreConfig => ({ // Cache config at module level - created once per worker, reused for all tasks const cachedConfig = createSecretLintConfig(); -export default async ({ filePath, content, type }: SecurityCheckTask) => { +export default async (task: SecurityCheckTask): Promise<(SuspiciousFileResult | null)[]> => { const config = cachedConfig; + const processStartAt = process.hrtime.bigint(); try { - const processStartAt = process.hrtime.bigint(); - const secretLintResult = await runSecretLint(filePath, content, type, config); - const processEndAt = process.hrtime.bigint(); + const results: (SuspiciousFileResult | null)[] = []; + for (const item of task.items) { + results.push(await runSecretLint(item.filePath, item.content, item.type, config)); + } + const processEndAt = process.hrtime.bigint(); logger.trace( - `Checked security on ${filePath}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`, + `Checked security on ${task.items.length} items. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`, ); - return secretLintResult; + return results; } catch (error) { - logger.error(`Error checking security on ${filePath}:`, error); + logger.error('Error in security check worker:', error); throw error; } }; diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts index 8e0dd35de..5032a3041 100644 --- a/src/shared/unifiedWorker.ts +++ b/src/shared/unifiedWorker.ts @@ -85,8 +85,8 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => { return 'calculateMetrics'; } - // securityCheck: has filePath, content, type - if ('filePath' in taskObj && 'content' in taskObj && 'type' in taskObj) { + // securityCheck: has items array (without encoding, which distinguishes it from calculateMetrics) + if ('items' in taskObj && !('encoding' in taskObj)) { return 'securityCheck'; } diff --git a/tests/core/security/securityCheck.test.ts b/tests/core/security/securityCheck.test.ts index 7727e625d..22d110616 100644 --- a/tests/core/security/securityCheck.test.ts +++ b/tests/core/security/securityCheck.test.ts @@ -61,16 +61,15 @@ describe('runSecurityCheck', () => { expect(result[0].messages).toHaveLength(1); }); - it('should call progress callback with correct messages', async () => { + it('should call progress callback for each batch', async () => { const progressCallback = vi.fn(); await runSecurityCheck(mockFiles, progressCallback, undefined, undefined, { initTaskRunner: mockInitTaskRunner, }); - expect(progressCallback).toHaveBeenCalledWith( - expect.stringContaining(`Running security check... (1/2) ${pc.dim('test1.js')}`), - ); + // With 2 files and batch size 50, all files are in a single batch + // Progress callback is called once per batch with the last file in the batch expect(progressCallback).toHaveBeenCalledWith( expect.stringContaining(`Running security check... (2/2) ${pc.dim('test2.js')}`), ); @@ -162,12 +161,8 @@ describe('runSecurityCheck', () => { initTaskRunner: mockInitTaskRunner, }); - // Should process 2 files + 2 git diff contents = 4 total tasks - expect(progressCallback).toHaveBeenCalledTimes(4); - - // Check that Git diff tasks were processed - expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Working tree changes')); - expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Staged changes')); + // With batch size 50 and 4 items (2 files + 2 git diffs), all in a single batch + expect(progressCallback).toHaveBeenCalledTimes(1); // Should find security issues in files (at least 1 from test1.js) expect(result.length).toBeGreaterThanOrEqual(1); @@ -184,13 +179,8 @@ describe('runSecurityCheck', () => { initTaskRunner: mockInitTaskRunner, }); - // Should process 2 files + 1 git diff content = 3 total tasks - expect(progressCallback).toHaveBeenCalledTimes(3); - - // Check that only working tree diff was processed - expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Working tree changes')); - // Staged changes should not be processed because content is empty string (falsy) - expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Staged changes')); + // With batch size 50 and 3 items (2 files + 1 git diff), all in a single batch + expect(progressCallback).toHaveBeenCalledTimes(1); }); it('should process only stagedDiffContent when workTreeDiffContent is not available', async () => { @@ -204,13 +194,8 @@ describe('runSecurityCheck', () => { initTaskRunner: mockInitTaskRunner, }); - // Should process 2 files + 1 git diff content = 3 total tasks - expect(progressCallback).toHaveBeenCalledTimes(3); - - // Check that only staged diff was processed - expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Staged changes')); - // Working tree changes should not be processed because content is empty string (falsy) - expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Working tree changes')); + // With batch size 50 and 3 items (2 files + 1 git diff), all in a single batch + expect(progressCallback).toHaveBeenCalledTimes(1); }); it('should handle gitDiffResult with no diff content', async () => { @@ -225,10 +210,7 @@ describe('runSecurityCheck', () => { }); // Should process only 2 files, no git diff content because both are empty strings (falsy) - expect(progressCallback).toHaveBeenCalledTimes(2); - - // Check that no git diff tasks were processed - expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Working tree changes')); - expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Staged changes')); + // With batch size 50, all in a single batch + expect(progressCallback).toHaveBeenCalledTimes(1); }); }); diff --git a/tests/shared/unifiedWorker.test.ts b/tests/shared/unifiedWorker.test.ts index e09582e5e..0b1314981 100644 --- a/tests/shared/unifiedWorker.test.ts +++ b/tests/shared/unifiedWorker.test.ts @@ -53,12 +53,10 @@ describe('unifiedWorker', () => { expect(calculateMetricsWorker.default).toHaveBeenCalledWith(task); }); - it('should infer securityCheck from task with filePath, content, type', async () => { + it('should infer securityCheck from task with items (without encoding)', async () => { const { default: handler } = await import('../../src/shared/unifiedWorker.js'); const task = { - filePath: 'test.ts', - content: 'test content', - type: 'file', + items: [{ filePath: 'test.ts', content: 'test content', type: 'file' }], }; await handler(task);