Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 52 additions & 34 deletions src/core/security/securityCheck.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,23 @@ import type { RepomixProgressCallback } from '../../shared/types.js';
import type { RawFile } from '../file/fileTypes.js';
import type { GitDiffResult } from '../git/gitDiffHandle.js';
import type { GitLogResult } from '../git/gitLogHandle.js';
import type { SecurityCheckTask, SecurityCheckType } from './workers/securityCheckWorker.js';
import type { SecurityCheckItem, SecurityCheckTask, SecurityCheckType } from './workers/securityCheckWorker.js';

export type { SecurityCheckType } from './workers/securityCheckWorker.js';

export interface SuspiciousFileResult {
filePath: string;
messages: string[];
type: SecurityCheckType;
}

// Batch size for grouping files into worker tasks to reduce IPC overhead.
// Each batch is sent as a single message to a worker thread, avoiding
// per-file round-trip costs that dominate when processing many files.
// A moderate batch size (50) reduces IPC round-trips by ~98% (990 → 20 for a typical repo)
// while keeping enough batches to utilize all available CPU cores.
const BATCH_SIZE = 50;

export const runSecurityCheck = async (
rawFiles: RawFile[],
progressCallback: RepomixProgressCallback = () => {},
Expand All @@ -22,21 +31,21 @@ export const runSecurityCheck = async (
initTaskRunner,
},
): Promise<SuspiciousFileResult[]> => {
const gitDiffTasks: SecurityCheckTask[] = [];
const gitLogTasks: SecurityCheckTask[] = [];
const gitDiffItems: SecurityCheckItem[] = [];
const gitLogItems: SecurityCheckItem[] = [];

// Add Git diff content for security checking if available
if (gitDiffResult) {
if (gitDiffResult.workTreeDiffContent) {
gitDiffTasks.push({
gitDiffItems.push({
filePath: 'Working tree changes',
content: gitDiffResult.workTreeDiffContent,
type: 'gitDiff',
});
}

if (gitDiffResult.stagedDiffContent) {
gitDiffTasks.push({
gitDiffItems.push({
filePath: 'Staged changes',
content: gitDiffResult.stagedDiffContent,
type: 'gitDiff',
Expand All @@ -47,59 +56,68 @@ export const runSecurityCheck = async (
// Add Git log content for security checking if available
if (gitLogResult) {
if (gitLogResult.logContent) {
gitLogTasks.push({
gitLogItems.push({
filePath: 'Git log history',
content: gitLogResult.logContent,
type: 'gitLog',
});
}
}

const taskRunner = deps.initTaskRunner<SecurityCheckTask, SuspiciousFileResult | null>({
numOfTasks: rawFiles.length + gitDiffTasks.length + gitLogTasks.length,
const fileItems: SecurityCheckItem[] = rawFiles.map((file) => ({
filePath: file.path,
content: file.content,
type: 'file',
}));

// Combine all items, then split into batches
const allItems = [...fileItems, ...gitDiffItems, ...gitLogItems];
const totalItems = allItems.length;

// NOTE: numOfTasks uses totalItems (not batches.length) intentionally.
// getWorkerThreadCount uses Math.ceil(numOfTasks / TASKS_PER_THREAD) to size the pool,
// where TASKS_PER_THREAD=100 is calibrated for fine-grained tasks.
// Passing batches.length (e.g. 2) would yield maxThreads=1, forcing sequential execution.
const taskRunner = deps.initTaskRunner<SecurityCheckTask, (SuspiciousFileResult | null)[]>({
numOfTasks: totalItems,
workerType: 'securityCheck',
runtime: 'worker_threads',
});
Comment thread
yamadashy marked this conversation as resolved.
const fileTasks = rawFiles.map(
(file) =>
({
filePath: file.path,
content: file.content,
type: 'file',
}) satisfies SecurityCheckTask,
);

// Combine file tasks, Git diff tasks, and Git log tasks
const tasks = [...fileTasks, ...gitDiffTasks, ...gitLogTasks];

// Split items into batches to reduce IPC round-trips
const batches: SecurityCheckItem[][] = [];
for (let i = 0; i < allItems.length; i += BATCH_SIZE) {
batches.push(allItems.slice(i, i + BATCH_SIZE));
}

try {
logger.trace(`Starting security check for ${tasks.length} files/content`);
logger.trace(`Starting security check for ${totalItems} files/content in ${batches.length} batches`);
const startTime = process.hrtime.bigint();

let completedTasks = 0;
const totalTasks = tasks.length;

const results = await Promise.all(
tasks.map((task) =>
taskRunner.run(task).then((result) => {
completedTasks++;
progressCallback(`Running security check... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`);
logger.trace(`Running security check... (${completedTasks}/${totalTasks}) ${task.filePath}`);
return result;
}),
),
let completedItems = 0;

const batchResults = await Promise.all(
batches.map(async (batch) => {
const results = await taskRunner.run({ items: batch });

completedItems += batch.length;
const lastItem = batch[batch.length - 1];
progressCallback(`Running security check... (${completedItems}/${totalItems}) ${pc.dim(lastItem.filePath)}`);
logger.trace(`Running security check... (${completedItems}/${totalItems}) ${lastItem.filePath}`);

return results;
}),
);
Comment thread
yamadashy marked this conversation as resolved.

const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1e6;
logger.trace(`Security check completed in ${duration.toFixed(2)}ms`);

return results.filter((result): result is SuspiciousFileResult => result !== null);
return batchResults.flat().filter((result): result is SuspiciousFileResult => result !== null);
} catch (error) {
logger.error('Error during security check:', error);
throw error;
} finally {
// Always cleanup worker pool
await taskRunner.cleanup();
}
};
23 changes: 15 additions & 8 deletions src/core/security/workers/securityCheckWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ setLogLevelByWorkerData();
// Security check type to distinguish between regular files, git diffs, and git logs
export type SecurityCheckType = 'file' | 'gitDiff' | 'gitLog';

export interface SecurityCheckTask {
export interface SecurityCheckItem {
filePath: string;
content: string;
type: SecurityCheckType;
}

export interface SecurityCheckTask {
items: SecurityCheckItem[];
}

export interface SuspiciousFileResult {
filePath: string;
messages: string[];
Expand All @@ -34,21 +38,24 @@ export const createSecretLintConfig = (): SecretLintCoreConfig => ({
// Cache config at module level - created once per worker, reused for all tasks
const cachedConfig = createSecretLintConfig();

export default async ({ filePath, content, type }: SecurityCheckTask) => {
export default async (task: SecurityCheckTask): Promise<(SuspiciousFileResult | null)[]> => {
const config = cachedConfig;
const processStartAt = process.hrtime.bigint();

try {
const processStartAt = process.hrtime.bigint();
const secretLintResult = await runSecretLint(filePath, content, type, config);
const processEndAt = process.hrtime.bigint();
const results: (SuspiciousFileResult | null)[] = [];
for (const item of task.items) {
results.push(await runSecretLint(item.filePath, item.content, item.type, config));
}

const processEndAt = process.hrtime.bigint();
logger.trace(
`Checked security on ${filePath}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`,
`Checked security on ${task.items.length} items. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`,
);

return secretLintResult;
return results;
} catch (error) {
logger.error(`Error checking security on ${filePath}:`, error);
logger.error('Error in security check worker:', error);
throw error;
}
};
Expand Down
4 changes: 2 additions & 2 deletions src/shared/unifiedWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => {
return 'calculateMetrics';
}

// securityCheck: has filePath, content, type
if ('filePath' in taskObj && 'content' in taskObj && 'type' in taskObj) {
// securityCheck: has items array (without encoding, which distinguishes it from calculateMetrics)
if ('items' in taskObj && !('encoding' in taskObj)) {
return 'securityCheck';
}

Expand Down
40 changes: 11 additions & 29 deletions tests/core/security/securityCheck.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,15 @@ describe('runSecurityCheck', () => {
expect(result[0].messages).toHaveLength(1);
});

it('should call progress callback with correct messages', async () => {
it('should call progress callback for each batch', async () => {
const progressCallback = vi.fn();

await runSecurityCheck(mockFiles, progressCallback, undefined, undefined, {
initTaskRunner: mockInitTaskRunner,
});

expect(progressCallback).toHaveBeenCalledWith(
expect.stringContaining(`Running security check... (1/2) ${pc.dim('test1.js')}`),
);
// With 2 files and batch size 50, all files are in a single batch
// Progress callback is called once per batch with the last file in the batch
expect(progressCallback).toHaveBeenCalledWith(
expect.stringContaining(`Running security check... (2/2) ${pc.dim('test2.js')}`),
);
Expand Down Expand Up @@ -162,12 +161,8 @@ describe('runSecurityCheck', () => {
initTaskRunner: mockInitTaskRunner,
});

// Should process 2 files + 2 git diff contents = 4 total tasks
expect(progressCallback).toHaveBeenCalledTimes(4);

// Check that Git diff tasks were processed
expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Working tree changes'));
expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Staged changes'));
// With batch size 50 and 4 items (2 files + 2 git diffs), all in a single batch
expect(progressCallback).toHaveBeenCalledTimes(1);

// Should find security issues in files (at least 1 from test1.js)
expect(result.length).toBeGreaterThanOrEqual(1);
Expand All @@ -184,13 +179,8 @@ describe('runSecurityCheck', () => {
initTaskRunner: mockInitTaskRunner,
});

// Should process 2 files + 1 git diff content = 3 total tasks
expect(progressCallback).toHaveBeenCalledTimes(3);

// Check that only working tree diff was processed
expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Working tree changes'));
// Staged changes should not be processed because content is empty string (falsy)
expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Staged changes'));
// With batch size 50 and 3 items (2 files + 1 git diff), all in a single batch
expect(progressCallback).toHaveBeenCalledTimes(1);
});

it('should process only stagedDiffContent when workTreeDiffContent is not available', async () => {
Expand All @@ -204,13 +194,8 @@ describe('runSecurityCheck', () => {
initTaskRunner: mockInitTaskRunner,
});

// Should process 2 files + 1 git diff content = 3 total tasks
expect(progressCallback).toHaveBeenCalledTimes(3);

// Check that only staged diff was processed
expect(progressCallback).toHaveBeenCalledWith(expect.stringContaining('Staged changes'));
// Working tree changes should not be processed because content is empty string (falsy)
expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Working tree changes'));
// With batch size 50 and 3 items (2 files + 1 git diff), all in a single batch
expect(progressCallback).toHaveBeenCalledTimes(1);
});

it('should handle gitDiffResult with no diff content', async () => {
Expand All @@ -225,10 +210,7 @@ describe('runSecurityCheck', () => {
});

// Should process only 2 files, no git diff content because both are empty strings (falsy)
expect(progressCallback).toHaveBeenCalledTimes(2);

// Check that no git diff tasks were processed
expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Working tree changes'));
expect(progressCallback).not.toHaveBeenCalledWith(expect.stringContaining('Staged changes'));
// With batch size 50, all in a single batch
expect(progressCallback).toHaveBeenCalledTimes(1);
});
});
6 changes: 2 additions & 4 deletions tests/shared/unifiedWorker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,10 @@ describe('unifiedWorker', () => {
expect(calculateMetricsWorker.default).toHaveBeenCalledWith(task);
});

it('should infer securityCheck from task with filePath, content, type', async () => {
it('should infer securityCheck from task with items (without encoding)', async () => {
const { default: handler } = await import('../../src/shared/unifiedWorker.js');
const task = {
filePath: 'test.ts',
content: 'test content',
type: 'file',
items: [{ filePath: 'test.ts', content: 'test content', type: 'file' }],
};

await handler(task);
Expand Down
Loading