Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/config/configLoad.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import * as fs from 'node:fs/promises';
import path from 'node:path';
import { pathToFileURL } from 'node:url';
import { createJiti } from 'jiti';
import JSON5 from 'json5';
import pc from 'picocolors';
import { RepomixError, rethrowValidationErrorIfZodError } from '../shared/errorHandle.js';
Expand Down Expand Up @@ -58,7 +57,10 @@ const findConfigFile = async (configPaths: string[], logPrefix: string): Promise
};

// Default jiti import implementation for loading JS/TS config files
// Lazy-loads jiti to avoid importing its heavy TypeScript toolchain
// when using JSON/JSON5 config files or default config (the common case)
const defaultJitiImport = async (fileUrl: string): Promise<unknown> => {
const { createJiti } = await import('jiti');
const jiti = createJiti(import.meta.url, {
moduleCache: false, // Disable cache to ensure fresh config loads
interopDefault: true, // Automatically use default export
Expand Down
11 changes: 6 additions & 5 deletions src/core/file/fileRead.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ export interface FileReadResult {
*/
export const readRawFile = async (filePath: string, maxFileSize: number): Promise<FileReadResult> => {
try {
// Check binary extension first (no I/O needed) to skip stat + read for binary files
if (isBinaryPath(filePath)) {
logger.debug(`Skipping binary file: ${filePath}`);
return { content: null, skippedReason: 'binary-extension' };
}

const stats = await fs.stat(filePath);

if (stats.size > maxFileSize) {
Expand All @@ -29,11 +35,6 @@ export const readRawFile = async (filePath: string, maxFileSize: number): Promis
return { content: null, skippedReason: 'size-limit' };
}

if (isBinaryPath(filePath)) {
logger.debug(`Skipping binary file: ${filePath}`);
return { content: null, skippedReason: 'binary-extension' };
}

logger.trace(`Reading file: ${filePath}`);

const buffer = await fs.readFile(filePath);
Expand Down
30 changes: 23 additions & 7 deletions src/core/metrics/calculateMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,37 @@ export interface CalculateMetricsResult {
gitLogTokenCount: number;
}

/**
 * Build the worker pool used for token-count metrics calculations.
 *
 * Exposed separately from calculateMetrics so callers (e.g. the packager)
 * can construct the runner early and overlap tiktoken WASM initialization
 * in the worker thread with other pipeline stages.
 */
export const createMetricsTaskRunner = (numOfTasks: number): TaskRunner<TokenCountTask, number> =>
  initTaskRunner<TokenCountTask, number>({
    workerType: 'calculateMetrics',
    runtime: 'worker_threads',
    numOfTasks,
  });

// Default dependency set for calculateMetrics. Callers (notably tests and the
// packager) can replace individual entries via the `overrideDeps` parameter,
// which is spread over this object inside calculateMetrics.
// `taskRunner` defaults to undefined so calculateMetrics creates its own
// runner unless a pre-warmed one is injected by the caller.
const defaultDeps = {
  calculateSelectiveFileMetrics,
  calculateOutputMetrics,
  calculateGitDiffMetrics,
  calculateGitLogMetrics,
  taskRunner: undefined as TaskRunner<TokenCountTask, number> | undefined,
};

export const calculateMetrics = async (
processedFiles: ProcessedFile[],
output: string | string[],
progressCallback: RepomixProgressCallback,
config: RepomixConfigMerged,
gitDiffResult: GitDiffResult | undefined,
gitLogResult: GitLogResult | undefined,
deps = {
calculateSelectiveFileMetrics,
calculateOutputMetrics,
calculateGitDiffMetrics,
calculateGitLogMetrics,
taskRunner: undefined as TaskRunner<TokenCountTask, number> | undefined,
},
overrideDeps: Partial<typeof defaultDeps> = {},
): Promise<CalculateMetricsResult> => {
const deps = { ...defaultDeps, ...overrideDeps };

progressCallback('Calculating metrics...');

// Initialize a single task runner for all metrics calculations
Expand Down
178 changes: 98 additions & 80 deletions src/core/packager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import type { FilesByRoot } from './file/fileTreeGenerate.js';
import type { ProcessedFile } from './file/fileTypes.js';
import { getGitDiffs } from './git/gitDiffHandle.js';
import { getGitLogs } from './git/gitLogHandle.js';
import { calculateMetrics } from './metrics/calculateMetrics.js';
import { calculateMetrics, createMetricsTaskRunner } from './metrics/calculateMetrics.js';
import { produceOutput } from './packager/produceOutput.js';
import type { SuspiciousFileResult } from './security/securityCheck.js';
import { validateFileSafety } from './security/validateFileSafety.js';
Expand Down Expand Up @@ -40,6 +40,7 @@ const defaultDeps = {
validateFileSafety,
produceOutput,
calculateMetrics,
createMetricsTaskRunner,
sortPaths,
getGitDiffs,
getGitLogs,
Expand Down Expand Up @@ -83,109 +84,126 @@ export const pack = async (
const allFilePaths = filePathsByDir.flatMap(({ filePaths }) => filePaths);
const sortedFilePaths = deps.sortPaths(allFilePaths);

// Regroup sorted file paths by rootDir
// Regroup sorted file paths by rootDir using Set for O(1) membership checks
const filePathSetByDir = new Map(filePathsByDir.map(({ rootDir, filePaths }) => [rootDir, new Set(filePaths)]));
const sortedFilePathsByDir = rootDirs.map((rootDir) => ({
rootDir,
filePaths: sortedFilePaths.filter((filePath: string) =>
filePathsByDir.find((item) => item.rootDir === rootDir)?.filePaths.includes(filePath),
),
filePaths: sortedFilePaths.filter((filePath) => filePathSetByDir.get(rootDir)?.has(filePath) ?? false),
}));

progressCallback('Collecting files...');
const collectResults = await withMemoryLogging(
'Collect Files',
async () =>
await Promise.all(
sortedFilePathsByDir.map(({ rootDir, filePaths }) =>
deps.collectFiles(filePaths, rootDir, config, progressCallback),
// Pre-initialize metrics worker pool to overlap tiktoken WASM loading with subsequent pipeline stages
// (security check, file processing, output generation). The warm-up task triggers tiktoken
// initialization in the worker thread without blocking the main pipeline.
const metricsTaskRunner = deps.createMetricsTaskRunner(allFilePaths.length);
const warmupPromise = metricsTaskRunner.run({ content: '', encoding: config.tokenCount.encoding }).catch(() => 0); // Suppress unhandled rejection; errors surface when awaited

try {
progressCallback('Collecting files...');
const collectResults = await withMemoryLogging(
'Collect Files',
async () =>
await Promise.all(
sortedFilePathsByDir.map(({ rootDir, filePaths }) =>
deps.collectFiles(filePaths, rootDir, config, progressCallback),
),
),
),
);
);

const rawFiles = collectResults.flatMap((curr) => curr.rawFiles);
const allSkippedFiles = collectResults.flatMap((curr) => curr.skippedFiles);
const rawFiles = collectResults.flatMap((curr) => curr.rawFiles);
const allSkippedFiles = collectResults.flatMap((curr) => curr.skippedFiles);

// Get git diffs if enabled - run this before security check
progressCallback('Getting git diffs...');
const gitDiffResult = await deps.getGitDiffs(rootDirs, config);
// Get git diffs if enabled - run this before security check
progressCallback('Getting git diffs...');
const gitDiffResult = await deps.getGitDiffs(rootDirs, config);

// Get git logs if enabled - run this before security check
progressCallback('Getting git logs...');
const gitLogResult = await deps.getGitLogs(rootDirs, config);
// Get git logs if enabled - run this before security check
progressCallback('Getting git logs...');
const gitLogResult = await deps.getGitLogs(rootDirs, config);

// Run security check and get filtered safe files
const { safeFilePaths, safeRawFiles, suspiciousFilesResults, suspiciousGitDiffResults, suspiciousGitLogResults } =
await withMemoryLogging('Security Check', () =>
deps.validateFileSafety(rawFiles, progressCallback, config, gitDiffResult, gitLogResult),
);
// Run security check and get filtered safe files
const { safeFilePaths, safeRawFiles, suspiciousFilesResults, suspiciousGitDiffResults, suspiciousGitLogResults } =
await withMemoryLogging('Security Check', () =>
deps.validateFileSafety(rawFiles, progressCallback, config, gitDiffResult, gitLogResult),
);

// Process files (remove comments, etc.)
progressCallback('Processing files...');
const processedFiles = await withMemoryLogging('Process Files', () =>
deps.processFiles(safeRawFiles, config, progressCallback),
);

progressCallback('Generating output...');
// Process files (remove comments, etc.)
progressCallback('Processing files...');
const processedFiles = await withMemoryLogging('Process Files', () =>
deps.processFiles(safeRawFiles, config, progressCallback),
);

// Check if skill generation is requested
if (config.skillGenerate !== undefined && options.skillDir) {
const result = await deps.packSkill({
progressCallback('Generating output...');

// Check if skill generation is requested
if (config.skillGenerate !== undefined && options.skillDir) {
// Await warmup to ensure graceful worker shutdown (avoid terminating WASM-loading thread)
await warmupPromise;

const result = await deps.packSkill({
rootDirs,
config,
options,
processedFiles,
allFilePaths,
gitDiffResult,
gitLogResult,
suspiciousFilesResults,
suspiciousGitDiffResults,
suspiciousGitLogResults,
safeFilePaths,
skippedFiles: allSkippedFiles,
progressCallback,
});

logMemoryUsage('Pack - End');
return result;
}

// Build filePathsByRoot for multi-root tree generation
// Use directory basename as the label for each root
// Fallback to rootDir if basename is empty (e.g., filesystem root "/")
const filePathsByRoot: FilesByRoot[] = sortedFilePathsByDir.map(({ rootDir, filePaths }) => ({
rootLabel: path.basename(rootDir) || rootDir,
files: filePaths,
}));

// Generate and write output (handles both single and split output)
const { outputFiles, outputForMetrics } = await deps.produceOutput(
rootDirs,
config,
options,
processedFiles,
allFilePaths,
gitDiffResult,
gitLogResult,
progressCallback,
filePathsByRoot,
);

// Ensure warm-up task completes before metrics calculation
await warmupPromise;

const metrics = await withMemoryLogging('Calculate Metrics', () =>
deps.calculateMetrics(processedFiles, outputForMetrics, progressCallback, config, gitDiffResult, gitLogResult, {
taskRunner: metricsTaskRunner,
}),
);

// Create a result object that includes metrics and security results
const result = {
...metrics,
...(outputFiles && { outputFiles }),
suspiciousFilesResults,
suspiciousGitDiffResults,
suspiciousGitLogResults,
processedFiles,
safeFilePaths,
skippedFiles: allSkippedFiles,
progressCallback,
});
};

logMemoryUsage('Pack - End');

return result;
} finally {
await metricsTaskRunner.cleanup();
}

// Build filePathsByRoot for multi-root tree generation
// Use directory basename as the label for each root
// Fallback to rootDir if basename is empty (e.g., filesystem root "/")
const filePathsByRoot: FilesByRoot[] = sortedFilePathsByDir.map(({ rootDir, filePaths }) => ({
rootLabel: path.basename(rootDir) || rootDir,
files: filePaths,
}));

// Generate and write output (handles both single and split output)
const { outputFiles, outputForMetrics } = await deps.produceOutput(
rootDirs,
config,
processedFiles,
allFilePaths,
gitDiffResult,
gitLogResult,
progressCallback,
filePathsByRoot,
);

const metrics = await withMemoryLogging('Calculate Metrics', () =>
deps.calculateMetrics(processedFiles, outputForMetrics, progressCallback, config, gitDiffResult, gitLogResult),
);

// Create a result object that includes metrics and security results
const result = {
...metrics,
...(outputFiles && { outputFiles }),
suspiciousFilesResults,
suspiciousGitDiffResults,
suspiciousGitLogResults,
processedFiles,
safeFilePaths,
skippedFiles: allSkippedFiles,
};

logMemoryUsage('Pack - End');

return result;
};
25 changes: 13 additions & 12 deletions src/core/packager/produceOutput.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,19 @@ const generateAndWriteSplitOutput = async (

progressCallback('Writing output files...');
await withMemoryLogging('Write Split Output', async () => {
for (const part of parts) {
const partConfig = {
...config,
output: {
...config.output,
stdout: false,
filePath: part.filePath,
},
};
// eslint-disable-next-line no-await-in-loop
await deps.writeOutputToDisk(part.content, partConfig);
}
await Promise.all(
parts.map((part) => {
const partConfig = {
...config,
output: {
...config.output,
stdout: false,
filePath: part.filePath,
},
};
return deps.writeOutputToDisk(part.content, partConfig);
}),
);
});

return {
Expand Down
5 changes: 5 additions & 0 deletions tests/core/packager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ describe('packager', () => {
produceOutput: vi.fn().mockResolvedValue({
outputForMetrics: mockOutput,
}),
createMetricsTaskRunner: vi.fn().mockReturnValue({
run: vi.fn().mockResolvedValue(0),
cleanup: vi.fn().mockResolvedValue(undefined),
}),
calculateMetrics: vi.fn().mockResolvedValue({
totalFiles: 2,
totalCharacters: 11,
Expand Down Expand Up @@ -107,6 +111,7 @@ describe('packager', () => {
mockConfig,
undefined,
undefined,
expect.objectContaining({ taskRunner: expect.anything() }),
);

// Check the result of pack function
Expand Down
Loading
Loading