diff --git a/src/core/metrics/calculateGitDiffMetrics.ts b/src/core/metrics/calculateGitDiffMetrics.ts index cbe3ec5ae..32fdb28ae 100644 --- a/src/core/metrics/calculateGitDiffMetrics.ts +++ b/src/core/metrics/calculateGitDiffMetrics.ts @@ -1,8 +1,7 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { logger } from '../../shared/logger.js'; -import type { TaskRunner } from '../../shared/processConcurrency.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; -import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; +import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js'; /** * Calculate token count for git diffs if included @@ -10,7 +9,7 @@ import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; export const calculateGitDiffMetrics = async ( config: RepomixConfigMerged, gitDiffResult: GitDiffResult | undefined, - deps: { taskRunner: TaskRunner }, + deps: { taskRunner: MetricsTaskRunner }, ): Promise => { if (!config.output.git?.includeDiffs || !gitDiffResult) { return 0; @@ -29,7 +28,7 @@ export const calculateGitDiffMetrics = async ( if (gitDiffResult.workTreeDiffContent) { countPromises.push( - deps.taskRunner.run({ + runTokenCount(deps.taskRunner, { content: gitDiffResult.workTreeDiffContent, encoding: config.tokenCount.encoding, }), @@ -37,7 +36,7 @@ export const calculateGitDiffMetrics = async ( } if (gitDiffResult.stagedDiffContent) { countPromises.push( - deps.taskRunner.run({ + runTokenCount(deps.taskRunner, { content: gitDiffResult.stagedDiffContent, encoding: config.tokenCount.encoding, }), diff --git a/src/core/metrics/calculateGitLogMetrics.ts b/src/core/metrics/calculateGitLogMetrics.ts index 97e94ae95..217bb954a 100644 --- a/src/core/metrics/calculateGitLogMetrics.ts +++ b/src/core/metrics/calculateGitLogMetrics.ts @@ -1,8 +1,7 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { logger } from '../../shared/logger.js'; -import type { TaskRunner } from '../../shared/processConcurrency.js'; import type { GitLogResult } from '../git/gitLogHandle.js'; -import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; +import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js'; /** * Calculate token count for git logs if included @@ -10,7 +9,7 @@ import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; export const calculateGitLogMetrics = async ( config: RepomixConfigMerged, gitLogResult: GitLogResult | undefined, - deps: { taskRunner: TaskRunner }, + deps: { taskRunner: MetricsTaskRunner }, ): Promise<{ gitLogTokenCount: number }> => { // Return zero token count if git logs are disabled or no result if (!config.output.git?.includeLogs || !gitLogResult) { @@ -30,7 +29,7 @@ export const calculateGitLogMetrics = async ( const startTime = process.hrtime.bigint(); logger.trace('Starting git log token calculation using worker'); - const result = await deps.taskRunner.run({ + const result = await runTokenCount(deps.taskRunner, { content: gitLogResult.logContent, encoding: config.tokenCount.encoding, }); diff --git a/src/core/metrics/calculateMetrics.ts b/src/core/metrics/calculateMetrics.ts index d20dea52c..1797a0270 100644 --- a/src/core/metrics/calculateMetrics.ts +++ b/src/core/metrics/calculateMetrics.ts @@ -1,5 +1,5 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js'; -import { getWorkerThreadCount, initTaskRunner, type TaskRunner } from '../../shared/processConcurrency.js'; +import { getWorkerThreadCount, initTaskRunner } from '../../shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../shared/types.js'; import type { ProcessedFile } from '../file/fileTypes.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; @@ -9,8 +9,9 @@ import { calculateGitDiffMetrics } from './calculateGitDiffMetrics.js'; import { calculateGitLogMetrics } from './calculateGitLogMetrics.js'; import { calculateOutputMetrics } from './calculateOutputMetrics.js'; import { calculateSelectiveFileMetrics } from './calculateSelectiveFileMetrics.js'; +import type { MetricsTaskRunner } from './metricsWorkerRunner.js'; import type { TokenEncoding } from './TokenCounter.js'; -import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; +import type { MetricsWorkerResult, MetricsWorkerTask } from './workers/calculateMetricsWorker.js'; export interface CalculateMetricsResult { totalFiles: number; @@ -23,7 +24,7 @@ export interface CalculateMetricsResult { } export interface MetricsTaskRunnerWithWarmup { - taskRunner: TaskRunner; + taskRunner: MetricsTaskRunner; warmupPromise: Promise; } @@ -34,7 +35,7 @@ export interface MetricsTaskRunnerWithWarmup { * output generation). */ export const createMetricsTaskRunner = (numOfTasks: number, encoding: TokenEncoding): MetricsTaskRunnerWithWarmup => { - const taskRunner = initTaskRunner({ + const taskRunner = initTaskRunner({ numOfTasks, workerType: 'calculateMetrics', runtime: 'worker_threads', @@ -53,7 +54,7 @@ const defaultDeps = { calculateOutputMetrics, calculateGitDiffMetrics, calculateGitLogMetrics, - taskRunner: undefined as TaskRunner | undefined, + taskRunner: undefined as MetricsTaskRunner | undefined, }; export const calculateMetrics = async ( @@ -72,7 +73,7 @@ export const calculateMetrics = async ( // Initialize a single task runner for all metrics calculations const taskRunner = deps.taskRunner ?? - initTaskRunner({ + initTaskRunner({ numOfTasks: processedFiles.length, workerType: 'calculateMetrics', runtime: 'worker_threads', diff --git a/src/core/metrics/calculateOutputMetrics.ts b/src/core/metrics/calculateOutputMetrics.ts index c809ab2a6..05f97a62f 100644 --- a/src/core/metrics/calculateOutputMetrics.ts +++ b/src/core/metrics/calculateOutputMetrics.ts @@ -1,7 +1,6 @@ import { logger } from '../../shared/logger.js'; -import type { TaskRunner } from '../../shared/processConcurrency.js'; +import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js'; import type { TokenEncoding } from './TokenCounter.js'; -import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; // Target ~100KB per chunk so that each worker task does meaningful tokenization work. // Previously this was 1000 (number of chunks), which created ~1KB chunks for 1MB output, @@ -13,7 +12,7 @@ export const calculateOutputMetrics = async ( content: string, encoding: TokenEncoding, path: string | undefined, - deps: { taskRunner: TaskRunner }, + deps: { taskRunner: MetricsTaskRunner }, ): Promise => { const shouldRunInParallel = content.length > MIN_CONTENT_LENGTH_FOR_PARALLEL; @@ -34,7 +33,7 @@ export const calculateOutputMetrics = async ( // Process chunks in parallel const chunkResults = await Promise.all( chunks.map(async (chunk, index) => { - return deps.taskRunner.run({ + return runTokenCount(deps.taskRunner, { content: chunk, encoding, path: path ? `${path}-chunk-${index}` : undefined, @@ -46,7 +45,7 @@ export const calculateOutputMetrics = async ( result = chunkResults.reduce((sum, count) => sum + count, 0); } else { // Process small content directly - result = await deps.taskRunner.run({ + result = await runTokenCount(deps.taskRunner, { content, encoding, path, diff --git a/src/core/metrics/calculateSelectiveFileMetrics.ts b/src/core/metrics/calculateSelectiveFileMetrics.ts index 36d0cb980..5abd0d0de 100644 --- a/src/core/metrics/calculateSelectiveFileMetrics.ts +++ b/src/core/metrics/calculateSelectiveFileMetrics.ts @@ -1,18 +1,23 @@ import pc from 'picocolors'; import { logger } from '../../shared/logger.js'; -import type { TaskRunner } from '../../shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../shared/types.js'; import type { ProcessedFile } from '../file/fileTypes.js'; +import { type MetricsTaskRunner, runBatchTokenCount } from './metricsWorkerRunner.js'; import type { TokenEncoding } from './TokenCounter.js'; -import type { TokenCountTask } from './workers/calculateMetricsWorker.js'; import type { FileMetrics } from './workers/types.js'; +// Batch size for grouping files into worker tasks to reduce IPC overhead. +// Each batch is sent as a single message to a worker thread, avoiding +// per-file round-trip costs (~0.5ms each) that dominate when processing many files. +// For 991 files: 991 round-trips → 20 batches, saving ~485ms of IPC overhead. +const METRICS_BATCH_SIZE = 50; + export const calculateSelectiveFileMetrics = async ( processedFiles: ProcessedFile[], targetFilePaths: string[], tokenCounterEncoding: TokenEncoding, progressCallback: RepomixProgressCallback, - deps: { taskRunner: TaskRunner }, + deps: { taskRunner: MetricsTaskRunner }, ): Promise => { const targetFileSet = new Set(targetFilePaths); const filesToProcess = processedFiles.filter((file) => targetFileSet.has(file.path)); @@ -25,33 +30,47 @@ export const calculateSelectiveFileMetrics = async ( const startTime = process.hrtime.bigint(); logger.trace(`Starting selective metrics calculation for ${filesToProcess.length} files using worker pool`); - let completedTasks = 0; - const results = await Promise.all( - filesToProcess.map(async (file) => { - const tokenCount = await deps.taskRunner.run({ - content: file.content, + // Split files into batches to reduce IPC round-trips + const batches: ProcessedFile[][] = []; + for (let i = 0; i < filesToProcess.length; i += METRICS_BATCH_SIZE) { + batches.push(filesToProcess.slice(i, i + METRICS_BATCH_SIZE)); + } + + logger.trace(`Split ${filesToProcess.length} files into ${batches.length} batches for token counting`); + + let completedItems = 0; + + const batchResults = await Promise.all( + batches.map(async (batch) => { + const tokenCounts = await runBatchTokenCount(deps.taskRunner, { + items: batch.map((file) => ({ content: file.content, path: file.path })), encoding: tokenCounterEncoding, - path: file.path, }); - const result: FileMetrics = { + const results: FileMetrics[] = batch.map((file, index) => ({ path: file.path, charCount: file.content.length, - tokenCount, - }; + tokenCount: tokenCounts[index], + })); + + completedItems += batch.length; + const lastFile = batch[batch.length - 1]; + progressCallback( + `Calculating metrics... (${completedItems}/${filesToProcess.length}) ${pc.dim(lastFile.path)}`, + ); + logger.trace(`Calculating metrics... (${completedItems}/${filesToProcess.length}) ${lastFile.path}`); - completedTasks++; - progressCallback(`Calculating metrics... (${completedTasks}/${filesToProcess.length}) ${pc.dim(file.path)}`); - logger.trace(`Calculating metrics... (${completedTasks}/${filesToProcess.length}) ${file.path}`); - return result; + return results; }), ); + const allResults = batchResults.flat(); + const endTime = process.hrtime.bigint(); const duration = Number(endTime - startTime) / 1e6; logger.trace(`Selective metrics calculation completed in ${duration.toFixed(2)}ms`); - return results; + return allResults; } catch (error) { logger.error('Error during selective metrics calculation:', error); throw error; diff --git a/src/core/metrics/metricsWorkerRunner.ts b/src/core/metrics/metricsWorkerRunner.ts new file mode 100644 index 000000000..7f6e5a806 --- /dev/null +++ b/src/core/metrics/metricsWorkerRunner.ts @@ -0,0 +1,17 @@ +import type { TaskRunner } from '../../shared/processConcurrency.js'; +import type { + MetricsWorkerResult, + MetricsWorkerTask, + TokenCountBatchTask, + TokenCountTask, +} from './workers/calculateMetricsWorker.js'; + +export type MetricsTaskRunner = TaskRunner; + +export const runTokenCount = (taskRunner: MetricsTaskRunner, task: TokenCountTask): Promise => { + return taskRunner.run(task) as Promise; +}; + +export const runBatchTokenCount = (taskRunner: MetricsTaskRunner, task: TokenCountBatchTask): Promise => { + return taskRunner.run(task) as Promise; +}; diff --git a/src/core/metrics/workers/calculateMetricsWorker.ts b/src/core/metrics/workers/calculateMetricsWorker.ts index 99729f474..65b56fba9 100644 --- a/src/core/metrics/workers/calculateMetricsWorker.ts +++ b/src/core/metrics/workers/calculateMetricsWorker.ts @@ -3,11 +3,11 @@ import type { TokenEncoding } from '../TokenCounter.js'; import { freeTokenCounters, getTokenCounter } from '../tokenCounterFactory.js'; /** - * Simple token counting worker for metrics calculation. + * Token counting worker for metrics calculation. * - * This worker provides a focused interface for counting tokens from text content, - * using gpt-tokenizer. All complex metric calculation logic is handled - * by the calling side to maintain separation of concerns. + * Supports both single-content and batch modes. Batch mode reduces IPC overhead + * by processing multiple files per worker round-trip (~0.5ms overhead per round-trip). + * For 991 files, batching with size 50 reduces round-trips from 991 to 20. */ // Initialize logger configuration from workerData at module load time @@ -20,6 +20,19 @@ export interface TokenCountTask { path?: string; } +export interface TokenCountBatchItem { + content: string; + path?: string; +} + +export interface TokenCountBatchTask { + items: TokenCountBatchItem[]; + encoding: TokenEncoding; +} + +export type MetricsWorkerTask = TokenCountTask | TokenCountBatchTask; +export type MetricsWorkerResult = number | number[]; + export const countTokens = async (task: TokenCountTask): Promise => { const processStartAt = process.hrtime.bigint(); @@ -35,12 +48,30 @@ export const countTokens = async (task: TokenCountTask): Promise => { } }; +const countTokensBatch = async (task: TokenCountBatchTask): Promise => { + const processStartAt = process.hrtime.bigint(); + + try { + const counter = await getTokenCounter(task.encoding); + const results = task.items.map((item) => counter.countTokens(item.content, item.path)); + + logger.trace(`Counted tokens for ${task.items.length} items. Took: ${getProcessDuration(processStartAt)}ms`); + return results; + } catch (error) { + logger.error('Error in batch token counting worker:', error); + throw error; + } +}; + const getProcessDuration = (startTime: bigint): string => { const endTime = process.hrtime.bigint(); return (Number(endTime - startTime) / 1e6).toFixed(2); }; -export default async (task: TokenCountTask): Promise => { +export default async (task: MetricsWorkerTask): Promise => { + if ('items' in task) { + return countTokensBatch(task); + } return countTokens(task); }; diff --git a/src/shared/unifiedWorker.ts b/src/shared/unifiedWorker.ts index 5032a3041..814b98a16 100644 --- a/src/shared/unifiedWorker.ts +++ b/src/shared/unifiedWorker.ts @@ -80,12 +80,12 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => { return 'fileProcess'; } - // calculateMetrics: has content, encoding (must check before securityCheck) - if ('content' in taskObj && 'encoding' in taskObj) { + // calculateMetrics: single mode has content+encoding, batch mode has items+encoding + if ('encoding' in taskObj && ('content' in taskObj || 'items' in taskObj)) { return 'calculateMetrics'; } - // securityCheck: has items array (without encoding, which distinguishes it from calculateMetrics) + // securityCheck: has items array without encoding (distinguishes from batch calculateMetrics) if ('items' in taskObj && !('encoding' in taskObj)) { return 'securityCheck'; } diff --git a/tests/core/metrics/calculateGitDiffMetrics.test.ts b/tests/core/metrics/calculateGitDiffMetrics.test.ts index adcdf5d75..73231c456 100644 --- a/tests/core/metrics/calculateGitDiffMetrics.test.ts +++ b/tests/core/metrics/calculateGitDiffMetrics.test.ts @@ -2,16 +2,21 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { RepomixConfigMerged } from '../../../src/config/configSchema.js'; import type { GitDiffResult } from '../../../src/core/git/gitDiffHandle.js'; import { calculateGitDiffMetrics } from '../../../src/core/metrics/calculateGitDiffMetrics.js'; -import { countTokens, type TokenCountTask } from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; +import type { MetricsTaskRunner } from '../../../src/core/metrics/metricsWorkerRunner.js'; +import { + countTokens, + type MetricsWorkerTask, + type TokenCountTask, +} from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; import { logger } from '../../../src/shared/logger.js'; -import type { TaskRunner, WorkerOptions } from '../../../src/shared/processConcurrency.js'; +import type { WorkerOptions } from '../../../src/shared/processConcurrency.js'; vi.mock('../../../src/shared/logger'); -const mockInitTaskRunner = (_options: WorkerOptions): TaskRunner => { +const mockInitTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (task: TokenCountTask) => { - return await countTokens(task); + run: async (task: MetricsWorkerTask) => { + return await countTokens(task as TokenCountTask); }, cleanup: async () => { // Mock cleanup - no-op for tests @@ -172,7 +177,7 @@ describe('calculateGitDiffMetrics', () => { .mockResolvedValueOnce(5) // workTree tokens .mockResolvedValueOnce(3); // staged tokens - const customTaskRunner: TaskRunner = { + const customTaskRunner: MetricsTaskRunner = { run: mockTaskRunnerSpy, cleanup: async () => {}, }; @@ -201,7 +206,7 @@ describe('calculateGitDiffMetrics', () => { const mockTaskRunnerSpy = vi.fn().mockResolvedValueOnce(7); - const customTaskRunner: TaskRunner = { + const customTaskRunner: MetricsTaskRunner = { run: mockTaskRunnerSpy, cleanup: async () => {}, }; @@ -226,7 +231,7 @@ describe('calculateGitDiffMetrics', () => { const mockTaskRunnerSpy = vi.fn().mockResolvedValueOnce(4); - const customTaskRunner: TaskRunner = { + const customTaskRunner: MetricsTaskRunner = { run: mockTaskRunnerSpy, cleanup: async () => {}, }; @@ -266,7 +271,7 @@ describe('calculateGitDiffMetrics', () => { stagedDiffContent: 'some staged content', }; - const errorTaskRunner: TaskRunner = { + const errorTaskRunner: MetricsTaskRunner = { run: vi.fn().mockRejectedValue(new Error('Task runner failed')), cleanup: async () => {}, }; @@ -286,7 +291,7 @@ describe('calculateGitDiffMetrics', () => { stagedDiffContent: 'staged content', }; - const errorTaskRunner: TaskRunner = { + const errorTaskRunner: MetricsTaskRunner = { run: vi .fn() .mockResolvedValueOnce(5) // First call succeeds @@ -338,7 +343,7 @@ describe('calculateGitDiffMetrics', () => { const mockTaskRunnerSpy = vi.fn().mockResolvedValueOnce(10); - const customTaskRunner: TaskRunner = { + const customTaskRunner: MetricsTaskRunner = { run: mockTaskRunnerSpy, cleanup: async () => {}, }; diff --git a/tests/core/metrics/calculateGitLogMetrics.test.ts b/tests/core/metrics/calculateGitLogMetrics.test.ts index 1c53b90b7..b5a7a4364 100644 --- a/tests/core/metrics/calculateGitLogMetrics.test.ts +++ b/tests/core/metrics/calculateGitLogMetrics.test.ts @@ -2,16 +2,21 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { RepomixConfigMerged } from '../../../src/config/configSchema.js'; import type { GitLogResult } from '../../../src/core/git/gitLogHandle.js'; import { calculateGitLogMetrics } from '../../../src/core/metrics/calculateGitLogMetrics.js'; -import { countTokens, type TokenCountTask } from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; +import type { MetricsTaskRunner } from '../../../src/core/metrics/metricsWorkerRunner.js'; +import { + countTokens, + type MetricsWorkerTask, + type TokenCountTask, +} from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; import { logger } from '../../../src/shared/logger.js'; -import type { TaskRunner, WorkerOptions } from '../../../src/shared/processConcurrency.js'; +import type { WorkerOptions } from '../../../src/shared/processConcurrency.js'; vi.mock('../../../src/shared/logger'); -const mockInitTaskRunner = (_options: WorkerOptions): TaskRunner => { +const mockInitTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (task: TokenCountTask) => { - return await countTokens(task); + run: async (task: MetricsWorkerTask) => { + return await countTokens(task as TokenCountTask); }, cleanup: async () => { // Mock cleanup - no-op for tests @@ -169,7 +174,7 @@ describe('calculateGitLogMetrics', () => { const mockTaskRunnerSpy = vi.fn().mockResolvedValueOnce(15); - const customTaskRunner: TaskRunner = { + const customTaskRunner: MetricsTaskRunner = { run: mockTaskRunnerSpy, cleanup: async () => {}, }; @@ -243,7 +248,7 @@ Date: Sun Dec 31 18:30:00 2022 +0000 commits: [], }; - const errorTaskRunner: TaskRunner = { + const errorTaskRunner: MetricsTaskRunner = { run: vi.fn().mockRejectedValue(new Error('Task runner failed')), cleanup: async () => {}, }; @@ -330,7 +335,7 @@ Date: Sun Dec 31 18:30:00 2022 +0000 const mockTaskRunnerSpy = vi.fn().mockResolvedValueOnce(10); - const customTaskRunner: TaskRunner = { + const customTaskRunner: MetricsTaskRunner = { run: mockTaskRunnerSpy, cleanup: async () => {}, }; diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts b/tests/core/metrics/calculateOutputMetrics.test.ts index 2b42d0731..8a21ca5c0 100644 --- a/tests/core/metrics/calculateOutputMetrics.test.ts +++ b/tests/core/metrics/calculateOutputMetrics.test.ts @@ -1,15 +1,20 @@ import { describe, expect, it, vi } from 'vitest'; import { calculateOutputMetrics } from '../../../src/core/metrics/calculateOutputMetrics.js'; -import { countTokens, type TokenCountTask } from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; +import type { MetricsTaskRunner } from '../../../src/core/metrics/metricsWorkerRunner.js'; +import { + countTokens, + type MetricsWorkerTask, + type TokenCountTask, +} from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; import { logger } from '../../../src/shared/logger.js'; import type { WorkerOptions } from '../../../src/shared/processConcurrency.js'; vi.mock('../../../src/shared/logger'); -const mockInitTaskRunner = (_options: WorkerOptions) => { +const mockInitTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (task: T) => { - return (await countTokens(task as TokenCountTask)) as R; + run: async (task: MetricsWorkerTask) => { + return await countTokens(task as TokenCountTask); }, cleanup: async () => { // Mock cleanup - no-op for tests @@ -46,9 +51,9 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const mockError = new Error('Worker error'); - const mockErrorTaskRunner = (_options: WorkerOptions) => { + const mockErrorTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (_task: T) => { + run: async () => { throw mockError; }, cleanup: async () => { @@ -96,12 +101,12 @@ describe('calculateOutputMetrics', () => { const path = 'large-file.txt'; let chunksProcessed = 0; - const mockParallelTaskRunner = (_options: WorkerOptions) => { + const mockParallelTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (_task: T) => { + run: async () => { chunksProcessed++; // Return a fixed token count for each chunk - return 100 as R; + return 100; }, cleanup: async () => { // Mock cleanup - no-op for tests @@ -122,9 +127,9 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const mockError = new Error('Parallel processing error'); - const mockErrorTaskRunner = (_options: WorkerOptions) => { + const mockErrorTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (_task: T) => { + run: async () => { throw mockError; }, cleanup: async () => { @@ -147,12 +152,12 @@ describe('calculateOutputMetrics', () => { const encoding = 'o200k_base'; const processedChunks: string[] = []; - const mockChunkTrackingTaskRunner = (_options: WorkerOptions) => { + const mockChunkTrackingTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (task: T) => { + run: async (task: MetricsWorkerTask) => { const outputTask = task as TokenCountTask; processedChunks.push(outputTask.content); - return outputTask.content.length as R; + return outputTask.content.length; }, cleanup: async () => { // Mock cleanup - no-op for tests diff --git a/tests/core/metrics/calculateSelectiveFileMetrics.test.ts b/tests/core/metrics/calculateSelectiveFileMetrics.test.ts index 2e89b4161..fd43529f4 100644 --- a/tests/core/metrics/calculateSelectiveFileMetrics.test.ts +++ b/tests/core/metrics/calculateSelectiveFileMetrics.test.ts @@ -1,7 +1,13 @@ import { describe, expect, it, vi } from 'vitest'; import type { ProcessedFile } from '../../../src/core/file/fileTypes.js'; import { calculateSelectiveFileMetrics } from '../../../src/core/metrics/calculateSelectiveFileMetrics.js'; -import { countTokens, type TokenCountTask } from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; +import type { MetricsTaskRunner } from '../../../src/core/metrics/metricsWorkerRunner.js'; +import { + countTokens, + type MetricsWorkerTask, + type TokenCountBatchTask, + type TokenCountTask, +} from '../../../src/core/metrics/workers/calculateMetricsWorker.js'; import type { WorkerOptions } from '../../../src/shared/processConcurrency.js'; import type { RepomixProgressCallback } from '../../../src/shared/types.js'; @@ -9,10 +15,18 @@ vi.mock('../../shared/processConcurrency', () => ({ getProcessConcurrency: () => 1, })); -const mockInitTaskRunner = (_options: WorkerOptions) => { +const mockInitTaskRunner = (_options: WorkerOptions): MetricsTaskRunner => { return { - run: async (task: T) => { - return (await countTokens(task as TokenCountTask)) as R; + run: async (task: MetricsWorkerTask) => { + if ('items' in task) { + const batchTask = task as TokenCountBatchTask; + return Promise.all( + batchTask.items.map((item) => + countTokens({ content: item.content, encoding: batchTask.encoding, path: item.path }), + ), + ); + } + return countTokens(task as TokenCountTask); }, cleanup: async () => { // Mock cleanup - no-op for tests