Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/core/metrics/calculateGitDiffMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import type { RepomixConfigMerged } from '../../config/configSchema.js';
import { logger } from '../../shared/logger.js';
import type { TaskRunner } from '../../shared/processConcurrency.js';
import type { GitDiffResult } from '../git/gitDiffHandle.js';
import type { TokenCountTask } from './workers/calculateMetricsWorker.js';
import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js';

/**
* Calculate token count for git diffs if included
*/
export const calculateGitDiffMetrics = async (
config: RepomixConfigMerged,
gitDiffResult: GitDiffResult | undefined,
deps: { taskRunner: TaskRunner<TokenCountTask, number> },
deps: { taskRunner: MetricsTaskRunner },
): Promise<number> => {
if (!config.output.git?.includeDiffs || !gitDiffResult) {
return 0;
Expand All @@ -29,15 +28,15 @@ export const calculateGitDiffMetrics = async (

if (gitDiffResult.workTreeDiffContent) {
countPromises.push(
deps.taskRunner.run({
runTokenCount(deps.taskRunner, {
content: gitDiffResult.workTreeDiffContent,
encoding: config.tokenCount.encoding,
}),
);
}
if (gitDiffResult.stagedDiffContent) {
countPromises.push(
deps.taskRunner.run({
runTokenCount(deps.taskRunner, {
content: gitDiffResult.stagedDiffContent,
encoding: config.tokenCount.encoding,
}),
Expand Down
7 changes: 3 additions & 4 deletions src/core/metrics/calculateGitLogMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import type { RepomixConfigMerged } from '../../config/configSchema.js';
import { logger } from '../../shared/logger.js';
import type { TaskRunner } from '../../shared/processConcurrency.js';
import type { GitLogResult } from '../git/gitLogHandle.js';
import type { TokenCountTask } from './workers/calculateMetricsWorker.js';
import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js';

/**
* Calculate token count for git logs if included
*/
export const calculateGitLogMetrics = async (
config: RepomixConfigMerged,
gitLogResult: GitLogResult | undefined,
deps: { taskRunner: TaskRunner<TokenCountTask, number> },
deps: { taskRunner: MetricsTaskRunner },
): Promise<{ gitLogTokenCount: number }> => {
// Return zero token count if git logs are disabled or no result
if (!config.output.git?.includeLogs || !gitLogResult) {
Expand All @@ -30,7 +29,7 @@ export const calculateGitLogMetrics = async (
const startTime = process.hrtime.bigint();
logger.trace('Starting git log token calculation using worker');

const result = await deps.taskRunner.run({
const result = await runTokenCount(deps.taskRunner, {
content: gitLogResult.logContent,
encoding: config.tokenCount.encoding,
});
Expand Down
13 changes: 7 additions & 6 deletions src/core/metrics/calculateMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { RepomixConfigMerged } from '../../config/configSchema.js';
import { getWorkerThreadCount, initTaskRunner, type TaskRunner } from '../../shared/processConcurrency.js';
import { getWorkerThreadCount, initTaskRunner } from '../../shared/processConcurrency.js';
import type { RepomixProgressCallback } from '../../shared/types.js';
import type { ProcessedFile } from '../file/fileTypes.js';
import type { GitDiffResult } from '../git/gitDiffHandle.js';
Expand All @@ -9,8 +9,9 @@ import { calculateGitDiffMetrics } from './calculateGitDiffMetrics.js';
import { calculateGitLogMetrics } from './calculateGitLogMetrics.js';
import { calculateOutputMetrics } from './calculateOutputMetrics.js';
import { calculateSelectiveFileMetrics } from './calculateSelectiveFileMetrics.js';
import type { MetricsTaskRunner } from './metricsWorkerRunner.js';
import type { TokenEncoding } from './TokenCounter.js';
import type { TokenCountTask } from './workers/calculateMetricsWorker.js';
import type { MetricsWorkerResult, MetricsWorkerTask } from './workers/calculateMetricsWorker.js';

export interface CalculateMetricsResult {
totalFiles: number;
Expand All @@ -23,7 +24,7 @@ export interface CalculateMetricsResult {
}

export interface MetricsTaskRunnerWithWarmup {
taskRunner: TaskRunner<TokenCountTask, number>;
taskRunner: MetricsTaskRunner;
warmupPromise: Promise<unknown>;
}

Expand All @@ -34,7 +35,7 @@ export interface MetricsTaskRunnerWithWarmup {
* output generation).
*/
export const createMetricsTaskRunner = (numOfTasks: number, encoding: TokenEncoding): MetricsTaskRunnerWithWarmup => {
const taskRunner = initTaskRunner<TokenCountTask, number>({
const taskRunner = initTaskRunner<MetricsWorkerTask, MetricsWorkerResult>({
numOfTasks,
workerType: 'calculateMetrics',
runtime: 'worker_threads',
Expand All @@ -53,7 +54,7 @@ const defaultDeps = {
calculateOutputMetrics,
calculateGitDiffMetrics,
calculateGitLogMetrics,
taskRunner: undefined as TaskRunner<TokenCountTask, number> | undefined,
taskRunner: undefined as MetricsTaskRunner | undefined,
};

export const calculateMetrics = async (
Expand All @@ -72,7 +73,7 @@ export const calculateMetrics = async (
// Initialize a single task runner for all metrics calculations
const taskRunner =
deps.taskRunner ??
initTaskRunner<TokenCountTask, number>({
initTaskRunner<MetricsWorkerTask, MetricsWorkerResult>({
numOfTasks: processedFiles.length,
workerType: 'calculateMetrics',
runtime: 'worker_threads',
Expand Down
9 changes: 4 additions & 5 deletions src/core/metrics/calculateOutputMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { logger } from '../../shared/logger.js';
import type { TaskRunner } from '../../shared/processConcurrency.js';
import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js';
import type { TokenEncoding } from './TokenCounter.js';
import type { TokenCountTask } from './workers/calculateMetricsWorker.js';

// Target ~100KB per chunk so that each worker task does meaningful tokenization work.
// Previously this was 1000 (number of chunks), which created ~1KB chunks for 1MB output,
Expand All @@ -13,7 +12,7 @@ export const calculateOutputMetrics = async (
content: string,
encoding: TokenEncoding,
path: string | undefined,
deps: { taskRunner: TaskRunner<TokenCountTask, number> },
deps: { taskRunner: MetricsTaskRunner },
): Promise<number> => {
const shouldRunInParallel = content.length > MIN_CONTENT_LENGTH_FOR_PARALLEL;

Expand All @@ -34,7 +33,7 @@ export const calculateOutputMetrics = async (
// Process chunks in parallel
const chunkResults = await Promise.all(
chunks.map(async (chunk, index) => {
return deps.taskRunner.run({
return runTokenCount(deps.taskRunner, {
content: chunk,
encoding,
path: path ? `${path}-chunk-${index}` : undefined,
Expand All @@ -46,7 +45,7 @@ export const calculateOutputMetrics = async (
result = chunkResults.reduce((sum, count) => sum + count, 0);
} else {
// Process small content directly
result = await deps.taskRunner.run({
result = await runTokenCount(deps.taskRunner, {
content,
encoding,
path,
Expand Down
53 changes: 36 additions & 17 deletions src/core/metrics/calculateSelectiveFileMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
import pc from 'picocolors';
import { logger } from '../../shared/logger.js';
import type { TaskRunner } from '../../shared/processConcurrency.js';
import type { RepomixProgressCallback } from '../../shared/types.js';
import type { ProcessedFile } from '../file/fileTypes.js';
import { type MetricsTaskRunner, runBatchTokenCount } from './metricsWorkerRunner.js';
import type { TokenEncoding } from './TokenCounter.js';
import type { TokenCountTask } from './workers/calculateMetricsWorker.js';
import type { FileMetrics } from './workers/types.js';

// Batch size for grouping files into worker tasks to reduce IPC overhead.
// Each batch is sent as a single message to a worker thread, avoiding
// per-file round-trip costs (~0.5ms each) that dominate when processing many files.
// For 991 files: 991 round-trips → 20 batches, saving ~485ms of IPC overhead.
const METRICS_BATCH_SIZE = 50;
Comment thread
yamadashy marked this conversation as resolved.

export const calculateSelectiveFileMetrics = async (
processedFiles: ProcessedFile[],
targetFilePaths: string[],
tokenCounterEncoding: TokenEncoding,
progressCallback: RepomixProgressCallback,
deps: { taskRunner: TaskRunner<TokenCountTask, number> },
deps: { taskRunner: MetricsTaskRunner },
): Promise<FileMetrics[]> => {
const targetFileSet = new Set(targetFilePaths);
const filesToProcess = processedFiles.filter((file) => targetFileSet.has(file.path));
Expand All @@ -25,33 +30,47 @@ export const calculateSelectiveFileMetrics = async (
const startTime = process.hrtime.bigint();
logger.trace(`Starting selective metrics calculation for ${filesToProcess.length} files using worker pool`);

let completedTasks = 0;
const results = await Promise.all(
filesToProcess.map(async (file) => {
const tokenCount = await deps.taskRunner.run({
content: file.content,
// Split files into batches to reduce IPC round-trips
const batches: ProcessedFile[][] = [];
for (let i = 0; i < filesToProcess.length; i += METRICS_BATCH_SIZE) {
batches.push(filesToProcess.slice(i, i + METRICS_BATCH_SIZE));
}

logger.trace(`Split ${filesToProcess.length} files into ${batches.length} batches for token counting`);

let completedItems = 0;

const batchResults = await Promise.all(
batches.map(async (batch) => {
const tokenCounts = await runBatchTokenCount(deps.taskRunner, {
items: batch.map((file) => ({ content: file.content, path: file.path })),
encoding: tokenCounterEncoding,
path: file.path,
});

const result: FileMetrics = {
const results: FileMetrics[] = batch.map((file, index) => ({
path: file.path,
charCount: file.content.length,
tokenCount,
};
tokenCount: tokenCounts[index],
}));

completedItems += batch.length;
const lastFile = batch[batch.length - 1];
progressCallback(
`Calculating metrics... (${completedItems}/${filesToProcess.length}) ${pc.dim(lastFile.path)}`,
);
logger.trace(`Calculating metrics... (${completedItems}/${filesToProcess.length}) ${lastFile.path}`);

completedTasks++;
progressCallback(`Calculating metrics... (${completedTasks}/${filesToProcess.length}) ${pc.dim(file.path)}`);
logger.trace(`Calculating metrics... (${completedTasks}/${filesToProcess.length}) ${file.path}`);
return result;
return results;
}),
);

const allResults = batchResults.flat();

const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1e6;
logger.trace(`Selective metrics calculation completed in ${duration.toFixed(2)}ms`);

return results;
return allResults;
} catch (error) {
logger.error('Error during selective metrics calculation:', error);
throw error;
Expand Down
17 changes: 17 additions & 0 deletions src/core/metrics/metricsWorkerRunner.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import type { TaskRunner } from '../../shared/processConcurrency.js';
import type {
MetricsWorkerResult,
MetricsWorkerTask,
TokenCountBatchTask,
TokenCountTask,
} from './workers/calculateMetricsWorker.js';

/**
 * Task runner shared by the metrics calculations. Accepts both the
 * single-content and batch task shapes and therefore resolves with the
 * corresponding union result (number | number[]); use the typed helpers
 * below instead of calling `run` directly.
 */
export type MetricsTaskRunner = TaskRunner<MetricsWorkerTask, MetricsWorkerResult>;

/**
 * Dispatch a single-content token-count task to the metrics worker pool.
 *
 * The worker resolves single-content tasks with a plain number, so the
 * union result type is narrowed once here at the call boundary rather
 * than with a cast at every call site.
 */
export function runTokenCount(taskRunner: MetricsTaskRunner, task: TokenCountTask): Promise<number> {
  const pending = taskRunner.run(task);
  return pending as Promise<number>;
}

/**
 * Dispatch a batch token-count task (many files in one worker round-trip)
 * to the metrics worker pool.
 *
 * The worker resolves batch tasks with one count per item, in item order;
 * the union result type is narrowed once here at the call boundary.
 */
export function runBatchTokenCount(taskRunner: MetricsTaskRunner, task: TokenCountBatchTask): Promise<number[]> {
  const pending = taskRunner.run(task);
  return pending as Promise<number[]>;
}
41 changes: 36 additions & 5 deletions src/core/metrics/workers/calculateMetricsWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ import type { TokenEncoding } from '../TokenCounter.js';
import { freeTokenCounters, getTokenCounter } from '../tokenCounterFactory.js';

/**
* Simple token counting worker for metrics calculation.
* Token counting worker for metrics calculation.
*
* This worker provides a focused interface for counting tokens from text content,
* using gpt-tokenizer. All complex metric calculation logic is handled
* by the calling side to maintain separation of concerns.
* Supports both single-content and batch modes. Batch mode reduces IPC overhead
* by processing multiple files per worker round-trip (~0.5ms overhead per round-trip).
* For 991 files, batching with size 50 reduces round-trips from 991 to 20.
*/

// Initialize logger configuration from workerData at module load time
Expand All @@ -20,6 +20,19 @@ export interface TokenCountTask {
path?: string;
}

/** One file's worth of text inside a batch task. */
export interface TokenCountBatchItem {
  content: string;
  // Optional identifier forwarded to the token counter alongside the content
  // (presumably used for logging/diagnostics — confirm against TokenCounter).
  path?: string;
}

/**
 * Batch-mode task: token-count every item in a single worker round-trip,
 * reducing per-task IPC overhead versus one message per file.
 */
export interface TokenCountBatchTask {
  items: TokenCountBatchItem[];
  encoding: TokenEncoding;
}

/** Union of the two task shapes this worker accepts (single vs. batch). */
export type MetricsWorkerTask = TokenCountTask | TokenCountBatchTask;
/** `number` for a TokenCountTask, `number[]` (one per item) for a TokenCountBatchTask. */
export type MetricsWorkerResult = number | number[];

export const countTokens = async (task: TokenCountTask): Promise<number> => {
const processStartAt = process.hrtime.bigint();

Expand All @@ -35,12 +48,30 @@ export const countTokens = async (task: TokenCountTask): Promise<number> => {
}
};

const countTokensBatch = async (task: TokenCountBatchTask): Promise<number[]> => {
const processStartAt = process.hrtime.bigint();

try {
const counter = await getTokenCounter(task.encoding);
const results = task.items.map((item) => counter.countTokens(item.content, item.path));

logger.trace(`Counted tokens for ${task.items.length} items. Took: ${getProcessDuration(processStartAt)}ms`);
return results;
} catch (error) {
logger.error('Error in batch token counting worker:', error);
throw error;
}
};

/**
 * Elapsed time since `startTime` (a `process.hrtime.bigint()` reading),
 * formatted as milliseconds with two decimal places.
 */
const getProcessDuration = (startTime: bigint): string => {
  const elapsedNs = process.hrtime.bigint() - startTime;
  const elapsedMs = Number(elapsedNs) / 1e6;
  return elapsedMs.toFixed(2);
};

export default async (task: TokenCountTask): Promise<number> => {
export default async (task: MetricsWorkerTask): Promise<MetricsWorkerResult> => {
if ('items' in task) {
return countTokensBatch(task);
}
return countTokens(task);
};

Expand Down
6 changes: 3 additions & 3 deletions src/shared/unifiedWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => {
return 'fileProcess';
}

// calculateMetrics: has content, encoding (must check before securityCheck)
if ('content' in taskObj && 'encoding' in taskObj) {
// calculateMetrics: single mode has content+encoding, batch mode has items+encoding
if ('encoding' in taskObj && ('content' in taskObj || 'items' in taskObj)) {
return 'calculateMetrics';
}

// securityCheck: has items array (without encoding, which distinguishes it from calculateMetrics)
// securityCheck: has items array without encoding (distinguishes from batch calculateMetrics)
if ('items' in taskObj && !('encoding' in taskObj)) {
return 'securityCheck';
}
Expand Down
Loading
Loading