Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 58 additions & 58 deletions src/core/file/fileCollect.ts
Original file line number Diff line number Diff line change
@@ -1,82 +1,82 @@
import path from 'node:path';
import pc from 'picocolors';
import type { RepomixConfigMerged } from '../../config/configSchema.js';
import { logger } from '../../shared/logger.js';
import { initTaskRunner } from '../../shared/processConcurrency.js';
import type { RepomixProgressCallback } from '../../shared/types.js';
import { readRawFile as defaultReadRawFile, type FileSkipReason } from './fileRead.js';
import type { RawFile } from './fileTypes.js';
import type { FileCollectResult, FileCollectTask, SkippedFileInfo } from './workers/fileCollectWorker.js';

// Concurrency limit for parallel file reads on the main thread.
// 50 balances I/O throughput with FD/memory safety across different machines.
const FILE_COLLECT_CONCURRENCY = 50;

/**
 * A file that was skipped during collection, together with the reason.
 *
 * NOTE(review): `SkippedFileInfo` is also imported as a type from
 * './fileRead.js' / './workers/fileCollectWorker.js' above — two declarations
 * of the same exported name will not compile; confirm the worker-era
 * import/re-export is removed so this local interface is the single source.
 */
export interface SkippedFileInfo {
path: string;
reason: FileSkipReason;
}

/**
 * Aggregate result of `collectFiles`: the files read successfully plus the
 * files that were skipped (each with a machine-readable reason).
 */
export interface FileCollectResults {
rawFiles: RawFile[];
skippedFiles: SkippedFileInfo[];
}

// (The former re-export of SkippedFileInfo from the deleted worker module is
// removed here: it duplicated the locally declared `SkippedFileInfo` interface,
// which is a duplicate-identifier compile error.)

/**
 * Runs `fn` over every item with at most `concurrency` calls in flight at once.
 *
 * Results are returned in the same order as `items`, regardless of completion
 * order. If any call rejects, the returned promise rejects (in-flight calls
 * are not cancelled).
 *
 * @param items - Work items to process.
 * @param concurrency - Maximum number of simultaneous `fn` calls; values below
 *   1 are clamped to 1 so the pool always makes progress.
 * @param fn - Async mapper applied to each item.
 * @returns Results of `fn`, index-aligned with `items`.
 */
const promisePool = async <T, R>(items: T[], concurrency: number, fn: (item: T) => Promise<R>): Promise<R[]> => {
  const results: R[] = new Array<R>(items.length);
  let nextIndex = 0;

  // Each worker pulls the next unclaimed index. Safe without locks: JS is
  // single-threaded, and `nextIndex++` happens synchronously before the await.
  const worker = async () => {
    while (nextIndex < items.length) {
      const i = nextIndex++;
      results[i] = await fn(items[i]);
    }
  };

  // Clamp to [1, items.length]: a non-positive concurrency would otherwise
  // spawn zero workers and resolve immediately with an array of holes.
  const workerCount = Math.max(1, Math.min(concurrency, items.length));
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return results;
};

export const collectFiles = async (
filePaths: string[],
rootDir: string,
config: RepomixConfigMerged,
progressCallback: RepomixProgressCallback = () => {},
deps = {
initTaskRunner,
readRawFile: defaultReadRawFile,
},
): Promise<FileCollectResults> => {
const taskRunner = deps.initTaskRunner<FileCollectTask, FileCollectResult>({
numOfTasks: filePaths.length,
workerType: 'fileCollect',
runtime: 'worker_threads',
const startTime = process.hrtime.bigint();
logger.trace(`Starting file collection for ${filePaths.length} files`);

let completedTasks = 0;
const totalTasks = filePaths.length;
const maxFileSize = config.input.maxFileSize;

const results = await promisePool(filePaths, FILE_COLLECT_CONCURRENCY, async (filePath) => {
const fullPath = path.resolve(rootDir, filePath);
const result = await deps.readRawFile(fullPath, maxFileSize);

completedTasks++;
progressCallback(`Collect file... (${completedTasks}/${totalTasks}) ${pc.dim(filePath)}`);
logger.trace(`Collect files... (${completedTasks}/${totalTasks}) ${filePath}`);

return { filePath, result };
});
const tasks = filePaths.map(
(filePath) =>
({
filePath,
rootDir,
maxFileSize: config.input.maxFileSize,
}) satisfies FileCollectTask,
);

try {
const startTime = process.hrtime.bigint();
logger.trace(`Starting file collection for ${filePaths.length} files using worker pool`);

let completedTasks = 0;
const totalTasks = tasks.length;

const results = await Promise.all(
tasks.map((task) =>
taskRunner.run(task).then((result) => {
completedTasks++;
progressCallback(`Collect file... (${completedTasks}/${totalTasks}) ${pc.dim(task.filePath)}`);
logger.trace(`Collect files... (${completedTasks}/${totalTasks}) ${task.filePath}`);
return result;
}),
),
);

const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1e6;
logger.trace(`File collection completed in ${duration.toFixed(2)}ms`);

const rawFiles: RawFile[] = [];
const skippedFiles: SkippedFileInfo[] = [];

for (const result of results) {
if (result.rawFile) {
rawFiles.push(result.rawFile);
}
if (result.skippedFile) {
skippedFiles.push(result.skippedFile);
}
}

return { rawFiles, skippedFiles };
} catch (error) {
logger.error('Error during file collection:', error);
throw error;
} finally {
// Always cleanup worker pool
await taskRunner.cleanup();
const rawFiles: RawFile[] = [];
const skippedFiles: SkippedFileInfo[] = [];

for (const { filePath, result } of results) {
if (result.content !== null) {
rawFiles.push({ path: filePath, content: result.content });
} else if (result.skippedReason) {
skippedFiles.push({ path: filePath, reason: result.skippedReason });
}
}

const endTime = process.hrtime.bigint();
const duration = Number(endTime - startTime) / 1e6;
logger.trace(`File collection completed in ${duration.toFixed(2)}ms`);

return { rawFiles, skippedFiles };
};
27 changes: 14 additions & 13 deletions src/core/file/fileRead.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,25 @@ export const readRawFile = async (filePath: string, maxFileSize: number): Promis
return { content: null, skippedReason: 'binary-content' };
}

// Fast path: Try UTF-8 decoding first (covers ~99% of source code files)
// This skips the expensive jschardet.detect() which scans the entire buffer
// through multiple encoding probers with frequency table lookups
try {
let content = new TextDecoder('utf-8', { fatal: true }).decode(buffer);
if (content.charCodeAt(0) === 0xfeff) {
content = content.slice(1); // strip UTF-8 BOM
}
return { content };
} catch {
// Not valid UTF-8, fall through to encoding detection
}

// Slow path: Detect encoding with jschardet for non-UTF-8 files (e.g., Shift-JIS, EUC-KR)
const { encoding: detectedEncoding } = jschardet.detect(buffer) ?? {};
const encoding = detectedEncoding && iconv.encodingExists(detectedEncoding) ? detectedEncoding : 'utf-8';

const content = iconv.decode(buffer, encoding, { stripBOM: true });

// Only skip if there are actual decode errors (U+FFFD replacement characters)
// Don't rely on jschardet confidence as it can return low values for valid UTF-8/ASCII files
if (content.includes('\uFFFD')) {
// For UTF-8, distinguish invalid byte sequences from a legitimate U+FFFD in the source
if (encoding.toLowerCase() === 'utf-8') {
try {
let utf8 = new TextDecoder('utf-8', { fatal: true }).decode(buffer);
if (utf8.charCodeAt(0) === 0xfeff) utf8 = utf8.slice(1); // strip UTF-8 BOM
return { content: utf8 };
} catch {
// fall through to skip below
}
}
logger.debug(`Skipping file due to encoding errors (detected: ${encoding}): ${filePath}`);
return { content: null, skippedReason: 'encoding-error' };
}
Expand Down
56 changes: 0 additions & 56 deletions src/core/file/workers/fileCollectWorker.ts

This file was deleted.

2 changes: 0 additions & 2 deletions src/shared/processConcurrency.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ const getWorkerPath = (workerType: WorkerType): string => {

// Non-bundled environment: use individual worker files
switch (workerType) {
case 'fileCollect':
return new URL('../core/file/workers/fileCollectWorker.js', import.meta.url).href;
case 'fileProcess':
return new URL('../core/file/workers/fileProcessWorker.js', import.meta.url).href;
case 'securityCheck':
Expand Down
12 changes: 1 addition & 11 deletions src/shared/unifiedWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import { workerData } from 'node:worker_threads';

// Worker type definitions
// NOTE(review): two conflicting declarations of `WorkerType` appear here —
// one including 'fileCollect' (pre-change) and one without it (post-change).
// This is diff residue and a duplicate-identifier compile error; keep only the
// declaration matching the final state of the change (the one without
// 'fileCollect', whose worker was deleted) and delete the other.
export type WorkerType = 'fileCollect' | 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction';
export type WorkerType = 'fileProcess' | 'securityCheck' | 'calculateMetrics' | 'defaultAction';

// Worker handler type - uses 'any' to accommodate different worker signatures
// biome-ignore lint/suspicious/noExplicitAny: Worker handlers have varying signatures
Expand All @@ -39,11 +39,6 @@ const loadWorkerHandler = async (
let result: { handler: WorkerHandler; cleanup?: WorkerCleanup };

switch (workerType) {
case 'fileCollect': {
const module = await import('../core/file/workers/fileCollectWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
break;
}
case 'fileProcess': {
const module = await import('../core/file/workers/fileProcessWorker.js');
result = { handler: module.default as WorkerHandler, cleanup: module.onWorkerTermination };
Expand Down Expand Up @@ -95,11 +90,6 @@ const inferWorkerTypeFromTask = (task: unknown): WorkerType | null => {
return 'defaultAction';
}

// fileCollect: has filePath, rootDir, maxFileSize
if ('filePath' in taskObj && 'rootDir' in taskObj && 'maxFileSize' in taskObj) {
return 'fileCollect';
}

// fileProcess: has rawFile (nested object) and config
if ('rawFile' in taskObj && 'config' in taskObj) {
return 'fileProcess';
Expand Down
Loading
Loading