Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/core/metrics/calculateOutputMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import { logger } from '../../shared/logger.js';
import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js';
import type { TokenEncoding } from './TokenCounter.js';

// Target ~100KB per chunk so that each worker task does meaningful tokenization work.
// Previously this was 1000 (number of chunks), which created ~1KB chunks for 1MB output,
// causing ~1000 worker round-trips with ~0.5ms overhead each (~500ms total waste).
const TARGET_CHARS_PER_CHUNK = 100_000;
// Target ~200K characters per chunk to balance tokenization throughput and worker round-trip overhead.
// Benchmarks show 200K is the sweet spot: fewer round-trips than 100K with enough chunks
// for good parallelism across available threads (e.g., 20 chunks for a 4M character output).
const TARGET_CHARS_PER_CHUNK = 200_000;
const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1M characters

export const calculateOutputMetrics = async (
Expand Down
6 changes: 3 additions & 3 deletions tests/core/metrics/calculateOutputMetrics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ describe('calculateOutputMetrics', () => {
}),
});

// With TARGET_CHARS_PER_CHUNK=100_000, 1.1MB content should produce 11 chunks
// With TARGET_CHARS_PER_CHUNK=200_000, 1.1M character content should produce 6 chunks
const chunkSizes = processedChunks.map((chunk) => chunk.length);

expect(processedChunks.length).toBe(11);
expect(processedChunks.length).toBe(6);
// All chunks except the last should be exactly TARGET_CHARS_PER_CHUNK
for (let i = 0; i < chunkSizes.length - 1; i++) {
expect(chunkSizes[i]).toBe(100_000);
expect(chunkSizes[i]).toBe(200_000);
}
expect(processedChunks.join('')).toBe(content); // All content should be processed
});
Expand Down
Loading