From ed8defffee86b710f09319e7801c6e05db214149 Mon Sep 17 00:00:00 2001
From: Kazuki Yamada
Date: Fri, 3 Apr 2026 13:53:09 +0900
Subject: [PATCH] perf(metrics): Reduce output token counting chunks from
 ~1000 to ~10

CHUNK_SIZE was used as the number of chunks (1000), creating ~1KB
chunks for 1MB output. Each chunk dispatched a worker task with ~0.5ms
overhead for serialization, scheduling, and callback resolution,
totaling ~500ms of overhead that dominated the actual tokenization
work.

Replace with TARGET_CHARS_PER_CHUNK (100,000) so chunks are sized by
content rather than count. A 1MB output now produces ~10 chunks instead
of ~1000, reducing worker round-trip overhead by ~99%.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/core/metrics/calculateOutputMetrics.ts        | 12 +++++++-----
 tests/core/metrics/calculateOutputMetrics.test.ts | 12 +++++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/core/metrics/calculateOutputMetrics.ts b/src/core/metrics/calculateOutputMetrics.ts
index 1be4f57d5..c809ab2a6 100644
--- a/src/core/metrics/calculateOutputMetrics.ts
+++ b/src/core/metrics/calculateOutputMetrics.ts
@@ -3,8 +3,11 @@ import type { TaskRunner } from '../../shared/processConcurrency.js';
 import type { TokenEncoding } from './TokenCounter.js';
 import type { TokenCountTask } from './workers/calculateMetricsWorker.js';
 
-const CHUNK_SIZE = 1000;
-const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1000KB
+// Target ~100KB per chunk so that each worker task does meaningful tokenization work.
+// Previously this was 1000 (number of chunks), which created ~1KB chunks for 1MB output,
+// causing ~1000 worker round-trips with ~0.5ms overhead each (~500ms total waste).
+const TARGET_CHARS_PER_CHUNK = 100_000;
+const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1MB
 
 export const calculateOutputMetrics = async (
   content: string,
@@ -22,11 +25,10 @@ export const calculateOutputMetrics = async (
 
   if (shouldRunInParallel) {
     // Split content into chunks for parallel processing
-    const chunkSize = Math.ceil(content.length / CHUNK_SIZE);
     const chunks: string[] = [];
 
-    for (let i = 0; i < content.length; i += chunkSize) {
-      chunks.push(content.slice(i, i + chunkSize));
+    for (let i = 0; i < content.length; i += TARGET_CHARS_PER_CHUNK) {
+      chunks.push(content.slice(i, i + TARGET_CHARS_PER_CHUNK));
     }
 
     // Process chunks in parallel
diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts b/tests/core/metrics/calculateOutputMetrics.test.ts
index 104914ff7..2b42d0731 100644
--- a/tests/core/metrics/calculateOutputMetrics.test.ts
+++ b/tests/core/metrics/calculateOutputMetrics.test.ts
@@ -114,7 +114,7 @@ describe('calculateOutputMetrics', () => {
     });
 
     expect(chunksProcessed).toBeGreaterThan(1); // Should have processed multiple chunks
-    expect(result).toBe(100_000); // 1000 chunks * 100 tokens per chunk
+    expect(result).toBe(chunksProcessed * 100); // chunks * 100 tokens per chunk
   });
 
   it('should handle errors in parallel processing', async () => {
@@ -168,12 +168,14 @@ describe('calculateOutputMetrics', () => {
       }),
     });
 
-    // Check that chunks are roughly equal in size
-    const _expectedChunkSize = Math.ceil(content.length / 1000); // CHUNK_SIZE is 1000
+    // With TARGET_CHARS_PER_CHUNK=100_000, 1.1MB content should produce 11 chunks
     const chunkSizes = processedChunks.map((chunk) => chunk.length);
 
-    expect(processedChunks.length).toBe(1000); // Should have 1000 chunks
-    expect(Math.max(...chunkSizes) - Math.min(...chunkSizes)).toBeLessThanOrEqual(1); // Chunks should be almost equal in size
+    expect(processedChunks.length).toBe(11);
+    // All chunks except the last should be exactly TARGET_CHARS_PER_CHUNK
+    for (let i = 0; i < chunkSizes.length - 1; i++) {
+      expect(chunkSizes[i]).toBe(100_000);
+    }
     expect(processedChunks.join('')).toBe(content); // All content should be processed
   });
 });