From ed8defffee86b710f09319e7801c6e05db214149 Mon Sep 17 00:00:00 2001
From: Kazuki Yamada
Date: Fri, 3 Apr 2026 13:53:09 +0900
Subject: [PATCH] perf(metrics): Reduce output token counting chunks from
 ~1000 to ~10

CHUNK_SIZE was used as the number of chunks (1000), creating ~1KB
chunks for 1MB output. Each chunk dispatched a worker task with ~0.5ms
overhead for serialization, scheduling, and callback resolution,
totaling ~500ms of overhead that dominated the actual tokenization
work.

Replace with TARGET_CHARS_PER_CHUNK (100,000) so chunks are sized by
content rather than count. A 1MB output now produces ~10 chunks instead
of ~1000, reducing worker round-trip overhead by ~99%.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/core/metrics/calculateOutputMetrics.ts        | 12 +++++++-----
 tests/core/metrics/calculateOutputMetrics.test.ts | 12 +++++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/core/metrics/calculateOutputMetrics.ts b/src/core/metrics/calculateOutputMetrics.ts
index 1be4f57d5..c809ab2a6 100644
--- a/src/core/metrics/calculateOutputMetrics.ts
+++ b/src/core/metrics/calculateOutputMetrics.ts
@@ -3,8 +3,11 @@ import type { TaskRunner } from '../../shared/processConcurrency.js';
 import type { TokenEncoding } from './TokenCounter.js';
 import type { TokenCountTask } from './workers/calculateMetricsWorker.js';
 
-const CHUNK_SIZE = 1000;
-const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1000KB
+// Target ~100KB per chunk so that each worker task does meaningful tokenization work.
+// Previously this was 1000 (number of chunks), which created ~1KB chunks for 1MB output,
+// causing ~1000 worker round-trips with ~0.5ms overhead each (~500ms total waste).
+const TARGET_CHARS_PER_CHUNK = 100_000;
+const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1MB
 
 export const calculateOutputMetrics = async (
   content: string,
@@ -22,11 +25,10 @@ export const calculateOutputMetrics = async (
 
   if (shouldRunInParallel) {
     // Split content into chunks for parallel processing
-    const chunkSize = Math.ceil(content.length / CHUNK_SIZE);
     const chunks: string[] = [];
 
-    for (let i = 0; i < content.length; i += chunkSize) {
-      chunks.push(content.slice(i, i + chunkSize));
+    for (let i = 0; i < content.length; i += TARGET_CHARS_PER_CHUNK) {
+      chunks.push(content.slice(i, i + TARGET_CHARS_PER_CHUNK));
     }
 
     // Process chunks in parallel
diff --git a/tests/core/metrics/calculateOutputMetrics.test.ts b/tests/core/metrics/calculateOutputMetrics.test.ts
index 104914ff7..2b42d0731 100644
--- a/tests/core/metrics/calculateOutputMetrics.test.ts
+++ b/tests/core/metrics/calculateOutputMetrics.test.ts
@@ -114,7 +114,7 @@ describe('calculateOutputMetrics', () => {
     });
 
     expect(chunksProcessed).toBeGreaterThan(1); // Should have processed multiple chunks
-    expect(result).toBe(100_000); // 1000 chunks * 100 tokens per chunk
+    expect(result).toBe(chunksProcessed * 100); // chunks * 100 tokens per chunk
   });
 
   it('should handle errors in parallel processing', async () => {
@@ -168,12 +168,14 @@ describe('calculateOutputMetrics', () => {
       }),
     });
 
-    // Check that chunks are roughly equal in size
-    const _expectedChunkSize = Math.ceil(content.length / 1000); // CHUNK_SIZE is 1000
+    // With TARGET_CHARS_PER_CHUNK=100_000, 1.1MB content should produce 11 chunks
     const chunkSizes = processedChunks.map((chunk) => chunk.length);
 
-    expect(processedChunks.length).toBe(1000); // Should have 1000 chunks
-    expect(Math.max(...chunkSizes) - Math.min(...chunkSizes)).toBeLessThanOrEqual(1); // Chunks should be almost equal in size
+    expect(processedChunks.length).toBe(11);
+    // All chunks except the last should be exactly TARGET_CHARS_PER_CHUNK
+    for (let i = 0; i < chunkSizes.length - 1; i++) {
+      expect(chunkSizes[i]).toBe(100_000);
+    }
     expect(processedChunks.join('')).toBe(content); // All content should be processed
   });
 });