Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions src/core/metrics/calculateOutputMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import { logger } from '../../shared/logger.js';
import { getProcessConcurrency } from '../../shared/processConcurrency.js';
import { type MetricsTaskRunner, runTokenCount } from './metricsWorkerRunner.js';
import type { TokenEncoding } from './TokenCounter.js';

// Target ~200K characters per chunk to balance tokenization throughput and worker round-trip overhead.
// Benchmarks show 200K is the sweet spot: fewer round-trips than 100K with enough chunks
// for good parallelism across available threads (e.g., 20 chunks for a 4M character output).
const TARGET_CHARS_PER_CHUNK = 200_000;
const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1MB

export const calculateOutputMetrics = async (
Expand All @@ -23,11 +20,15 @@ export const calculateOutputMetrics = async (
let result: number;

if (shouldRunInParallel) {
// Split content into chunks for parallel processing
// Split content into chunks matching the number of available CPU cores.
// Using fewer, larger chunks minimizes IPC overhead (postMessage serialization
// per task) while still saturating all worker threads.
const numChunks = Math.max(1, getProcessConcurrency());
const chunkSize = Math.ceil(content.length / numChunks);
Comment on lines +26 to +27
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The current implementation may lead to excessive IPC overhead on systems with many CPU cores. For example, on a 64-core machine, a 1.1MB file would be split into 64 small chunks (~17KB each), resulting in 64 postMessage calls.

To minimize IPC overhead while still saturating available cores, it is better to ensure chunks don't fall below a reasonable size. Using the previously established ~200K-character "sweet spot" as an approximate minimum chunk size (by capping the number of chunks; the final `Math.ceil` division can still yield chunks slightly under 200K) ensures that parallelization benefits aren't negated by message serialization costs.

Suggested change
const numChunks = Math.max(1, getProcessConcurrency());
const chunkSize = Math.ceil(content.length / numChunks);
const numChunks = Math.min(getProcessConcurrency(), Math.ceil(content.length / 200_000));
const chunkSize = Math.ceil(content.length / numChunks);

const chunks: string[] = [];

for (let i = 0; i < content.length; i += TARGET_CHARS_PER_CHUNK) {
chunks.push(content.slice(i, i + TARGET_CHARS_PER_CHUNK));
for (let i = 0; i < content.length; i += chunkSize) {
chunks.push(content.slice(i, i + chunkSize));
}

// Process chunks in parallel
Expand Down
7 changes: 6 additions & 1 deletion src/shared/memoryUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Memory utility functions for monitoring memory usage across the application
*/

import { logger } from './logger.js';
import { logger, repomixLogLevels } from './logger.js';

export interface MemoryStats {
heapUsed: number;
Expand Down Expand Up @@ -44,6 +44,7 @@ export function getMemoryStats(): MemoryStats {
* Log memory usage at trace level with a context message
*/
export function logMemoryUsage(context: string): void {
if (logger.getLogLevel() < repomixLogLevels.DEBUG) return;
const stats = getMemoryStats();
logger.trace(
`Memory [${context}] | Heap: ${stats.heapUsed}/${stats.heapTotal}MB (${stats.heapUsagePercent}%) | RSS: ${stats.rss}MB | Ext: ${stats.external}MB`,
Expand All @@ -69,6 +70,10 @@ export function logMemoryDifference(context: string, before: MemoryStats, after:
* Execute a function and log memory usage before and after
*/
export async function withMemoryLogging<T>(context: string, fn: () => Promise<T>): Promise<T> {
if (logger.getLogLevel() < repomixLogLevels.DEBUG) {
return fn();
}

const before = getMemoryStats();
logMemoryUsage(`${context} - Before`);

Expand Down
19 changes: 10 additions & 9 deletions tests/core/metrics/calculateOutputMetrics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
type TokenCountTask,
} from '../../../src/core/metrics/workers/calculateMetricsWorker.js';
import { logger } from '../../../src/shared/logger.js';
import type { WorkerOptions } from '../../../src/shared/processConcurrency.js';
import { getProcessConcurrency, type WorkerOptions } from '../../../src/shared/processConcurrency.js';

vi.mock('../../../src/shared/logger');

Expand Down Expand Up @@ -118,8 +118,9 @@ describe('calculateOutputMetrics', () => {
taskRunner: mockParallelTaskRunner({ numOfTasks: 1, workerType: 'calculateMetrics', runtime: 'worker_threads' }),
});

expect(chunksProcessed).toBeGreaterThan(1); // Should have processed multiple chunks
expect(result).toBe(chunksProcessed * 100); // chunks * 100 tokens per chunk
const expectedChunks = getProcessConcurrency();
expect(chunksProcessed).toBe(expectedChunks); // Should match number of CPU cores
Comment on lines +121 to +122
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If the chunking logic in calculateOutputMetrics.ts is updated to cap the number of chunks based on a minimum size, this test expectation should be updated accordingly to reflect the actual number of chunks produced.

Suggested change
const expectedChunks = getProcessConcurrency();
expect(chunksProcessed).toBe(expectedChunks); // Should match number of CPU cores
const expectedChunks = Math.min(getProcessConcurrency(), Math.ceil(content.length / 200_000));
expect(chunksProcessed).toBe(expectedChunks);

expect(result).toBe(expectedChunks * 100); // numChunks * 100 tokens per chunk
});

it('should handle errors in parallel processing', async () => {
Expand Down Expand Up @@ -173,14 +174,14 @@ describe('calculateOutputMetrics', () => {
}),
});

// With TARGET_CHARS_PER_CHUNK=200_000, 1.1M character content should produce 6 chunks
// Check that chunks are roughly equal in size
const expectedChunks = getProcessConcurrency();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This expectation should also be updated to match the improved chunking logic that accounts for a minimum chunk size.

Suggested change
const expectedChunks = getProcessConcurrency();
const expectedChunks = Math.min(getProcessConcurrency(), Math.ceil(content.length / 200_000));

const chunkSizes = processedChunks.map((chunk) => chunk.length);

expect(processedChunks.length).toBe(6);
// All chunks except the last should be exactly TARGET_CHARS_PER_CHUNK
for (let i = 0; i < chunkSizes.length - 1; i++) {
expect(chunkSizes[i]).toBe(200_000);
}
expect(processedChunks.length).toBe(expectedChunks); // Should match number of CPU cores
// Last chunk may be smaller due to Math.ceil rounding
const maxDiff = Math.max(...chunkSizes) - Math.min(...chunkSizes);
expect(maxDiff).toBeLessThan(Math.ceil(content.length / expectedChunks)); // Chunks should be roughly equal
expect(processedChunks.join('')).toBe(content); // All content should be processed
});
});
Loading