From f682fe9cdec85ef268ddd8f6a982c974ff48d522 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sun, 5 Apr 2026 14:33:32 +0900 Subject: [PATCH] perf(core): Eliminate stat() syscall and lazy-load encoding libraries in fileRead Two optimizations to reduce file reading overhead: 1. Remove redundant fs.stat() before fs.readFile() - Previously each file required stat() (size check) then readFile() = 2 syscalls - Now readFile() runs first, then buffer.length is checked = 1 syscall - Files exceeding maxFileSize (default 10MB) are rare; the occasional oversized read is acceptable for halving syscall count on all files 2. Lazy-load jschardet and iconv-lite - These libraries have ~25ms combined import cost at startup - The fast UTF-8 path (covers ~99% of source code files) never needs them - They are only loaded on first encounter of a non-UTF-8 file Co-Authored-By: Claude Opus 4.6 (1M context) --- src/core/file/fileRead.ts | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/core/file/fileRead.ts b/src/core/file/fileRead.ts index e312394da..4fb72cd70 100644 --- a/src/core/file/fileRead.ts +++ b/src/core/file/fileRead.ts @@ -1,10 +1,20 @@ import * as fs from 'node:fs/promises'; -import iconv from 'iconv-lite'; import isBinaryPath from 'is-binary-path'; import { isBinaryFile } from 'isbinaryfile'; -import jschardet from 'jschardet'; import { logger } from '../../shared/logger.js'; +// Lazy-load encoding detection libraries to avoid their ~25ms combined import cost. +// The fast UTF-8 path (covers ~99% of source code files) never needs these; +// they are only loaded when a file fails UTF-8 decoding. 
+let _jschardet: typeof import('jschardet') | undefined; +let _iconv: typeof import('iconv-lite') | undefined; +const getEncodingDeps = async () => { + if (!_jschardet || !_iconv) { + [_jschardet, _iconv] = await Promise.all([import('jschardet'), import('iconv-lite')]); + } + return { jschardet: _jschardet, iconv: _iconv }; +}; + export type FileSkipReason = 'binary-extension' | 'binary-content' | 'size-limit' | 'encoding-error'; export interface FileReadResult { @@ -20,25 +30,26 @@ export interface FileReadResult { */ export const readRawFile = async (filePath: string, maxFileSize: number): Promise<FileReadResult> => { try { - // Check binary extension first (no I/O needed) to skip stat + read for binary files + // Check binary extension first (no I/O needed) to skip read for binary files if (isBinaryPath(filePath)) { logger.debug(`Skipping binary file: ${filePath}`); return { content: null, skippedReason: 'binary-extension' }; } - const stats = await fs.stat(filePath); + logger.trace(`Reading file: ${filePath}`); + + // Read the file directly and check size afterward, avoiding a separate stat() syscall. + // This halves the number of I/O operations per file. + // Files exceeding maxFileSize are rare, so the occasional oversized read is acceptable. 
+ const buffer = await fs.readFile(filePath); - if (stats.size > maxFileSize) { - const sizeKB = (stats.size / 1024).toFixed(1); + if (buffer.length > maxFileSize) { + const sizeKB = (buffer.length / 1024).toFixed(1); const maxSizeKB = (maxFileSize / 1024).toFixed(1); logger.trace(`File exceeds size limit: ${sizeKB}KB > ${maxSizeKB}KB (${filePath})`); return { content: null, skippedReason: 'size-limit' }; } - logger.trace(`Reading file: ${filePath}`); - - const buffer = await fs.readFile(filePath); - if (await isBinaryFile(buffer)) { logger.debug(`Skipping binary file (content check): ${filePath}`); return { content: null, skippedReason: 'binary-content' }; @@ -58,9 +69,11 @@ export const readRawFile = async (filePath: string, maxFileSize: number): Promis } // Slow path: Detect encoding with jschardet for non-UTF-8 files (e.g., Shift-JIS, EUC-KR) - const { encoding: detectedEncoding } = jschardet.detect(buffer) ?? {}; - const encoding = detectedEncoding && iconv.encodingExists(detectedEncoding) ? detectedEncoding : 'utf-8'; - const content = iconv.decode(buffer, encoding, { stripBOM: true }); + const encodingDeps = await getEncodingDeps(); + const { encoding: detectedEncoding } = encodingDeps.jschardet.detect(buffer) ?? {}; + const encoding = + detectedEncoding && encodingDeps.iconv.encodingExists(detectedEncoding) ? detectedEncoding : 'utf-8'; + const content = encodingDeps.iconv.decode(buffer, encoding, { stripBOM: true }); if (content.includes('\uFFFD')) { logger.debug(`Skipping file due to encoding errors (detected: ${encoding}): ${filePath}`);