diff --git a/src/core/file/fileCollect.ts b/src/core/file/fileCollect.ts
index f7cda1c64..4e14731d4 100644
--- a/src/core/file/fileCollect.ts
+++ b/src/core/file/fileCollect.ts
@@ -8,6 +8,10 @@ import { logger } from '../../shared/logger.js';
 import { getProcessConcurrency } from '../../shared/processConcurrency.js';
 import type { RawFile } from './fileTypes.js';
 
+// Maximum file size to process (50MB)
+// This prevents out-of-memory errors when processing very large files
+export const MAX_FILE_SIZE = 50 * 1024 * 1024;
+
 export const collectFiles = async (filePaths: string[], rootDir: string): Promise<RawFile[]> => {
   const rawFiles = await pMap(
     filePaths,
@@ -28,14 +32,27 @@ export const collectFiles = async (filePaths: string[], rootDir: string): Promis
 };
 
 const readRawFile = async (filePath: string): Promise<RawFile | null> => {
-  if (isBinary(filePath)) {
-    logger.debug(`Skipping binary file: ${filePath}`);
-    return null;
-  }
+  try {
+    const stats = await fs.stat(filePath);
 
-  logger.trace(`Reading file: ${filePath}`);
+    if (stats.size > MAX_FILE_SIZE) {
+      const sizeMB = (stats.size / 1024 / 1024).toFixed(1);
+      logger.log('');
+      logger.log('⚠️ Large File Warning:');
+      logger.log('──────────────────────');
+      logger.log(`File exceeds size limit: ${sizeMB}MB > ${MAX_FILE_SIZE / 1024 / 1024}MB (${filePath})`);
+      logger.note('Add this file to .repomixignore if you want to exclude it permanently');
+      logger.log('');
+      return null;
+    }
+
+    if (isBinary(filePath)) {
+      logger.debug(`Skipping binary file: ${filePath}`);
+      return null;
+    }
+
+    logger.trace(`Reading file: ${filePath}`);
 
-  try {
     const buffer = await fs.readFile(filePath);
 
     if (isBinary(null, buffer)) {
diff --git a/tests/core/file/fileCollect.test.ts b/tests/core/file/fileCollect.test.ts
index b31885821..0719b384f 100644
--- a/tests/core/file/fileCollect.test.ts
+++ b/tests/core/file/fileCollect.test.ts
@@ -1,10 +1,11 @@
+import type { Stats } from 'node:fs';
 import * as fs from 'node:fs/promises';
 import path from 'node:path';
 import iconv from 'iconv-lite';
 import { isBinary } from 'istextorbinary';
 import jschardet from 'jschardet';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
-import { collectFiles } from '../../../src/core/file/fileCollect.js';
+import { MAX_FILE_SIZE, collectFiles } from '../../../src/core/file/fileCollect.js';
 import { logger } from '../../../src/shared/logger.js';
 
 vi.mock('node:fs/promises');
@@ -16,6 +17,12 @@ vi.mock('../../../src/shared/logger');
 describe('fileCollect', () => {
   beforeEach(() => {
     vi.resetAllMocks();
+
+    // Setup basic file size mock to fix stat
+    vi.mocked(fs.stat).mockResolvedValue({
+      size: 1024,
+      isFile: () => true,
+    } as Stats);
   });
 
   afterEach(() => {
@@ -43,7 +50,9 @@ describe('fileCollect', () => {
     const mockFilePaths = ['binary.bin', 'text.txt'];
     const mockRootDir = '/root';
 
-    vi.mocked(isBinary).mockReturnValueOnce(true).mockReturnValueOnce(false);
+    vi.mocked(isBinary)
+      .mockReturnValueOnce(true) // for binary.bin
+      .mockReturnValueOnce(false); // for text.txt
     vi.mocked(fs.readFile).mockResolvedValue(Buffer.from('file content'));
     vi.mocked(jschardet.detect).mockReturnValue({ encoding: 'utf-8', confidence: 0.99 });
     vi.mocked(iconv.decode).mockReturnValue('decoded content');
@@ -54,6 +63,41 @@ describe('fileCollect', () => {
     expect(logger.debug).toHaveBeenCalledWith(`Skipping binary file: ${path.resolve('/root/binary.bin')}`);
   });
 
+  it('should skip large files', async () => {
+    const mockFilePaths = ['large.txt', 'normal.txt'];
+    const mockRootDir = '/root';
+    const largePath = path.resolve('/root/large.txt');
+
+    vi.mocked(fs.stat)
+      .mockResolvedValueOnce({
+        // for large.txt
+        size: MAX_FILE_SIZE + 1024, // Slightly over limit
+        isFile: () => true,
+      } as Stats)
+      .mockResolvedValueOnce({
+        // for normal.txt
+        size: 1024,
+        isFile: () => true,
+      } as Stats);
+    vi.mocked(isBinary).mockReturnValue(false);
+    vi.mocked(fs.readFile).mockResolvedValue(Buffer.from('file content'));
+    vi.mocked(jschardet.detect).mockReturnValue({ encoding: 'utf-8', confidence: 0.99 });
+    vi.mocked(iconv.decode).mockReturnValue('decoded content');
+
+    const result = await collectFiles(mockFilePaths, mockRootDir);
+
+    expect(result).toEqual([{ path: 'normal.txt', content: 'decoded content' }]);
+    expect(logger.log).toHaveBeenCalledWith('⚠️ Large File Warning:');
+    expect(logger.log).toHaveBeenCalledWith('──────────────────────');
+    expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('File exceeds size limit:'));
+    expect(logger.log).toHaveBeenCalledWith(expect.stringContaining(largePath));
+    expect(logger.note).toHaveBeenCalledWith('Add this file to .repomixignore if you want to exclude it permanently');
+
+    // Verify fs.readFile is not called for the large file
+    expect(fs.readFile).not.toHaveBeenCalledWith(largePath);
+    expect(fs.readFile).toHaveBeenCalledTimes(1);
+  });
+
   it('should handle file read errors', async () => {
     const mockFilePaths = ['error.txt'];
     const mockRootDir = '/root';