Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions src/cli/cliReport.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import path from 'node:path';
import pc from 'picocolors';
import type { RepomixConfigMerged } from '../config/configSchema.js';
import type { SkippedFileInfo } from '../core/file/fileCollect.js';
import type { PackResult } from '../core/packager.js';
import type { SuspiciousFileResult } from '../core/security/securityCheck.js';
import { logger } from '../shared/logger.js';
Expand Down Expand Up @@ -36,6 +37,9 @@ export const reportResults = (cwd: string, packResult: PackResult, config: Repom
);
logger.log('');

reportSkippedFiles(cwd, packResult.skippedFiles);
logger.log('');

reportSummary(packResult, config);
logger.log('');

Expand Down Expand Up @@ -157,6 +161,31 @@ export const reportTopFiles = (
});
};

export const reportSkippedFiles = (rootDir: string, skippedFiles: SkippedFileInfo[]) => {
  // Only files skipped because their *content* looked binary are surfaced here;
  // other skip reasons (extension, size limit, encoding) are reported elsewhere or stay silent.
  const detected = skippedFiles.filter(({ reason }) => reason === 'binary-content');
  if (detected.length === 0) {
    return;
  }

  logger.log(pc.white('📄 Binary Files Detected:'));
  logger.log(pc.dim('─────────────────────────'));

  // Singular/plural headline for the count of content-detected binaries.
  const headline =
    detected.length === 1
      ? '1 file detected as binary by content inspection:'
      : `${detected.length} files detected as binary by content inspection:`;
  logger.log(pc.yellow(headline));

  // Numbered listing, paths shown relative to the scanned root.
  let position = 0;
  for (const skipped of detected) {
    position += 1;
    const relativePath = path.relative(rootDir, skipped.path);
    logger.log(`${pc.white(`${position}.`)} ${pc.white(relativePath)}`);
  }

  logger.log(pc.yellow('\nThese files have been excluded from the output.'));
  logger.log(pc.yellow('Please review these files if you expected them to contain text content.'));
};

export const reportCompletion = () => {
logger.log(pc.green('🎉 All Done!'));
logger.log(pc.white('Your repository has been successfully packed.'));
Expand Down
28 changes: 24 additions & 4 deletions src/core/file/fileCollect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@ import { logger } from '../../shared/logger.js';
import { initTaskRunner } from '../../shared/processConcurrency.js';
import type { RepomixProgressCallback } from '../../shared/types.js';
import type { RawFile } from './fileTypes.js';
import type { FileCollectTask } from './workers/fileCollectWorker.js';
import type { FileCollectResult, FileCollectTask, SkippedFileInfo } from './workers/fileCollectWorker.js';

export interface FileCollectResults {
rawFiles: RawFile[];
skippedFiles: SkippedFileInfo[];
}

// Re-export SkippedFileInfo for external use
export type { SkippedFileInfo } from './workers/fileCollectWorker.js';

export const collectFiles = async (
filePaths: string[],
Expand All @@ -14,8 +22,8 @@ export const collectFiles = async (
deps = {
initTaskRunner,
},
): Promise<RawFile[]> => {
const taskRunner = deps.initTaskRunner<FileCollectTask, RawFile | null>(
): Promise<FileCollectResults> => {
const taskRunner = deps.initTaskRunner<FileCollectTask, FileCollectResult>(
filePaths.length,
new URL('./workers/fileCollectWorker.js', import.meta.url).href,
);
Expand Down Expand Up @@ -50,7 +58,19 @@ export const collectFiles = async (
const duration = Number(endTime - startTime) / 1e6;
logger.trace(`File collection completed in ${duration.toFixed(2)}ms`);

return results.filter((file): file is RawFile => file !== null);
const rawFiles: RawFile[] = [];
const skippedFiles: SkippedFileInfo[] = [];

for (const result of results) {
if (result.rawFile) {
rawFiles.push(result.rawFile);
}
if (result.skippedFile) {
skippedFiles.push(result.skippedFile);
}
}

return { rawFiles, skippedFiles };
} catch (error) {
logger.error('Error during file collection:', error);
throw error;
Expand Down
35 changes: 26 additions & 9 deletions src/core/file/fileRead.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,33 @@ import { isBinary } from 'istextorbinary';
import jschardet from 'jschardet';
import { logger } from '../../shared/logger.js';

export type FileSkipReason = 'binary-extension' | 'binary-content' | 'size-limit' | 'encoding-error';

export interface FileReadResult {
content: string | null;
skippedReason?: FileSkipReason;
}

/**
* Read a file and return its text content
* @param filePath Path to the file
* @param maxFileSize Maximum file size in bytes
* @returns File content as string, or null if the file is binary or exceeds size limit
* @returns File content as string and skip reason if file was skipped
*/
export const readRawFile = async (filePath: string, maxFileSize: number): Promise<string | null> => {
export const readRawFile = async (filePath: string, maxFileSize: number): Promise<FileReadResult> => {
try {
const stats = await fs.stat(filePath);

if (stats.size > maxFileSize) {
const sizeKB = (stats.size / 1024).toFixed(1);
const maxSizeKB = (maxFileSize / 1024).toFixed(1);
logger.trace(`File exceeds size limit: ${sizeKB}KB > ${maxSizeKB}KB (${filePath})`);
return null;
return { content: null, skippedReason: 'size-limit' };
}

if (isBinary(filePath)) {
logger.debug(`Skipping binary file: ${filePath}`);
return null;
return { content: null, skippedReason: 'binary-extension' };
}

logger.trace(`Reading file: ${filePath}`);
Expand All @@ -32,15 +39,25 @@ export const readRawFile = async (filePath: string, maxFileSize: number): Promis

if (isBinary(null, buffer)) {
logger.debug(`Skipping binary file (content check): ${filePath}`);
return null;
return { content: null, skippedReason: 'binary-content' };
}

const encoding = jschardet.detect(buffer).encoding || 'utf-8';
const content = iconv.decode(buffer, encoding);
const { encoding: detectedEncoding, confidence } = jschardet.detect(buffer) ?? {};
const encoding = detectedEncoding && iconv.encodingExists(detectedEncoding) ? detectedEncoding : 'utf-8';

const content = iconv.decode(buffer, encoding, { stripBOM: true });

// Heuristics: U+FFFD indicates decode errors; very low confidence implies unreliable guess.
if (content.includes('\uFFFD') || (typeof confidence === 'number' && confidence < 0.2)) {
logger.debug(
`Skipping file due to encoding errors (${encoding}, confidence=${(confidence ?? 0).toFixed(2)}): ${filePath}`,
);
return { content: null, skippedReason: 'encoding-error' };
}

return content;
return { content };
} catch (error) {
logger.warn(`Failed to read file: ${filePath}`, error);
return null;
return { content: null, skippedReason: 'encoding-error' };
}
};
40 changes: 32 additions & 8 deletions src/core/file/workers/fileCollectWorker.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import path from 'node:path';
import { logger, setLogLevelByWorkerData } from '../../../shared/logger.js';
import { readRawFile } from '../fileRead.js';
import { setLogLevelByWorkerData } from '../../../shared/logger.js';
import { type FileSkipReason, readRawFile } from '../fileRead.js';
import type { RawFile } from '../fileTypes.js';

// Initialize logger configuration from workerData at module load time
// This must be called before any logging operations in the worker
Expand All @@ -12,16 +13,39 @@ export interface FileCollectTask {
maxFileSize: number;
}

export default async ({ filePath, rootDir, maxFileSize }: FileCollectTask) => {
export interface SkippedFileInfo {
path: string;
reason: FileSkipReason;
}

export interface FileCollectResult {
rawFile?: RawFile;
skippedFile?: SkippedFileInfo;
}

export default async ({ filePath, rootDir, maxFileSize }: FileCollectTask): Promise<FileCollectResult> => {
const fullPath = path.resolve(rootDir, filePath);
const content = await readRawFile(fullPath, maxFileSize);
const result = await readRawFile(fullPath, maxFileSize);

if (result.content !== null) {
return {
rawFile: {
path: filePath,
content: result.content,
},
};
}

if (content) {
if (result.skippedReason) {
return {
path: filePath,
content,
skippedFile: {
path: filePath,
reason: result.skippedReason,
},
};
}

return null;
throw new Error(
`File processing for ${filePath} resulted in an unexpected state: content is null but no skip reason was provided.`,
);
};
15 changes: 10 additions & 5 deletions src/core/packager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import type { RepomixConfigMerged } from '../config/configSchema.js';
import { RepomixError } from '../shared/errorHandle.js';
import { logMemoryUsage, withMemoryLogging } from '../shared/memoryUtils.js';
import type { RepomixProgressCallback } from '../shared/types.js';
import { collectFiles } from './file/fileCollect.js';
import { type SkippedFileInfo, collectFiles } from './file/fileCollect.js';
import { sortPaths } from './file/filePathSort.js';
import { processFiles } from './file/fileProcess.js';
import { searchFiles } from './file/fileSearch.js';
Expand All @@ -29,6 +29,7 @@ export interface PackResult {
suspiciousGitLogResults: SuspiciousFileResult[];
processedFiles: ProcessedFile[];
safeFilePaths: string[];
skippedFiles: SkippedFileInfo[];
}

const defaultDeps = {
Expand Down Expand Up @@ -83,16 +84,19 @@ export const pack = async (
}));

progressCallback('Collecting files...');
const rawFiles = await withMemoryLogging('Collect Files', async () =>
(
const collectResults = await withMemoryLogging(
'Collect Files',
async () =>
await Promise.all(
sortedFilePathsByDir.map(({ rootDir, filePaths }) =>
deps.collectFiles(filePaths, rootDir, config, progressCallback),
),
)
).reduce((acc: RawFile[], curr: RawFile[]) => acc.concat(...curr), []),
),
);

const rawFiles = collectResults.flatMap((curr) => curr.rawFiles);
const allSkippedFiles = collectResults.flatMap((curr) => curr.skippedFiles);

// Get git diffs if enabled - run this before security check
progressCallback('Getting git diffs...');
const gitDiffResult = await deps.getGitDiffs(rootDirs, config);
Expand Down Expand Up @@ -135,6 +139,7 @@ export const pack = async (
suspiciousGitLogResults,
processedFiles,
safeFilePaths,
skippedFiles: allSkippedFiles,
};

logMemoryUsage('Pack - End');
Expand Down
4 changes: 4 additions & 0 deletions tests/cli/actions/defaultAction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ describe('defaultAction', () => {
safeFilePaths: [],
gitDiffTokenCount: 0,
gitLogTokenCount: 0,
skippedFiles: [],
});
});

Expand Down Expand Up @@ -665,6 +666,7 @@ describe('defaultAction', () => {
fileCharCounts: {},
fileTokenCounts: {},
outputFilePath: 'output.txt',
skippedFiles: [],
} as PackResult);
});

Expand Down Expand Up @@ -796,6 +798,7 @@ describe('defaultAction', () => {
fileCharCounts: {},
fileTokenCounts: {},
outputFilePath: 'output.txt',
skippedFiles: [],
} as PackResult);
});

Expand Down Expand Up @@ -865,6 +868,7 @@ describe('defaultAction', () => {
fileCharCounts: {},
fileTokenCounts: {},
outputFilePath: 'output.txt',
skippedFiles: [],
} as PackResult;
});

Expand Down
4 changes: 4 additions & 0 deletions tests/cli/actions/remoteAction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ describe('remoteAction functions', () => {
safeFilePaths: [],
gitDiffTokenCount: 0,
gitLogTokenCount: 0,
skippedFiles: [],
},
config: createMockConfig(),
} satisfies DefaultActionRunnerResult;
Expand Down Expand Up @@ -92,6 +93,7 @@ describe('remoteAction functions', () => {
safeFilePaths: [],
gitDiffTokenCount: 0,
gitLogTokenCount: 0,
skippedFiles: [],
},
config: createMockConfig(),
} satisfies DefaultActionRunnerResult;
Expand Down Expand Up @@ -137,6 +139,7 @@ describe('remoteAction functions', () => {
safeFilePaths: [],
gitDiffTokenCount: 0,
gitLogTokenCount: 0,
skippedFiles: [],
},
config: createMockConfig(),
} satisfies DefaultActionRunnerResult;
Expand Down Expand Up @@ -182,6 +185,7 @@ describe('remoteAction functions', () => {
safeFilePaths: [],
gitDiffTokenCount: 0,
gitLogTokenCount: 0,
skippedFiles: [],
},
config: createMockConfig(),
} satisfies DefaultActionRunnerResult;
Expand Down
Loading
Loading