From bc06014eff94faed0d825036bbe3f9569752a49e Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Sun, 12 Apr 2026 23:48:30 -0700 Subject: [PATCH 1/7] feat(output): Add --show-file-offsets option to annotate directory tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new `--show-file-offsets` CLI flag and `output.showFileOffsets` config option (default: false) that annotates each file entry in the directory structure section with its line range in the output file (e.g., [lines 42–78]). This enables AI agents and users to navigate directly to a specific file's content in the packed output without scanning the entire file. Implementation uses a two-pass render: first render discovers file block positions, then the tree is re-rendered with offset annotations. Since the tree section maintains the same line count between passes, offsets remain stable. Supports XML, Markdown, and plain text output styles. JSON output is structured and does not require tree annotations. 
Closes #1367 --- src/cli/actions/defaultAction.ts | 7 + src/cli/cliRun.ts | 4 + src/cli/types.ts | 1 + src/config/configSchema.ts | 2 + src/core/file/fileTreeGenerate.ts | 69 +++++++ src/core/output/fileOffsets.ts | 89 ++++++++ src/core/output/outputGenerate.ts | 71 +++++-- tests/config/configSchema.test.ts | 2 + .../metrics/calculateGitDiffMetrics.test.ts | 1 + .../metrics/calculateGitLogMetrics.test.ts | 1 + .../output/flagFullDirectoryStructure.test.ts | 2 + .../output/outputStyles/jsonStyle.test.ts | 1 + tests/core/output/showFileOffsets.test.ts | 191 ++++++++++++++++++ 13 files changed, 427 insertions(+), 14 deletions(-) create mode 100644 src/core/output/fileOffsets.ts create mode 100644 tests/core/output/showFileOffsets.test.ts diff --git a/src/cli/actions/defaultAction.ts b/src/cli/actions/defaultAction.ts index 40958563f..8a7edd0fc 100644 --- a/src/cli/actions/defaultAction.ts +++ b/src/cli/actions/defaultAction.ts @@ -337,6 +337,13 @@ export const buildCliConfig = (options: CliOptions): RepomixConfigCli => { }; } + if (options.showFileOffsets) { + cliConfig.output = { + ...cliConfig.output, + showFileOffsets: options.showFileOffsets, + }; + } + // Skill generation if (options.skillGenerate !== undefined) { cliConfig.skillGenerate = options.skillGenerate; diff --git a/src/cli/cliRun.ts b/src/cli/cliRun.ts index 754b4ab80..2b938a2dd 100644 --- a/src/cli/cliRun.ts +++ b/src/cli/cliRun.ts @@ -172,6 +172,10 @@ export const run = async () => { '--token-count-encoding ', 'Tokenizer model for counting: o200k_base (GPT-4o), cl100k_base (GPT-3.5/4), etc. 
(default: o200k_base)', ) + .option( + '--show-file-offsets', + 'Annotate each file in the directory structure with its line range in the output (e.g., [lines 42–78])', + ) // MCP .optionsGroup('MCP') .option('--mcp', 'Run as Model Context Protocol server for AI tool integration') diff --git a/src/cli/types.ts b/src/cli/types.ts index b945ca1bf..8569bd5d5 100644 --- a/src/cli/types.ts +++ b/src/cli/types.ts @@ -54,6 +54,7 @@ export interface CliOptions extends OptionValues { // Token Count Options tokenCountEncoding?: string; tokenCountTree?: boolean | number; + showFileOffsets?: boolean; // MCP mcp?: boolean; diff --git a/src/config/configSchema.ts b/src/config/configSchema.ts index 74c1ef432..16d4348d9 100644 --- a/src/config/configSchema.ts +++ b/src/config/configSchema.ts @@ -42,6 +42,7 @@ export const repomixConfigBaseSchema = z.object({ includeFullDirectoryStructure: z.boolean().optional(), splitOutput: z.number().int().min(1).optional(), tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).optional(), + showFileOffsets: z.boolean().optional(), git: z .object({ sortByChanges: z.boolean().optional(), @@ -103,6 +104,7 @@ export const repomixConfigDefaultSchema = z.object({ includeFullDirectoryStructure: z.boolean().default(false), splitOutput: z.number().int().min(1).optional(), tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).default(false), + showFileOffsets: z.boolean().default(false), git: z.object({ sortByChanges: z.boolean().default(true), sortByChangesMaxCommits: z.number().int().min(1).default(100), diff --git a/src/core/file/fileTreeGenerate.ts b/src/core/file/fileTreeGenerate.ts index a79223b38..ff4e98ac3 100644 --- a/src/core/file/fileTreeGenerate.ts +++ b/src/core/file/fileTreeGenerate.ts @@ -1,4 +1,6 @@ import nodepath from 'node:path'; +import type { FileLineOffset } from '../output/fileOffsets.js'; +import { formatFileOffsetAnnotation } from '../output/fileOffsets.js'; export interface TreeNode { name: string; @@ -133,6 
+135,73 @@ export const generateTreeStringWithLineCounts = ( return treeToStringWithLineCounts(tree, lineCounts).trim(); }; +/** + * Converts a tree to string with line offset annotations for files in the output. + * @param node The tree node to convert + * @param offsets Map of file paths to their line ranges in the output file + * @param prefix Current indentation prefix + * @param currentPath Current path being built (for looking up offsets) + */ +export const treeToStringWithFileOffsets = ( + node: TreeNode, + offsets: Record, + prefix = '', + currentPath = '', + _isRoot = true, +): string => { + if (_isRoot) { + sortTreeNodes(node); + } + let result = ''; + + for (const child of node.children) { + const childPath = currentPath ? `${currentPath}/${child.name}` : child.name; + + if (child.isDirectory) { + result += `${prefix}${child.name}/\n`; + result += treeToStringWithFileOffsets(child, offsets, `${prefix} `, childPath, false); + } else { + const offset = offsets[childPath]; + const offsetSuffix = offset ? formatFileOffsetAnnotation(offset) : ''; + result += `${prefix}${child.name}${offsetSuffix}\n`; + } + } + + return result; +}; + +export const generateTreeStringWithFileOffsets = ( + files: string[], + offsets: Record, + emptyDirPaths: string[] = [], +): string => { + const tree = generateFileTree(files, emptyDirPaths); + return treeToStringWithFileOffsets(tree, offsets).trim(); +}; + +/** + * Generates a tree string with root directory labels and file offset annotations. + * For single root, returns the standard flat tree with offsets. + * For multiple roots, each section is labeled with [rootLabel]/. 
+ * + * @param filesByRoot Array of root directories with their files + * @param offsets Map of file paths to their line ranges in the output file + * @param emptyDirPaths Optional paths to empty directories + */ +export const generateTreeStringWithRootsAndFileOffsets = ( + filesByRoot: FilesByRoot[], + offsets: Record, + emptyDirPaths: string[] = [], +): string => { + // Single root: use existing behavior without labels + if (filesByRoot.length === 1) { + return generateTreeStringWithFileOffsets(filesByRoot[0].files, offsets, emptyDirPaths); + } + + // Multiple roots: generate labeled sections + return generateMultiRootSections(filesByRoot, (tree, prefix) => treeToStringWithFileOffsets(tree, offsets, prefix)); +}; + /** * Represents files grouped by their root directory. */ diff --git a/src/core/output/fileOffsets.ts b/src/core/output/fileOffsets.ts new file mode 100644 index 000000000..3a96cf12b --- /dev/null +++ b/src/core/output/fileOffsets.ts @@ -0,0 +1,89 @@ +export interface FileLineOffset { + start: number; + end: number; +} + +/** + * Scans a rendered output string and returns the line range (1-indexed, inclusive) + * for each file's content block. + * + * Supports XML, Markdown, and plain text output styles. + * JSON output is structured and does not use this function. 
+ */ +export const computeFileLineOffsets = (output: string, style: string): Record => { + const offsets: Record = {}; + const lines = output.split('\n'); + + if (style === 'xml') { + let currentPath: string | null = null; + let currentStart = 0; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNum = i + 1; + + const startMatch = line.match(/^$/); + if (startMatch) { + currentPath = startMatch[1]; + currentStart = lineNum; + } else if (line === '' && currentPath !== null) { + offsets[currentPath] = { start: currentStart, end: lineNum }; + currentPath = null; + } + } + } else if (style === 'markdown') { + const fileStarts: Array<{ path: string; line: number }> = []; + + for (let i = 0; i < lines.length; i++) { + const match = lines[i].match(/^## File: (.+)$/); + if (match) { + fileStarts.push({ path: match[1], line: i + 1 }); + } + } + + for (let j = 0; j < fileStarts.length; j++) { + const { path, line } = fileStarts[j]; + const endLine = j + 1 < fileStarts.length ? 
fileStarts[j + 1].line - 1 : lines.length; + offsets[path] = { start: line, end: endLine }; + } + } else if (style === 'plain') { + // Plain format: "================" then "File: path" then "================" then content + // End of content = line before next "================" separator + const SEPARATOR = '================'; + const fileSeparatorLines: number[] = []; + const fileHeaderLines: Array<{ path: string; line: number }> = []; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNum = i + 1; + + if (line === SEPARATOR) { + fileSeparatorLines.push(lineNum); + // Check if next line is a File: header + if (i + 1 < lines.length && lines[i + 1].startsWith('File: ')) { + const filePath = lines[i + 1].slice('File: '.length); + fileHeaderLines.push({ path: filePath, line: lineNum }); + } + } + } + + for (let j = 0; j < fileHeaderLines.length; j++) { + const { path, line } = fileHeaderLines[j]; + // Content starts after: separator → File: header → separator → content + const contentStart = line + 3; + // Content ends before the next file separator, or at the last line + const nextSeparatorLine = j + 1 < fileHeaderLines.length ? fileHeaderLines[j + 1].line - 1 : lines.length; + offsets[path] = { start: contentStart, end: nextSeparatorLine }; + } + } + + return offsets; +}; + +/** + * Formats a FileLineOffset as a human-readable annotation string. 
+ * Example: " [lines 42–78]" + */ +export const formatFileOffsetAnnotation = (offset: FileLineOffset): string => { + return ` [lines ${offset.start}–${offset.end}]`; +}; diff --git a/src/core/output/outputGenerate.ts b/src/core/output/outputGenerate.ts index 680816077..a067a7c30 100644 --- a/src/core/output/outputGenerate.ts +++ b/src/core/output/outputGenerate.ts @@ -4,10 +4,17 @@ import Handlebars from 'handlebars'; import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { RepomixError } from '../../shared/errorHandle.js'; import { listDirectories, listFiles, searchFiles } from '../file/fileSearch.js'; -import { type FilesByRoot, generateTreeString, generateTreeStringWithRoots } from '../file/fileTreeGenerate.js'; +import { + type FilesByRoot, + generateTreeString, + generateTreeStringWithFileOffsets, + generateTreeStringWithRoots, + generateTreeStringWithRootsAndFileOffsets, +} from '../file/fileTreeGenerate.js'; import type { ProcessedFile } from '../file/fileTypes.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; import type { GitLogResult } from '../git/gitLogHandle.js'; +import { computeFileLineOffsets } from './fileOffsets.js'; import type { OutputGeneratorContext, RenderContext } from './outputGeneratorTypes.js'; import { sortOutputFiles } from './outputSort.js'; import { @@ -251,6 +258,31 @@ Please try: } }; +const renderOutput = async ( + config: RepomixConfigMerged, + renderContext: RenderContext, + sortedProcessedFiles: ProcessedFile[], + deps: { + generateHandlebarOutput: typeof generateHandlebarOutput; + generateParsableXmlOutput: typeof generateParsableXmlOutput; + generateParsableJsonOutput: typeof generateParsableJsonOutput; + }, +): Promise => { + switch (config.output.style) { + case 'xml': + return config.output.parsableStyle + ? 
deps.generateParsableXmlOutput(renderContext) + : deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles); + case 'json': + return deps.generateParsableJsonOutput(renderContext); + case 'markdown': + case 'plain': + return deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles); + default: + throw new RepomixError(`Unsupported output style: ${config.output.style}`); + } +}; + export const generateOutput = async ( rootDirs: string[], config: RepomixConfigMerged, @@ -281,21 +313,32 @@ export const generateOutput = async ( filePathsByRoot, emptyDirPaths, ); - const renderContext = createRenderContext(outputGeneratorContext); - switch (config.output.style) { - case 'xml': - return config.output.parsableStyle - ? deps.generateParsableXmlOutput(renderContext) - : deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles); - case 'json': - return deps.generateParsableJsonOutput(renderContext); - case 'markdown': - case 'plain': - return deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles); - default: - throw new RepomixError(`Unsupported output style: ${config.output.style}`); + // When showFileOffsets is enabled, do a two-pass render: + // 1. Render without offsets to discover file line positions + // 2. Annotate the tree string with those positions and re-render + // The tree section has the same number of lines in both passes (only line content changes), + // so the file block positions remain stable between passes. + if (config.output.showFileOffsets && config.output.directoryStructure && config.output.files) { + const firstPassContext = createRenderContext(outputGeneratorContext); + const firstPassOutput = await renderOutput(config, firstPassContext, sortedProcessedFiles, deps); + + const offsets = computeFileLineOffsets(firstPassOutput, config.output.style); + + const annotatedTree = filePathsByRoot + ? 
generateTreeStringWithRootsAndFileOffsets(filePathsByRoot, offsets, emptyDirPaths) + : generateTreeStringWithFileOffsets(allFilePaths, offsets, emptyDirPaths); + + const annotatedContext = createRenderContext({ + ...outputGeneratorContext, + treeString: annotatedTree, + }); + + return renderOutput(config, annotatedContext, sortedProcessedFiles, deps); } + + const renderContext = createRenderContext(outputGeneratorContext); + return renderOutput(config, renderContext, sortedProcessedFiles, deps); }; export const buildOutputGeneratorContext = async ( diff --git a/tests/config/configSchema.test.ts b/tests/config/configSchema.test.ts index ded5ed405..dbe1737a4 100644 --- a/tests/config/configSchema.test.ts +++ b/tests/config/configSchema.test.ts @@ -115,6 +115,7 @@ describe('configSchema', () => { copyToClipboard: true, includeFullDirectoryStructure: false, tokenCountTree: '100', + showFileOffsets: false, git: { sortByChanges: true, sortByChangesMaxCommits: 100, @@ -220,6 +221,7 @@ describe('configSchema', () => { copyToClipboard: false, includeFullDirectoryStructure: false, tokenCountTree: false, + showFileOffsets: false, git: { sortByChanges: true, sortByChangesMaxCommits: 100, diff --git a/tests/core/metrics/calculateGitDiffMetrics.test.ts b/tests/core/metrics/calculateGitDiffMetrics.test.ts index 73231c456..5e0f17b9e 100644 --- a/tests/core/metrics/calculateGitDiffMetrics.test.ts +++ b/tests/core/metrics/calculateGitDiffMetrics.test.ts @@ -48,6 +48,7 @@ describe('calculateGitDiffMetrics', () => { includeEmptyDirectories: false, includeFullDirectoryStructure: false, tokenCountTree: false, + showFileOffsets: false, git: { sortByChanges: true, sortByChangesMaxCommits: 100, diff --git a/tests/core/metrics/calculateGitLogMetrics.test.ts b/tests/core/metrics/calculateGitLogMetrics.test.ts index b5a7a4364..67c05f70b 100644 --- a/tests/core/metrics/calculateGitLogMetrics.test.ts +++ b/tests/core/metrics/calculateGitLogMetrics.test.ts @@ -48,6 +48,7 @@ 
describe('calculateGitLogMetrics', () => { includeEmptyDirectories: false, includeFullDirectoryStructure: false, tokenCountTree: false, + showFileOffsets: false, git: { sortByChanges: true, sortByChangesMaxCommits: 100, diff --git a/tests/core/output/flagFullDirectoryStructure.test.ts b/tests/core/output/flagFullDirectoryStructure.test.ts index 19c9aa73d..c25891c15 100644 --- a/tests/core/output/flagFullDirectoryStructure.test.ts +++ b/tests/core/output/flagFullDirectoryStructure.test.ts @@ -25,6 +25,7 @@ const createMockConfig = (overrides: Partial = {}): Repomix includeEmptyDirectories: false, includeFullDirectoryStructure: true, tokenCountTree: false, + showFileOffsets: false, git: { sortByChanges: false, sortByChangesMaxCommits: 10, @@ -143,6 +144,7 @@ describe('includeEmptyDirectories with pre-computed emptyDirPaths', () => { includeEmptyDirectories: true, includeFullDirectoryStructure: false, tokenCountTree: false, + showFileOffsets: false, git: { sortByChanges: false, sortByChangesMaxCommits: 10, diff --git a/tests/core/output/outputStyles/jsonStyle.test.ts b/tests/core/output/outputStyles/jsonStyle.test.ts index 92a983837..21e836a8e 100644 --- a/tests/core/output/outputStyles/jsonStyle.test.ts +++ b/tests/core/output/outputStyles/jsonStyle.test.ts @@ -25,6 +25,7 @@ const createMockConfig = (overrides: Partial = {}): Repomix includeEmptyDirectories: false, includeFullDirectoryStructure: false, tokenCountTree: false, + showFileOffsets: false, git: { sortByChanges: false, sortByChangesMaxCommits: 10, diff --git a/tests/core/output/showFileOffsets.test.ts b/tests/core/output/showFileOffsets.test.ts new file mode 100644 index 000000000..8d2425017 --- /dev/null +++ b/tests/core/output/showFileOffsets.test.ts @@ -0,0 +1,191 @@ +import { describe, expect, test } from 'vitest'; +import { computeFileLineOffsets } from '../../../src/core/output/fileOffsets.js'; +import { + generateTreeStringWithFileOffsets, + generateTreeStringWithRootsAndFileOffsets, +} from 
'../../../src/core/file/fileTreeGenerate.js'; + +describe('showFileOffsets', () => { + describe('computeFileLineOffsets', () => { + test('extracts line offsets from XML output', () => { + const output = [ + '', + 'src/', + ' foo.ts', + '', + '', + '', + 'This section contains the contents of the repository\'s files.', + '', + '', + 'const x = 1;', + '', + '', + '', + 'const y = 2;', + 'const z = 3;', + '', + '', + ].join('\n'); + + const offsets = computeFileLineOffsets(output, 'xml'); + + expect(offsets['src/foo.ts']).toBeDefined(); + expect(offsets['src/foo.ts'].start).toBe(9); // line of + expect(offsets['src/foo.ts'].end).toBe(11); // line of + + expect(offsets['src/bar.ts']).toBeDefined(); + expect(offsets['src/bar.ts'].start).toBe(13); + expect(offsets['src/bar.ts'].end).toBe(16); + }); + + test('extracts line offsets from Markdown output', () => { + const output = [ + '# Directory Structure', + '```', + 'src/', + ' foo.ts', + '```', + '', + '# Files', + '', + '## File: src/foo.ts', + '```ts', + 'const x = 1;', + '```', + '', + '## File: src/bar.ts', + '```ts', + 'const y = 2;', + '```', + '', + ].join('\n'); + + const offsets = computeFileLineOffsets(output, 'markdown'); + + expect(offsets['src/foo.ts']).toBeDefined(); + expect(offsets['src/foo.ts'].start).toBe(9); // line of "## File: src/foo.ts" + expect(offsets['src/foo.ts'].end).toBe(13); // line before "## File: src/bar.ts" + + expect(offsets['src/bar.ts']).toBeDefined(); + expect(offsets['src/bar.ts'].start).toBe(14); + }); + + test('extracts line offsets from plain output', () => { + const output = [ + '================================================================================', + 'Directory Structure', + '================================================================================', + 'src/', + ' foo.ts', + '', + '================================================================================', + 'Files', + '================================================================================', 
+ '', + '================', // line 11: file separator + 'File: src/foo.ts', // line 12 + '================', // line 13 + 'const x = 1;', // line 14: content start + '', // line 15 + '================', // line 16: next file separator + 'File: src/bar.ts', // line 17 + '================', // line 18 + 'const y = 2;', // line 19: content start + '', // line 20 + ].join('\n'); + + const offsets = computeFileLineOffsets(output, 'plain'); + + expect(offsets['src/foo.ts']).toBeDefined(); + // separator(11) + File:(12) + separator(13) → content at 14 = 11+3 + expect(offsets['src/foo.ts'].start).toBe(14); + expect(offsets['src/bar.ts']).toBeDefined(); + // separator(16) + File:(17) + separator(18) → content at 19 = 16+3 + expect(offsets['src/bar.ts'].start).toBe(19); + }); + + test('returns empty object for JSON style', () => { + const output = '{"files": {}}'; + const offsets = computeFileLineOffsets(output, 'json'); + expect(offsets).toEqual({}); + }); + + test('handles empty output', () => { + const offsets = computeFileLineOffsets('', 'xml'); + expect(offsets).toEqual({}); + }); + }); + + describe('generateTreeStringWithFileOffsets', () => { + test('annotates file entries with line ranges', () => { + const files = ['src/foo.ts', 'src/bar.ts']; + const offsets = { + 'src/foo.ts': { start: 10, end: 20 }, + 'src/bar.ts': { start: 22, end: 35 }, + }; + + const tree = generateTreeStringWithFileOffsets(files, offsets); + + expect(tree).toContain('foo.ts [lines 10–20]'); + expect(tree).toContain('bar.ts [lines 22–35]'); + expect(tree).toContain('src/'); + }); + + test('omits annotation when file not in offsets map', () => { + const files = ['src/foo.ts', 'src/bar.ts']; + const offsets = { + 'src/foo.ts': { start: 10, end: 20 }, + // src/bar.ts not in offsets + }; + + const tree = generateTreeStringWithFileOffsets(files, offsets); + + expect(tree).toContain('foo.ts [lines 10–20]'); + expect(tree).toContain('bar.ts'); + expect(tree).not.toContain('bar.ts [lines'); + }); + + 
test('directories have no annotations', () => { + const files = ['src/foo.ts']; + const offsets = { 'src/foo.ts': { start: 5, end: 10 } }; + + const tree = generateTreeStringWithFileOffsets(files, offsets); + + // Directory should not have annotation + const lines = tree.split('\n'); + const srcLine = lines.find((l) => l.includes('src/')); + expect(srcLine).toBe('src/'); + }); + }); + + describe('generateTreeStringWithRootsAndFileOffsets', () => { + test('annotates single root without labels', () => { + const filesByRoot = [{ rootLabel: 'project', files: ['src/foo.ts'] }]; + const offsets = { 'src/foo.ts': { start: 15, end: 25 } }; + + const tree = generateTreeStringWithRootsAndFileOffsets(filesByRoot, offsets); + + expect(tree).not.toContain('[project]'); + expect(tree).toContain('foo.ts [lines 15–25]'); + }); + + test('annotates multiple roots with labels', () => { + const filesByRoot = [ + { rootLabel: 'cli', files: ['cliRun.ts'] }, + { rootLabel: 'config', files: ['configLoad.ts'] }, + ]; + const offsets = { + 'cliRun.ts': { start: 10, end: 30 }, + 'configLoad.ts': { start: 32, end: 50 }, + }; + + const tree = generateTreeStringWithRootsAndFileOffsets(filesByRoot, offsets); + + expect(tree).toContain('[cli]/'); + expect(tree).toContain('[config]/'); + expect(tree).toContain('cliRun.ts [lines 10–30]'); + expect(tree).toContain('configLoad.ts [lines 32–50]'); + }); + }); +}); From 8f367c36e8c9cc05c071a4d53d6e39698c205c77 Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Mon, 13 Apr 2026 00:00:36 -0700 Subject: [PATCH 2/7] fix(output): Scope file offset scanning to files section; use consistent tree inputs Two correctness fixes for --show-file-offsets: 1. Restrict computeFileLineOffsets scanning to the actual files section of the output (after `<files>` for XML, after # Files for Markdown, after the Files long-separator block for plain). 
Previously the scanner could produce false offset entries if file content earlier in the output happened to contain marker strings matching our patterns (e.g., a file documenting XML format, or Markdown headings before the files section). 2. Store filePathsForTree, directoryPathsForTree, and filePathsByRootForTree on OutputGeneratorContext so the second-pass tree annotation uses the exact same file/directory sets that buildOutputGeneratorContext computed. Previously the second pass used allFilePaths / emptyDirPaths arguments directly, which could differ from the actual tree inputs when includeFullDirectoryStructure adds extra files to the tree. Adds tests asserting that markers outside the files section are not captured. --- src/core/output/fileOffsets.ts | 43 ++++++++++++-- src/core/output/outputGenerate.ts | 12 +++- src/core/output/outputGeneratorTypes.ts | 7 +++ tests/core/output/showFileOffsets.test.ts | 70 +++++++++++++++++++---- 4 files changed, 112 insertions(+), 20 deletions(-) diff --git a/src/core/output/fileOffsets.ts b/src/core/output/fileOffsets.ts index 3a96cf12b..2f25e31d9 100644 --- a/src/core/output/fileOffsets.ts +++ b/src/core/output/fileOffsets.ts @@ -3,10 +3,38 @@ export interface FileLineOffset { end: number; } +/** + * Finds the 1-indexed line number where the files section starts in the output. + * Restricting offset scanning to this section prevents false matches when a file's + * own content contains marker strings (e.g., a file that itself contains XML tags + * or Markdown headings matching our patterns). 
+ */ +const findFilesSectionStart = (lines: string[], style: string): number => { + if (style === 'xml') { + for (let i = 0; i < lines.length; i++) { + if (lines[i] === '') return i; + } + } else if (style === 'markdown') { + for (let i = 0; i < lines.length; i++) { + if (lines[i] === '# Files') return i; + } + } else if (style === 'plain') { + // Plain format: long separator (64 =) followed by "Files" line + const LONG_SEPARATOR = '='.repeat(64); + for (let i = 0; i < lines.length - 1; i++) { + if (lines[i] === LONG_SEPARATOR && lines[i + 1] === 'Files') return i; + } + } + return 0; // fallback: scan entire output +}; + /** * Scans a rendered output string and returns the line range (1-indexed, inclusive) * for each file's content block. * + * Scanning is restricted to the files section of the output to avoid false matches + * from file content that happens to contain marker strings. + * * Supports XML, Markdown, and plain text output styles. * JSON output is structured and does not use this function. 
*/ @@ -14,11 +42,13 @@ export const computeFileLineOffsets = (output: string, style: string): Record = {}; const lines = output.split('\n'); + const sectionStart = findFilesSectionStart(lines, style); + if (style === 'xml') { let currentPath: string | null = null; let currentStart = 0; - for (let i = 0; i < lines.length; i++) { + for (let i = sectionStart; i < lines.length; i++) { const line = lines[i]; const lineNum = i + 1; @@ -29,15 +59,20 @@ export const computeFileLineOffsets = (output: string, style: string): Record' && currentPath !== null) { offsets[currentPath] = { start: currentStart, end: lineNum }; currentPath = null; + } else if (line === '') { + break; } } } else if (style === 'markdown') { const fileStarts: Array<{ path: string; line: number }> = []; - for (let i = 0; i < lines.length; i++) { + for (let i = sectionStart; i < lines.length; i++) { const match = lines[i].match(/^## File: (.+)$/); if (match) { fileStarts.push({ path: match[1], line: i + 1 }); + } else if (lines[i].startsWith('# ') && i > sectionStart) { + // Hit the next top-level section — stop scanning + break; } } @@ -50,15 +85,13 @@ export const computeFileLineOffsets = (output: string, style: string): Record = []; - for (let i = 0; i < lines.length; i++) { + for (let i = sectionStart; i < lines.length; i++) { const line = lines[i]; const lineNum = i + 1; if (line === SEPARATOR) { - fileSeparatorLines.push(lineNum); // Check if next line is a File: header if (i + 1 < lines.length && lines[i + 1].startsWith('File: ')) { const filePath = lines[i + 1].slice('File: '.length); diff --git a/src/core/output/outputGenerate.ts b/src/core/output/outputGenerate.ts index a067a7c30..a03266abe 100644 --- a/src/core/output/outputGenerate.ts +++ b/src/core/output/outputGenerate.ts @@ -325,9 +325,12 @@ export const generateOutput = async ( const offsets = computeFileLineOffsets(firstPassOutput, config.output.style); - const annotatedTree = filePathsByRoot - ? 
generateTreeStringWithRootsAndFileOffsets(filePathsByRoot, offsets, emptyDirPaths) - : generateTreeStringWithFileOffsets(allFilePaths, offsets, emptyDirPaths); + // Use the exact file/directory sets that buildOutputGeneratorContext used for the tree + // (these may differ from allFilePaths when includeFullDirectoryStructure adds extra files) + const { filePathsForTree, directoryPathsForTree, filePathsByRootForTree } = outputGeneratorContext; + const annotatedTree = filePathsByRootForTree + ? generateTreeStringWithRootsAndFileOffsets(filePathsByRootForTree, offsets, directoryPathsForTree) + : generateTreeStringWithFileOffsets(filePathsForTree, offsets, directoryPathsForTree); const annotatedContext = createRenderContext({ ...outputGeneratorContext, @@ -434,6 +437,9 @@ export const buildOutputGeneratorContext = async ( return { generationDate: new Date().toISOString(), treeString, + filePathsForTree, + directoryPathsForTree, + filePathsByRootForTree: filePathsByRoot, processedFiles, config, instruction: repositoryInstruction, diff --git a/src/core/output/outputGeneratorTypes.ts b/src/core/output/outputGeneratorTypes.ts index 270a022c0..2369fb4ad 100644 --- a/src/core/output/outputGeneratorTypes.ts +++ b/src/core/output/outputGeneratorTypes.ts @@ -1,4 +1,5 @@ import type { RepomixConfigMerged } from '../../config/configSchema.js'; +import type { FilesByRoot } from '../file/fileTreeGenerate.js'; import type { ProcessedFile } from '../file/fileTypes.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; import type { GitLogCommit, GitLogResult } from '../git/gitLogHandle.js'; @@ -6,6 +7,12 @@ import type { GitLogCommit, GitLogResult } from '../git/gitLogHandle.js'; export interface OutputGeneratorContext { generationDate: string; treeString: string; + /** The exact file paths used to build treeString (may differ from allFilePaths in full-tree mode). */ + filePathsForTree: string[]; + /** The directory paths (e.g. empty dirs) used to build treeString. 
*/ + directoryPathsForTree: string[]; + /** The per-root file grouping used for multi-root trees, if applicable. */ + filePathsByRootForTree?: FilesByRoot[]; processedFiles: ProcessedFile[]; config: RepomixConfigMerged; instruction: string; diff --git a/tests/core/output/showFileOffsets.test.ts b/tests/core/output/showFileOffsets.test.ts index 8d2425017..91d479b1f 100644 --- a/tests/core/output/showFileOffsets.test.ts +++ b/tests/core/output/showFileOffsets.test.ts @@ -1,9 +1,9 @@ import { describe, expect, test } from 'vitest'; -import { computeFileLineOffsets } from '../../../src/core/output/fileOffsets.js'; import { generateTreeStringWithFileOffsets, generateTreeStringWithRootsAndFileOffsets, } from '../../../src/core/file/fileTreeGenerate.js'; +import { computeFileLineOffsets } from '../../../src/core/output/fileOffsets.js'; describe('showFileOffsets', () => { describe('computeFileLineOffsets', () => { @@ -15,7 +15,7 @@ describe('showFileOffsets', () => { '', '', '', - 'This section contains the contents of the repository\'s files.', + "This section contains the contents of the repository's files.", '', '', 'const x = 1;', @@ -83,16 +83,16 @@ describe('showFileOffsets', () => { 'Files', '================================================================================', '', - '================', // line 11: file separator - 'File: src/foo.ts', // line 12 - '================', // line 13 - 'const x = 1;', // line 14: content start - '', // line 15 - '================', // line 16: next file separator - 'File: src/bar.ts', // line 17 - '================', // line 18 - 'const y = 2;', // line 19: content start - '', // line 20 + '================', // line 11: file separator + 'File: src/foo.ts', // line 12 + '================', // line 13 + 'const x = 1;', // line 14: content start + '', // line 15 + '================', // line 16: next file separator + 'File: src/bar.ts', // line 17 + '================', // line 18 + 'const y = 2;', // line 19: content start + 
'', // line 20 ].join('\n'); const offsets = computeFileLineOffsets(output, 'plain'); @@ -105,6 +105,52 @@ describe('showFileOffsets', () => { expect(offsets['src/bar.ts'].start).toBe(19); }); + test('does not pick up XML file markers outside the section', () => { + // Markers that appear before (e.g. in directory_structure or header) must not + // produce false offset entries. This is the primary false-match scenario. + const output = [ + // Some header content that happens to look like a file marker + '', + 'not real content', + '', + '', + '', + '', + '', + 'const x = 1;', + '', + '', + '', + ].join('\n'); + + const offsets = computeFileLineOffsets(output, 'xml'); + + // Decoy outside must not be captured + expect(offsets['src/decoy.ts']).toBeUndefined(); + // Real file inside must be captured + expect(offsets['src/real.ts']).toBeDefined(); + }); + + test('does not pick up Markdown ## File: headers outside the # Files section', () => { + const output = [ + '## File: src/decoy.ts', // appears before # Files — must be ignored + '', + '# Files', + '', + '## File: src/real.ts', + '```ts', + 'const x = 1;', + '```', + '', + ].join('\n'); + + const offsets = computeFileLineOffsets(output, 'markdown'); + + expect(offsets['src/decoy.ts']).toBeUndefined(); + expect(offsets['src/real.ts']).toBeDefined(); + expect(offsets['src/real.ts'].start).toBe(5); + }); + test('returns empty object for JSON style', () => { const output = '{"files": {}}'; const offsets = computeFileLineOffsets(output, 'json'); From 279fb7acd0a5246272338bc17be93ecc5294f84e Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Mon, 13 Apr 2026 00:03:47 -0700 Subject: [PATCH 3/7] fix(output): Address PR review feedback for show-file-offsets - Move --show-file-offsets to Repomix Output Options group (was under Token Count Options) - Use RepomixOutputStyle type instead of string for style parameter - Tighten XML file tag regex to use [^"]+ and allow optional whitespace - Use line.trim() for and tag 
matching - Trim extracted paths in Markdown and plain formats - Add comment documenting plain separator length constant Co-Authored-By: Claude Sonnet 4.6 --- src/cli/cliRun.ts | 8 ++++---- src/core/output/fileOffsets.ts | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/cli/cliRun.ts b/src/cli/cliRun.ts index 2b938a2dd..ff40517a2 100644 --- a/src/cli/cliRun.ts +++ b/src/cli/cliRun.ts @@ -124,6 +124,10 @@ export const run = async () => { '--include-full-directory-structure', 'Show entire repository tree in the Directory Structure section, even when using --include patterns', ) + .option( + '--show-file-offsets', + 'Annotate each file in the directory structure with its line range in the output (e.g., [lines 42–78])', + ) .option( '--no-git-sort-by-changes', "Don't sort files by git change frequency (default: most changed files first)", @@ -172,10 +176,6 @@ export const run = async () => { '--token-count-encoding ', 'Tokenizer model for counting: o200k_base (GPT-4o), cl100k_base (GPT-3.5/4), etc. (default: o200k_base)', ) - .option( - '--show-file-offsets', - 'Annotate each file in the directory structure with its line range in the output (e.g., [lines 42–78])', - ) // MCP .optionsGroup('MCP') .option('--mcp', 'Run as Model Context Protocol server for AI tool integration') diff --git a/src/core/output/fileOffsets.ts b/src/core/output/fileOffsets.ts index 2f25e31d9..f4f7b5e75 100644 --- a/src/core/output/fileOffsets.ts +++ b/src/core/output/fileOffsets.ts @@ -1,18 +1,20 @@ +import type { RepomixOutputStyle } from '../../config/configSchema.js'; + export interface FileLineOffset { start: number; end: number; } /** - * Finds the 1-indexed line number where the files section starts in the output. + * Finds the 0-indexed line position where the files section starts in the output. 
* Restricting offset scanning to this section prevents false matches when a file's * own content contains marker strings (e.g., a file that itself contains XML tags * or Markdown headings matching our patterns). */ -const findFilesSectionStart = (lines: string[], style: string): number => { +const findFilesSectionStart = (lines: string[], style: RepomixOutputStyle): number => { if (style === 'xml') { for (let i = 0; i < lines.length; i++) { - if (lines[i] === '') return i; + if (lines[i].trim() === '') return i; } } else if (style === 'markdown') { for (let i = 0; i < lines.length; i++) { @@ -38,7 +40,7 @@ const findFilesSectionStart = (lines: string[], style: string): number => { * Supports XML, Markdown, and plain text output styles. * JSON output is structured and does not use this function. */ -export const computeFileLineOffsets = (output: string, style: string): Record => { +export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle): Record => { const offsets: Record = {}; const lines = output.split('\n'); @@ -52,14 +54,15 @@ export const computeFileLineOffsets = (output: string, style: string): Record$/); + // Allow optional surrounding whitespace; use non-greedy [^"]+ to match path + const startMatch = line.match(/^\s*\s*$/); if (startMatch) { currentPath = startMatch[1]; currentStart = lineNum; - } else if (line === '' && currentPath !== null) { + } else if (line.trim() === '' && currentPath !== null) { offsets[currentPath] = { start: currentStart, end: lineNum }; currentPath = null; - } else if (line === '') { + } else if (line.trim() === '') { break; } } @@ -69,7 +72,7 @@ export const computeFileLineOffsets = (output: string, style: string): Record sectionStart) { // Hit the next top-level section — stop scanning break; @@ -84,6 +87,7 @@ export const computeFileLineOffsets = (output: string, style: string): Record = []; @@ -94,7 +98,7 @@ export const computeFileLineOffsets = (output: string, style: string): Record Date: Mon, 13 Apr 
2026 12:23:45 -0700 Subject: [PATCH 4/7] refactor(output): Extract file offset tree functions into fileTreeOffsets.ts fileTreeGenerate.ts exceeded the 250-line limit after adding the three offset-annotation functions. Move treeToStringWithFileOffsets, generateTreeStringWithFileOffsets, and generateTreeStringWithRootsAndFileOffsets into a new src/core/file/fileTreeOffsets.ts. Export generateMultiRootSections and sortTreeNodes from fileTreeGenerate.ts to support the new module. Also add --show-file-offsets and output.showFileOffsets to README.md per CONTRIBUTING.md documentation requirements. Co-Authored-By: Claude Sonnet 4.6 --- README.md | 2 + src/core/file/fileTreeGenerate.ts | 73 +---------------------- src/core/file/fileTreeOffsets.ts | 71 ++++++++++++++++++++++ src/core/output/outputGenerate.ts | 6 +- tests/core/output/showFileOffsets.test.ts | 2 +- 5 files changed, 78 insertions(+), 76 deletions(-) create mode 100644 src/core/file/fileTreeOffsets.ts diff --git a/README.md b/README.md index 94b5c524f..149e043f2 100644 --- a/README.md +++ b/README.md @@ -628,6 +628,7 @@ Instruction | `--split-output ` | Split output into multiple numbered files (e.g., `repomix-output.1.xml`); size like `500kb`, `2mb`, or `1.5mb` | | `--include-empty-directories` | Include folders with no files in directory structure | | `--include-full-directory-structure` | Show complete directory tree in output, including files not matched by `--include` patterns | +| `--show-file-offsets` | Annotate each file in the directory structure with its line range in the output (e.g., `[lines 42–78]`); works with XML, Markdown, and plain text styles | | `--no-git-sort-by-changes` | Don't sort files by git change frequency (default: most changed files first) | | `--include-diffs` | Add git diff section showing working tree and staged changes | | `--include-logs` | Add git commit history with messages and changed files | @@ -1363,6 +1364,7 @@ Here's an explanation of the configuration options: | 
`output.tokenCountTree` | Whether to display file tree with token count summaries. Can be boolean or number (minimum token count threshold) | `false` | | `output.includeEmptyDirectories` | Whether to include empty directories in the repository structure | `false` | | `output.includeFullDirectoryStructure` | When using `include` patterns, whether to display the complete directory tree (respecting ignore patterns) while still processing only the included files. Provides full repository context for AI analysis | `false` | +| `output.showFileOffsets` | Whether to annotate each file in the directory structure with its line range in the output (e.g., `[lines 42–78]`). Works with XML, Markdown, and plain text styles | `false` | | `output.git.sortByChanges` | Whether to sort files by git change count (files with more changes appear at the bottom) | `true` | | `output.git.sortByChangesMaxCommits` | Maximum number of commits to analyze for git changes | `100` | | `output.git.includeDiffs` | Whether to include git diffs in the output (includes both work tree and staged changes separately) | `false` | diff --git a/src/core/file/fileTreeGenerate.ts b/src/core/file/fileTreeGenerate.ts index ff4e98ac3..be8874fac 100644 --- a/src/core/file/fileTreeGenerate.ts +++ b/src/core/file/fileTreeGenerate.ts @@ -1,6 +1,4 @@ import nodepath from 'node:path'; -import type { FileLineOffset } from '../output/fileOffsets.js'; -import { formatFileOffsetAnnotation } from '../output/fileOffsets.js'; export interface TreeNode { name: string; @@ -57,7 +55,7 @@ const addPathToTree = (root: TreeNode, path: string, isDirectory: boolean): void } }; -const sortTreeNodes = (node: TreeNode) => { +export const sortTreeNodes = (node: TreeNode) => { node.children.sort((a, b) => { if (a.isDirectory === b.isDirectory) { return a.name.localeCompare(b.name); @@ -135,73 +133,6 @@ export const generateTreeStringWithLineCounts = ( return treeToStringWithLineCounts(tree, lineCounts).trim(); }; -/** - * Converts a tree 
to string with line offset annotations for files in the output. - * @param node The tree node to convert - * @param offsets Map of file paths to their line ranges in the output file - * @param prefix Current indentation prefix - * @param currentPath Current path being built (for looking up offsets) - */ -export const treeToStringWithFileOffsets = ( - node: TreeNode, - offsets: Record, - prefix = '', - currentPath = '', - _isRoot = true, -): string => { - if (_isRoot) { - sortTreeNodes(node); - } - let result = ''; - - for (const child of node.children) { - const childPath = currentPath ? `${currentPath}/${child.name}` : child.name; - - if (child.isDirectory) { - result += `${prefix}${child.name}/\n`; - result += treeToStringWithFileOffsets(child, offsets, `${prefix} `, childPath, false); - } else { - const offset = offsets[childPath]; - const offsetSuffix = offset ? formatFileOffsetAnnotation(offset) : ''; - result += `${prefix}${child.name}${offsetSuffix}\n`; - } - } - - return result; -}; - -export const generateTreeStringWithFileOffsets = ( - files: string[], - offsets: Record, - emptyDirPaths: string[] = [], -): string => { - const tree = generateFileTree(files, emptyDirPaths); - return treeToStringWithFileOffsets(tree, offsets).trim(); -}; - -/** - * Generates a tree string with root directory labels and file offset annotations. - * For single root, returns the standard flat tree with offsets. - * For multiple roots, each section is labeled with [rootLabel]/. 
- * - * @param filesByRoot Array of root directories with their files - * @param offsets Map of file paths to their line ranges in the output file - * @param emptyDirPaths Optional paths to empty directories - */ -export const generateTreeStringWithRootsAndFileOffsets = ( - filesByRoot: FilesByRoot[], - offsets: Record, - emptyDirPaths: string[] = [], -): string => { - // Single root: use existing behavior without labels - if (filesByRoot.length === 1) { - return generateTreeStringWithFileOffsets(filesByRoot[0].files, offsets, emptyDirPaths); - } - - // Multiple roots: generate labeled sections - return generateMultiRootSections(filesByRoot, (tree, prefix) => treeToStringWithFileOffsets(tree, offsets, prefix)); -}; - /** * Represents files grouped by their root directory. */ @@ -219,7 +150,7 @@ export interface FilesByRoot { * contamination, which would require additional complexity. For most use cases, * empty directories are less important in multi-root scenarios. */ -const generateMultiRootSections = ( +export const generateMultiRootSections = ( filesByRoot: FilesByRoot[], treeToStringFn: (tree: TreeNode, prefix: string) => string, ): string => { diff --git a/src/core/file/fileTreeOffsets.ts b/src/core/file/fileTreeOffsets.ts new file mode 100644 index 000000000..b7046bcd4 --- /dev/null +++ b/src/core/file/fileTreeOffsets.ts @@ -0,0 +1,71 @@ +import type { FileLineOffset } from '../output/fileOffsets.js'; +import { formatFileOffsetAnnotation } from '../output/fileOffsets.js'; +import type { FilesByRoot, TreeNode } from './fileTreeGenerate.js'; +import { generateFileTree, generateMultiRootSections, sortTreeNodes } from './fileTreeGenerate.js'; + +/** + * Converts a tree to string with line offset annotations for files in the output. 
+ * @param node The tree node to convert + * @param offsets Map of file paths to their line ranges in the output file + * @param prefix Current indentation prefix + * @param currentPath Current path being built (for looking up offsets) + */ +export const treeToStringWithFileOffsets = ( + node: TreeNode, + offsets: Record, + prefix = '', + currentPath = '', + _isRoot = true, +): string => { + if (_isRoot) { + sortTreeNodes(node); + } + let result = ''; + + for (const child of node.children) { + const childPath = currentPath ? `${currentPath}/${child.name}` : child.name; + + if (child.isDirectory) { + result += `${prefix}${child.name}/\n`; + result += treeToStringWithFileOffsets(child, offsets, `${prefix} `, childPath, false); + } else { + const offset = offsets[childPath]; + const offsetSuffix = offset ? formatFileOffsetAnnotation(offset) : ''; + result += `${prefix}${child.name}${offsetSuffix}\n`; + } + } + + return result; +}; + +export const generateTreeStringWithFileOffsets = ( + files: string[], + offsets: Record, + emptyDirPaths: string[] = [], +): string => { + const tree = generateFileTree(files, emptyDirPaths); + return treeToStringWithFileOffsets(tree, offsets).trim(); +}; + +/** + * Generates a tree string with root directory labels and file offset annotations. + * For single root, returns the standard flat tree with offsets. + * For multiple roots, each section is labeled with [rootLabel]/. 
+ * + * @param filesByRoot Array of root directories with their files + * @param offsets Map of file paths to their line ranges in the output file + * @param emptyDirPaths Optional paths to empty directories + */ +export const generateTreeStringWithRootsAndFileOffsets = ( + filesByRoot: FilesByRoot[], + offsets: Record, + emptyDirPaths: string[] = [], +): string => { + // Single root: use existing behavior without labels + if (filesByRoot.length === 1) { + return generateTreeStringWithFileOffsets(filesByRoot[0].files, offsets, emptyDirPaths); + } + + // Multiple roots: generate labeled sections + return generateMultiRootSections(filesByRoot, (tree, prefix) => treeToStringWithFileOffsets(tree, offsets, prefix)); +}; diff --git a/src/core/output/outputGenerate.ts b/src/core/output/outputGenerate.ts index a03266abe..e57fe9613 100644 --- a/src/core/output/outputGenerate.ts +++ b/src/core/output/outputGenerate.ts @@ -4,13 +4,11 @@ import Handlebars from 'handlebars'; import type { RepomixConfigMerged } from '../../config/configSchema.js'; import { RepomixError } from '../../shared/errorHandle.js'; import { listDirectories, listFiles, searchFiles } from '../file/fileSearch.js'; +import { type FilesByRoot, generateTreeString, generateTreeStringWithRoots } from '../file/fileTreeGenerate.js'; import { - type FilesByRoot, - generateTreeString, generateTreeStringWithFileOffsets, - generateTreeStringWithRoots, generateTreeStringWithRootsAndFileOffsets, -} from '../file/fileTreeGenerate.js'; +} from '../file/fileTreeOffsets.js'; import type { ProcessedFile } from '../file/fileTypes.js'; import type { GitDiffResult } from '../git/gitDiffHandle.js'; import type { GitLogResult } from '../git/gitLogHandle.js'; diff --git a/tests/core/output/showFileOffsets.test.ts b/tests/core/output/showFileOffsets.test.ts index 91d479b1f..d4afa45d3 100644 --- a/tests/core/output/showFileOffsets.test.ts +++ b/tests/core/output/showFileOffsets.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test 
} from 'vitest'; import { generateTreeStringWithFileOffsets, generateTreeStringWithRootsAndFileOffsets, -} from '../../../src/core/file/fileTreeGenerate.js'; +} from '../../../src/core/file/fileTreeOffsets.js'; import { computeFileLineOffsets } from '../../../src/core/output/fileOffsets.js'; describe('showFileOffsets', () => { From dfed3267980af874fb9c28119977af5ddca4a467 Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Mon, 13 Apr 2026 12:26:32 -0700 Subject: [PATCH 5/7] perf(output): Avoid full line-array allocation in computeFileLineOffsets Replace output.split('\n') with an indexOf-based line iterator (iterLines) so lines are processed one at a time without duplicating the entire output string as an array of strings, reducing memory overhead for large repos. Also release the first-pass output string immediately after offsets are extracted so GC can reclaim it before the second render begins, avoiding holding two full output strings in memory simultaneously. Co-Authored-By: Claude Sonnet 4.6 --- src/core/output/fileOffsets.ts | 107 +++++++++++++++++++----------- src/core/output/outputGenerate.ts | 5 +- 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/src/core/output/fileOffsets.ts b/src/core/output/fileOffsets.ts index f4f7b5e75..18cf70071 100644 --- a/src/core/output/fileOffsets.ts +++ b/src/core/output/fileOffsets.ts @@ -6,28 +6,53 @@ export interface FileLineOffset { } /** - * Finds the 0-indexed line position where the files section starts in the output. + * Iterator that walks through a string line-by-line using indexOf('\n') without + * allocating an array of all lines, keeping memory overhead proportional to + * one line at a time rather than the entire output. + */ +function* iterLines(s: string): Generator<{ line: string; lineNum: number }> { + let pos = 0; + let lineNum = 1; + while (pos <= s.length) { + const next = s.indexOf('\n', pos); + const end = next === -1 ? 
s.length : next; + yield { line: s.slice(pos, end), lineNum }; + if (next === -1) break; + pos = next + 1; + lineNum++; + } +} + +/** Total number of lines in a string (fast count via indexOf). */ +const countLines = (s: string): number => { + let count = 1; + let pos = 0; + let found = s.indexOf('\n', pos); + while (found !== -1) { + count++; + pos = found + 1; + found = s.indexOf('\n', pos); + } + return count; +}; + +/** + * Returns the 1-indexed line number where the files section starts in the output. * Restricting offset scanning to this section prevents false matches when a file's * own content contains marker strings (e.g., a file that itself contains XML tags * or Markdown headings matching our patterns). */ -const findFilesSectionStart = (lines: string[], style: RepomixOutputStyle): number => { - if (style === 'xml') { - for (let i = 0; i < lines.length; i++) { - if (lines[i].trim() === '') return i; - } - } else if (style === 'markdown') { - for (let i = 0; i < lines.length; i++) { - if (lines[i] === '# Files') return i; - } - } else if (style === 'plain') { - // Plain format: long separator (64 =) followed by "Files" line - const LONG_SEPARATOR = '='.repeat(64); - for (let i = 0; i < lines.length - 1; i++) { - if (lines[i] === LONG_SEPARATOR && lines[i + 1] === 'Files') return i; - } +const findFilesSectionStartLine = (output: string, style: RepomixOutputStyle): number => { + const LONG_SEPARATOR = '='.repeat(64); + let prevLine = ''; + + for (const { line, lineNum } of iterLines(output)) { + if (style === 'xml' && line.trim() === '') return lineNum; + if (style === 'markdown' && line === '# Files') return lineNum; + if (style === 'plain' && prevLine === LONG_SEPARATOR && line === 'Files') return lineNum - 1; + prevLine = line; } - return 0; // fallback: scan entire output + return 1; // fallback: scan entire output }; /** @@ -39,20 +64,20 @@ const findFilesSectionStart = (lines: string[], style: RepomixOutputStyle): numb * * Supports XML, Markdown, and 
plain text output styles. * JSON output is structured and does not use this function. + * + * Uses indexOf-based line iteration to avoid duplicating the entire output string + * as an array of lines. */ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle): Record => { const offsets: Record = {}; - const lines = output.split('\n'); - - const sectionStart = findFilesSectionStart(lines, style); + const sectionStartLine = findFilesSectionStartLine(output, style); if (style === 'xml') { let currentPath: string | null = null; let currentStart = 0; - for (let i = sectionStart; i < lines.length; i++) { - const line = lines[i]; - const lineNum = i + 1; + for (const { line, lineNum } of iterLines(output)) { + if (lineNum < sectionStartLine) continue; // Allow optional surrounding whitespace; use non-greedy [^"]+ to match path const startMatch = line.match(/^\s*\s*$/); @@ -68,12 +93,15 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle } } else if (style === 'markdown') { const fileStarts: Array<{ path: string; line: number }> = []; + const totalLines = countLines(output); - for (let i = sectionStart; i < lines.length; i++) { - const match = lines[i].match(/^## File: (.+)$/); + for (const { line, lineNum } of iterLines(output)) { + if (lineNum < sectionStartLine) continue; + + const match = line.match(/^## File: (.+)$/); if (match) { - fileStarts.push({ path: match[1].trim(), line: i + 1 }); - } else if (lines[i].startsWith('# ') && i > sectionStart) { + fileStarts.push({ path: match[1].trim(), line: lineNum }); + } else if (line.startsWith('# ') && lineNum > sectionStartLine) { // Hit the next top-level section — stop scanning break; } @@ -81,7 +109,7 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle for (let j = 0; j < fileStarts.length; j++) { const { path, line } = fileStarts[j]; - const endLine = j + 1 < fileStarts.length ? 
fileStarts[j + 1].line - 1 : lines.length; + const endLine = j + 1 < fileStarts.length ? fileStarts[j + 1].line - 1 : totalLines; offsets[path] = { start: line, end: endLine }; } } else if (style === 'plain') { @@ -90,18 +118,21 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle // The short separator is exactly 16 '=' characters (matches PLAIN_SEPARATOR in plainStyle.ts) const SEPARATOR = '================'; const fileHeaderLines: Array<{ path: string; line: number }> = []; + const totalLines = countLines(output); + let prevLine = ''; - for (let i = sectionStart; i < lines.length; i++) { - const line = lines[i]; - const lineNum = i + 1; + for (const { line, lineNum } of iterLines(output)) { + if (lineNum < sectionStartLine) { + prevLine = line; + continue; + } - if (line === SEPARATOR) { - // Check if next line is a File: header - if (i + 1 < lines.length && lines[i + 1].startsWith('File: ')) { - const filePath = lines[i + 1].slice('File: '.length).trim(); - fileHeaderLines.push({ path: filePath, line: lineNum }); - } + if (prevLine === SEPARATOR && line.startsWith('File: ')) { + const filePath = line.slice('File: '.length).trim(); + // lineNum - 1 is the separator line number + fileHeaderLines.push({ path: filePath, line: lineNum - 1 }); } + prevLine = line; } for (let j = 0; j < fileHeaderLines.length; j++) { @@ -109,7 +140,7 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle // Content starts after: separator → File: header → separator → content const contentStart = line + 3; // Content ends before the next file separator, or at the last line - const nextSeparatorLine = j + 1 < fileHeaderLines.length ? fileHeaderLines[j + 1].line - 1 : lines.length; + const nextSeparatorLine = j + 1 < fileHeaderLines.length ? 
fileHeaderLines[j + 1].line - 1 : totalLines; offsets[path] = { start: contentStart, end: nextSeparatorLine }; } } diff --git a/src/core/output/outputGenerate.ts b/src/core/output/outputGenerate.ts index e57fe9613..c667bd0f5 100644 --- a/src/core/output/outputGenerate.ts +++ b/src/core/output/outputGenerate.ts @@ -319,9 +319,12 @@ export const generateOutput = async ( // so the file block positions remain stable between passes. if (config.output.showFileOffsets && config.output.directoryStructure && config.output.files) { const firstPassContext = createRenderContext(outputGeneratorContext); - const firstPassOutput = await renderOutput(config, firstPassContext, sortedProcessedFiles, deps); + let firstPassOutput: string | null = await renderOutput(config, firstPassContext, sortedProcessedFiles, deps); const offsets = computeFileLineOffsets(firstPassOutput, config.output.style); + // Release the first-pass string before the second render so GC can reclaim it, + // avoiding holding two full output strings in memory simultaneously. + firstPassOutput = null; // Use the exact file/directory sets that buildOutputGeneratorContext used for the tree // (these may differ from allFilePaths when includeFullDirectoryStructure adds extra files) From efb81f7b02845c8a66a567ceec86c08db73673eb Mon Sep 17 00:00:00 2001 From: Varun Nuthalapati Date: Mon, 13 Apr 2026 18:50:36 -0700 Subject: [PATCH 6/7] fix(output): Clamp last-file end offset to Files section boundary Markdown: record sectionEndLine when breaking on the next top-level heading so the last file's range doesn't bleed into subsequent sections (git diff, "End of Codebase" footer, etc.). Plain: detect the long separator (64 '=') after the first file entry and set sectionEndLine there, mirroring the XML boundary logic. 
Co-Authored-By: Claude Sonnet 4.6 --- src/core/output/fileOffsets.ts | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/core/output/fileOffsets.ts b/src/core/output/fileOffsets.ts index 18cf70071..ef05363a2 100644 --- a/src/core/output/fileOffsets.ts +++ b/src/core/output/fileOffsets.ts @@ -93,7 +93,8 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle } } else if (style === 'markdown') { const fileStarts: Array<{ path: string; line: number }> = []; - const totalLines = countLines(output); + // Default to end of output; narrowed to line before next top-level heading if one exists + let sectionEndLine = countLines(output); for (const { line, lineNum } of iterLines(output)) { if (lineNum < sectionStartLine) continue; @@ -102,23 +103,27 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle if (match) { fileStarts.push({ path: match[1].trim(), line: lineNum }); } else if (line.startsWith('# ') && lineNum > sectionStartLine) { - // Hit the next top-level section — stop scanning + // Hit the next top-level section — record boundary and stop scanning + sectionEndLine = lineNum - 1; break; } } for (let j = 0; j < fileStarts.length; j++) { const { path, line } = fileStarts[j]; - const endLine = j + 1 < fileStarts.length ? fileStarts[j + 1].line - 1 : totalLines; + const endLine = j + 1 < fileStarts.length ? fileStarts[j + 1].line - 1 : sectionEndLine; offsets[path] = { start: line, end: endLine }; } } else if (style === 'plain') { // Plain format: "================" then "File: path" then "================" then content // End of content = line before next "================" separator // The short separator is exactly 16 '=' characters (matches PLAIN_SEPARATOR in plainStyle.ts) + // The long separator (64 '=') marks the end of the Files section (e.g. 
End of Codebase footer) const SEPARATOR = '================'; + const LONG_SEPARATOR = '='.repeat(64); const fileHeaderLines: Array<{ path: string; line: number }> = []; - const totalLines = countLines(output); + // Default to end of output; narrowed when a long separator signals the next section + let sectionEndLine = countLines(output); let prevLine = ''; for (const { line, lineNum } of iterLines(output)) { @@ -127,6 +132,12 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle continue; } + // A long separator after at least one file entry signals the end of the Files section + if (line === LONG_SEPARATOR && fileHeaderLines.length > 0) { + sectionEndLine = lineNum - 1; + break; + } + if (prevLine === SEPARATOR && line.startsWith('File: ')) { const filePath = line.slice('File: '.length).trim(); // lineNum - 1 is the separator line number @@ -139,8 +150,8 @@ export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle const { path, line } = fileHeaderLines[j]; // Content starts after: separator → File: header → separator → content const contentStart = line + 3; - // Content ends before the next file separator, or at the last line - const nextSeparatorLine = j + 1 < fileHeaderLines.length ? fileHeaderLines[j + 1].line - 1 : totalLines; + // Content ends before the next file separator, or at the files section boundary + const nextSeparatorLine = j + 1 < fileHeaderLines.length ? 
fileHeaderLines[j + 1].line - 1 : sectionEndLine; offsets[path] = { start: contentStart, end: nextSeparatorLine }; } } From b9ea60d01090f6f921beb610988dfc491df83f72 Mon Sep 17 00:00:00 2001 From: nuthalapativarun Date: Thu, 23 Apr 2026 21:41:05 -0700 Subject: [PATCH 7/7] Update tests/core/output/showFileOffsets.test.ts Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- tests/core/output/showFileOffsets.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/core/output/showFileOffsets.test.ts b/tests/core/output/showFileOffsets.test.ts index d4afa45d3..057eb01fe 100644 --- a/tests/core/output/showFileOffsets.test.ts +++ b/tests/core/output/showFileOffsets.test.ts @@ -73,15 +73,15 @@ describe('showFileOffsets', () => { test('extracts line offsets from plain output', () => { const output = [ - '================================================================================', + '================================================================', 'Directory Structure', - '================================================================================', + '================================================================', 'src/', ' foo.ts', '', - '================================================================================', + '================================================================', 'Files', - '================================================================================', + '================================================================', '', '================', // line 11: file separator 'File: src/foo.ts', // line 12