Skip to content
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ Instruction
| `--split-output <size>` | Split output into multiple numbered files (e.g., `repomix-output.1.xml`); size like `500kb`, `2mb`, or `1.5mb` |
| `--include-empty-directories` | Include folders with no files in directory structure |
| `--include-full-directory-structure` | Show complete directory tree in output, including files not matched by `--include` patterns |
| `--show-file-offsets` | Annotate each file in the directory structure with its line range in the output (e.g., `[lines 42–78]`); works with XML, Markdown, and plain text styles |
| `--no-git-sort-by-changes` | Don't sort files by git change frequency (default: most changed files first) |
| `--include-diffs` | Add git diff section showing working tree and staged changes |
| `--include-logs` | Add git commit history with messages and changed files |
Expand Down Expand Up @@ -1363,6 +1364,7 @@ Here's an explanation of the configuration options:
| `output.tokenCountTree` | Whether to display file tree with token count summaries. Can be boolean or number (minimum token count threshold) | `false` |
| `output.includeEmptyDirectories` | Whether to include empty directories in the repository structure | `false` |
| `output.includeFullDirectoryStructure` | When using `include` patterns, whether to display the complete directory tree (respecting ignore patterns) while still processing only the included files. Provides full repository context for AI analysis | `false` |
| `output.showFileOffsets` | Whether to annotate each file in the directory structure with its line range in the output (e.g., `[lines 42–78]`). Works with XML, Markdown, and plain text styles | `false` |
| `output.git.sortByChanges` | Whether to sort files by git change count (files with more changes appear at the bottom) | `true` |
| `output.git.sortByChangesMaxCommits` | Maximum number of commits to analyze for git changes | `100` |
| `output.git.includeDiffs` | Whether to include git diffs in the output (includes both work tree and staged changes separately) | `false` |
Expand Down
7 changes: 7 additions & 0 deletions src/cli/actions/defaultAction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,13 @@ export const buildCliConfig = (options: CliOptions): RepomixConfigCli => {
};
}

if (options.showFileOffsets) {
cliConfig.output = {
...cliConfig.output,
showFileOffsets: options.showFileOffsets,
};
}

// Skill generation
if (options.skillGenerate !== undefined) {
cliConfig.skillGenerate = options.skillGenerate;
Expand Down
4 changes: 4 additions & 0 deletions src/cli/cliRun.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ export const run = async () => {
'--include-full-directory-structure',
'Show entire repository tree in the Directory Structure section, even when using --include patterns',
)
.option(
'--show-file-offsets',
'Annotate each file in the directory structure with its line range in the output (e.g., [lines 42–78])',
)
Comment thread
nuthalapativarun marked this conversation as resolved.
.option(
'--no-git-sort-by-changes',
"Don't sort files by git change frequency (default: most changed files first)",
Expand Down
1 change: 1 addition & 0 deletions src/cli/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export interface CliOptions extends OptionValues {
// Token Count Options
tokenCountEncoding?: string;
tokenCountTree?: boolean | number;
showFileOffsets?: boolean;

// MCP
mcp?: boolean;
Expand Down
2 changes: 2 additions & 0 deletions src/config/configSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export const repomixConfigBaseSchema = z.object({
includeFullDirectoryStructure: z.boolean().optional(),
splitOutput: z.number().int().min(1).optional(),
tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).optional(),
showFileOffsets: z.boolean().optional(),
git: z
.object({
sortByChanges: z.boolean().optional(),
Expand Down Expand Up @@ -103,6 +104,7 @@ export const repomixConfigDefaultSchema = z.object({
includeFullDirectoryStructure: z.boolean().default(false),
splitOutput: z.number().int().min(1).optional(),
tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).default(false),
showFileOffsets: z.boolean().default(false),
git: z.object({
sortByChanges: z.boolean().default(true),
sortByChangesMaxCommits: z.number().int().min(1).default(100),
Expand Down
4 changes: 2 additions & 2 deletions src/core/file/fileTreeGenerate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ const addPathToTree = (root: TreeNode, path: string, isDirectory: boolean): void
}
};

const sortTreeNodes = (node: TreeNode) => {
export const sortTreeNodes = (node: TreeNode) => {
node.children.sort((a, b) => {
if (a.isDirectory === b.isDirectory) {
return a.name.localeCompare(b.name);
Expand Down Expand Up @@ -150,7 +150,7 @@ export interface FilesByRoot {
* contamination, which would require additional complexity. For most use cases,
* empty directories are less important in multi-root scenarios.
*/
const generateMultiRootSections = (
export const generateMultiRootSections = (
filesByRoot: FilesByRoot[],
treeToStringFn: (tree: TreeNode, prefix: string) => string,
): string => {
Expand Down
71 changes: 71 additions & 0 deletions src/core/file/fileTreeOffsets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import type { FileLineOffset } from '../output/fileOffsets.js';
import { formatFileOffsetAnnotation } from '../output/fileOffsets.js';
import type { FilesByRoot, TreeNode } from './fileTreeGenerate.js';
import { generateFileTree, generateMultiRootSections, sortTreeNodes } from './fileTreeGenerate.js';

/**
 * Renders a file tree as text, appending a line-range annotation to every file
 * that has an entry in `offsets`.
 *
 * Directories are printed with a trailing `/` and their children are rendered
 * recursively with an extended prefix. Files without an offset entry are
 * printed without any annotation.
 *
 * @param node The tree node whose children are rendered
 * @param offsets Map of file paths to their line ranges in the output file
 * @param prefix Indentation prefix prepended to every line emitted at this level
 * @param currentPath Path of `node` relative to the tree root; used to build the lookup key
 * @param _isRoot Internal flag: when true the tree is passed to `sortTreeNodes`
 *   before rendering. Recursive calls pass `false`.
 * @returns The rendered tree, one entry per line, each line terminated by `\n`
 */
export const treeToStringWithFileOffsets = (
  node: TreeNode,
  offsets: Record<string, FileLineOffset>,
  prefix = '',
  currentPath = '',
  _isRoot = true,
): string => {
  if (_isRoot) {
    sortTreeNodes(node);
  }
  let result = '';

  for (const child of node.children) {
    const childPath = currentPath ? `${currentPath}/${child.name}` : child.name;

    if (child.isDirectory) {
      result += `${prefix}${child.name}/\n`;
      result += treeToStringWithFileOffsets(child, offsets, `${prefix} `, childPath, false);
      continue;
    }

    // Only consult own properties: a plain-object record would otherwise resolve
    // file names such as "constructor" or "toString" to inherited
    // Object.prototype members and produce a bogus annotation.
    const offset = Object.prototype.hasOwnProperty.call(offsets, childPath) ? offsets[childPath] : undefined;
    const offsetSuffix = offset ? formatFileOffsetAnnotation(offset) : '';
    result += `${prefix}${child.name}${offsetSuffix}\n`;
  }

  return result;
};

/**
 * Builds a file tree from the given paths and renders it with line-range
 * annotations, with surrounding whitespace trimmed from the result.
 *
 * @param files File paths to place in the tree
 * @param offsets Map of file paths to their line ranges in the output file
 * @param emptyDirPaths Optional paths to empty directories, forwarded to the tree builder
 */
export const generateTreeStringWithFileOffsets = (
  files: string[],
  offsets: Record<string, FileLineOffset>,
  emptyDirPaths: string[] = [],
): string => {
  const rendered = treeToStringWithFileOffsets(generateFileTree(files, emptyDirPaths), offsets);
  return rendered.trim();
};

/**
 * Renders the directory tree with file offset annotations, adding root labels
 * when more than one root directory is involved.
 *
 * With exactly one root the output is the plain annotated tree (no label).
 * Otherwise rendering is delegated to `generateMultiRootSections`, which is
 * handed an offset-aware tree renderer.
 *
 * @param filesByRoot Array of root directories with their files
 * @param offsets Map of file paths to their line ranges in the output file
 * @param emptyDirPaths Optional paths to empty directories (only used in the single-root case)
 */
export const generateTreeStringWithRootsAndFileOffsets = (
  filesByRoot: FilesByRoot[],
  offsets: Record<string, FileLineOffset>,
  emptyDirPaths: string[] = [],
): string => {
  if (filesByRoot.length !== 1) {
    // Zero or several roots: emit one labeled section per root
    const renderWithOffsets = (tree: TreeNode, prefix: string): string =>
      treeToStringWithFileOffsets(tree, offsets, prefix);
    return generateMultiRootSections(filesByRoot, renderWithOffsets);
  }

  // Exactly one root: flat tree, no label
  return generateTreeStringWithFileOffsets(filesByRoot[0].files, offsets, emptyDirPaths);
};
168 changes: 168 additions & 0 deletions src/core/output/fileOffsets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import type { RepomixOutputStyle } from '../../config/configSchema.js';

/** Line range that one file occupies within the rendered output. */
export interface FileLineOffset {
/** First line of the range (1-indexed, inclusive). */
start: number;
/** Last line of the range (1-indexed, inclusive). */
end: number;
}

/**
 * Lazily yields every line of `s` together with its 1-indexed line number.
 *
 * Lines are located with indexOf('\n') and sliced out one at a time, so no
 * array holding all lines is ever allocated. The '\n' terminator is not part
 * of the yielded line. A trailing newline produces a final empty line, and an
 * empty string yields exactly one empty line.
 */
function* iterLines(s: string): Generator<{ line: string; lineNum: number }> {
  let lineNum = 1;
  let cursor = 0;
  for (;;) {
    const newlineAt = s.indexOf('\n', cursor);
    if (newlineAt < 0) {
      // Last (possibly empty) line: everything from the cursor to the end
      yield { line: s.slice(cursor), lineNum };
      return;
    }
    yield { line: s.slice(cursor, newlineAt), lineNum };
    cursor = newlineAt + 1;
    lineNum += 1;
  }
}

/**
 * Number of lines in a string: one more than the number of '\n' characters,
 * counted with repeated indexOf calls so no intermediate array is built.
 */
const countLines = (s: string): number => {
  let total = 1;
  for (let at = s.indexOf('\n'); at !== -1; at = s.indexOf('\n', at + 1)) {
    total += 1;
  }
  return total;
};

/**
 * Locates the 1-indexed line at which the files section of the output begins.
 *
 * Offset scanning starts from this line so that marker-like text appearing
 * earlier in the output is not mistaken for a file block.
 *
 * Detection per style:
 * - xml: first line that is `<files>` after trimming
 * - markdown: first line that is exactly `# Files`
 * - plain: first `Files` line directly preceded by a 64-character `=` separator;
 *   the separator's line number is returned
 *
 * Returns 1 when no marker is found, which makes callers scan the whole output.
 */
const findFilesSectionStartLine = (output: string, style: RepomixOutputStyle): number => {
  const longSeparator = '='.repeat(64);
  let previous = '';

  for (const entry of iterLines(output)) {
    const current = entry.line;
    switch (style) {
      case 'xml':
        if (current.trim() === '<files>') return entry.lineNum;
        break;
      case 'markdown':
        if (current === '# Files') return entry.lineNum;
        break;
      case 'plain':
        if (current === 'Files' && previous === longSeparator) return entry.lineNum - 1;
        break;
      default:
        break;
    }
    previous = current;
  }

  // No marker found: fall back to scanning from the very first line
  return 1;
};

/** Opening tag of an XML file block; group 1 is the path (every character up to the next `"`). */
const XML_FILE_OPEN_PATTERN = /^\s*<file path="([^"]+)">\s*$/;
/** Markdown per-file heading; group 1 is the path. */
const MARKDOWN_FILE_HEADING_PATTERN = /^## File: (.+)$/;
/** Leading backtick run (three or more) of a Markdown code fence; group 1 is the run itself. */
const MARKDOWN_FENCE_OPEN_PATTERN = /^(`{3,})/;
/** Short separator (16 '=') that surrounds each "File: path" header in the plain style. */
const PLAIN_FILE_SEPARATOR = '================';
/** Long separator (64 '=') that frames section titles in the plain style. */
const PLAIN_SECTION_SEPARATOR = '='.repeat(64);

/**
 * XML style: each file spans from its `<file path="...">` line to its `</file>` line.
 *
 * File content is not guaranteed to be escaped, so tag-like lines may occur
 * inside a file body. To stay on the structural tags:
 * - while a file block is open and no `</file>` has been seen yet, `<file ...>`
 *   and `</files>` lines are treated as content;
 * - the LAST `</file>` seen before the next structural boundary is taken as the
 *   closing tag, so a stray `</file>` inside the content does not end the block early.
 */
const scanXmlFileOffsets = (output: string, sectionStartLine: number): Record<string, FileLineOffset> => {
  const offsets: Record<string, FileLineOffset> = {};
  let currentPath: string | null = null;
  let currentStart = 0;
  let lastCloseLine: number | null = null;

  for (const { line, lineNum } of iterLines(output)) {
    if (lineNum < sectionStartLine) continue;

    const trimmed = line.trim();
    if (trimmed === '</file>') {
      // Candidate closing tag; a later `</file>` of the same block supersedes it
      if (currentPath !== null) lastCloseLine = lineNum;
      continue;
    }

    // Inside a block that has not seen any `</file>` yet: everything is content
    if (currentPath !== null && lastCloseLine === null) continue;

    const openedPath = XML_FILE_OPEN_PATTERN.exec(line)?.[1];
    if (openedPath === undefined && trimmed !== '</files>') continue;

    // Structural boundary reached: flush the block that was pending, if any
    if (currentPath !== null && lastCloseLine !== null) {
      offsets[currentPath] = { start: currentStart, end: lastCloseLine };
    }
    currentPath = null;
    lastCloseLine = null;

    if (openedPath === undefined) break; // `</files>`: end of the files section

    currentPath = openedPath;
    currentStart = lineNum;
  }

  // Output ended without `</files>`: still record a block that was properly closed
  if (currentPath !== null && lastCloseLine !== null) {
    offsets[currentPath] = { start: currentStart, end: lastCloseLine };
  }

  return offsets;
};

/**
 * Markdown style: each file spans from its `## File: path` heading to the line
 * before the next file heading, or to the end of the files section for the last file.
 *
 * The code fence that follows a file heading is tracked, and every line between
 * the opening fence and its matching closing fence is skipped. Without this,
 * content lines starting with `# ` (shell/Python comments, Markdown H1) would be
 * mistaken for the next top-level section and `## File:` lines inside content
 * would register phantom files.
 *
 * NOTE(review): assumes the closing fence is a line consisting solely of the same
 * backtick run as the opening fence and that this exact line does not occur inside
 * the file content — confirm against the Markdown template. If a heading is not
 * followed by a fence, scanning behaves as if no fence tracking existed.
 */
const scanMarkdownFileOffsets = (output: string, sectionStartLine: number): Record<string, FileLineOffset> => {
  const offsets: Record<string, FileLineOffset> = {};
  const fileStarts: Array<{ path: string; line: number }> = [];
  // Default to end of output; narrowed to the line before the next top-level heading if one exists
  let sectionEndLine = countLines(output);
  // Non-null while inside a file's code block; holds the backtick run that closes it
  let closingFence: string | null = null;
  // True right after a file heading, until the opening fence (or any other non-blank line) is seen
  let expectFenceOpen = false;

  for (const { line, lineNum } of iterLines(output)) {
    if (lineNum < sectionStartLine) continue;

    if (closingFence !== null) {
      if (line.trimEnd() === closingFence) closingFence = null;
      continue;
    }

    if (expectFenceOpen) {
      if (line.trim() === '') continue; // tolerate blank lines between heading and fence
      expectFenceOpen = false;
      const fenceRun = MARKDOWN_FENCE_OPEN_PATTERN.exec(line)?.[1];
      if (fenceRun !== undefined) {
        closingFence = fenceRun;
        continue;
      }
      // Not a fence: fall through and treat the line like any other
    }

    const headingPath = MARKDOWN_FILE_HEADING_PATTERN.exec(line)?.[1];
    if (headingPath !== undefined) {
      fileStarts.push({ path: headingPath.trim(), line: lineNum });
      expectFenceOpen = true;
    } else if (line.startsWith('# ') && lineNum > sectionStartLine) {
      // Hit the next top-level section: record the boundary and stop scanning
      sectionEndLine = lineNum - 1;
      break;
    }
  }

  fileStarts.forEach(({ path, line }, index) => {
    const next = fileStarts[index + 1];
    offsets[path] = { start: line, end: next !== undefined ? next.line - 1 : sectionEndLine };
  });

  return offsets;
};

/**
 * Plain style: a file is introduced by
 *   "================" / "File: path" / "================"
 * followed by its content. The recorded range starts at the first content line
 * and ends on the line before the next file's leading separator, or at the end
 * of the files section for the last file.
 *
 * A 64-character separator encountered after at least one file header marks the
 * start of the next section and ends the scan.
 */
const scanPlainFileOffsets = (output: string, sectionStartLine: number): Record<string, FileLineOffset> => {
  const offsets: Record<string, FileLineOffset> = {};
  // `line` is the line number of the separator directly above the "File: " header
  const fileHeaderLines: Array<{ path: string; line: number }> = [];
  // Default to end of output; narrowed when a long separator signals the next section
  let sectionEndLine = countLines(output);
  let prevLine = '';

  for (const { line, lineNum } of iterLines(output)) {
    if (lineNum >= sectionStartLine) {
      if (line === PLAIN_SECTION_SEPARATOR && fileHeaderLines.length > 0) {
        sectionEndLine = lineNum - 1;
        break;
      }

      if (prevLine === PLAIN_FILE_SEPARATOR && line.startsWith('File: ')) {
        fileHeaderLines.push({ path: line.slice('File: '.length).trim(), line: lineNum - 1 });
      }
    }
    // Always remember the previous line, including lines before the section start
    prevLine = line;
  }

  fileHeaderLines.forEach(({ path, line }, index) => {
    const next = fileHeaderLines[index + 1];
    // separator -> "File:" header -> separator -> content, hence +3
    offsets[path] = { start: line + 3, end: next !== undefined ? next.line - 1 : sectionEndLine };
  });

  return offsets;
};

/**
 * Scans a rendered output string and returns the line range (1-indexed, inclusive)
 * for each file's block.
 *
 * Scanning starts at the files section of the output so that marker-like text in
 * earlier sections is ignored, and the XML and Markdown scanners additionally
 * avoid treating marker-like lines inside file content as structure.
 *
 * Supports the `xml`, `markdown` and `plain` output styles; any other style
 * yields an empty record.
 *
 * The output is walked line by line via indexOf-based iteration, so it is never
 * duplicated as an array of lines.
 *
 * @param output The fully rendered output text
 * @param style The output style that was used to render `output`
 * @returns Map from file path (as written in the output) to its line range
 */
export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle): Record<string, FileLineOffset> => {
  const sectionStartLine = findFilesSectionStartLine(output, style);

  switch (style) {
    case 'xml':
      return scanXmlFileOffsets(output, sectionStartLine);
    case 'markdown':
      return scanMarkdownFileOffsets(output, sectionStartLine);
    case 'plain':
      return scanPlainFileOffsets(output, sectionStartLine);
    default:
      return {};
  }
};

/**
 * Turns a line range into the suffix appended to a file name in the directory
 * tree. The result starts with a space and uses an en dash between the bounds,
 * e.g. " [lines 42–78]".
 */
export const formatFileOffsetAnnotation = (offset: FileLineOffset): string => {
  const { start, end } = offset;
  return [' [lines ', start, '–', end, ']'].join('');
};
Loading