Skip to content
7 changes: 7 additions & 0 deletions src/cli/actions/defaultAction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,13 @@ export const buildCliConfig = (options: CliOptions): RepomixConfigCli => {
};
}

if (options.showFileOffsets) {
cliConfig.output = {
...cliConfig.output,
showFileOffsets: options.showFileOffsets,
};
}

// Skill generation
if (options.skillGenerate !== undefined) {
cliConfig.skillGenerate = options.skillGenerate;
Expand Down
4 changes: 4 additions & 0 deletions src/cli/cliRun.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ export const run = async () => {
'--token-count-encoding <encoding>',
'Tokenizer model for counting: o200k_base (GPT-4o), cl100k_base (GPT-3.5/4), etc. (default: o200k_base)',
)
.option(
'--show-file-offsets',
'Annotate each file in the directory structure with its line range in the output (e.g., [lines 42–78])',
)
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Outdated
// MCP
.optionsGroup('MCP')
.option('--mcp', 'Run as Model Context Protocol server for AI tool integration')
Expand Down
1 change: 1 addition & 0 deletions src/cli/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export interface CliOptions extends OptionValues {
// Token Count Options
tokenCountEncoding?: string;
tokenCountTree?: boolean | number;
showFileOffsets?: boolean;

// MCP
mcp?: boolean;
Expand Down
2 changes: 2 additions & 0 deletions src/config/configSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export const repomixConfigBaseSchema = z.object({
includeFullDirectoryStructure: z.boolean().optional(),
splitOutput: z.number().int().min(1).optional(),
tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).optional(),
showFileOffsets: z.boolean().optional(),
git: z
.object({
sortByChanges: z.boolean().optional(),
Expand Down Expand Up @@ -103,6 +104,7 @@ export const repomixConfigDefaultSchema = z.object({
includeFullDirectoryStructure: z.boolean().default(false),
splitOutput: z.number().int().min(1).optional(),
tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).default(false),
showFileOffsets: z.boolean().default(false),
git: z.object({
sortByChanges: z.boolean().default(true),
sortByChangesMaxCommits: z.number().int().min(1).default(100),
Expand Down
69 changes: 69 additions & 0 deletions src/core/file/fileTreeGenerate.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import nodepath from 'node:path';
import type { FileLineOffset } from '../output/fileOffsets.js';
import { formatFileOffsetAnnotation } from '../output/fileOffsets.js';

export interface TreeNode {
name: string;
Expand Down Expand Up @@ -133,6 +135,73 @@ export const generateTreeStringWithLineCounts = (
return treeToStringWithLineCounts(tree, lineCounts).trim();
};

/**
 * Renders a file tree as an indented listing, appending a line-range annotation to
 * every file that has an entry in `offsets`.
 *
 * Directories are printed as `name/` followed by their children, rendered recursively
 * with an extended prefix. Files without an entry in `offsets` are printed bare.
 *
 * @param node The tree node whose children are rendered
 * @param offsets Map of file paths to their line ranges in the output file
 * @param prefix Current indentation prefix
 * @param currentPath Slash-joined path of `node`, used to build lookup keys for `offsets`
 * @param _isRoot Internal flag; when true (the default) `sortTreeNodes(node)` is called
 *   before rendering. Recursive calls pass `false`.
 * @returns The rendered listing, one entry per line, each terminated by a newline
 */
export const treeToStringWithFileOffsets = (
  node: TreeNode,
  offsets: Record<string, FileLineOffset>,
  prefix = '',
  currentPath = '',
  _isRoot = true,
): string => {
  if (_isRoot) {
    sortTreeNodes(node);
  }

  let result = '';

  for (const child of node.children) {
    const childPath = currentPath ? `${currentPath}/${child.name}` : child.name;

    if (child.isDirectory) {
      result += `${prefix}${child.name}/\n`;
      result += treeToStringWithFileOffsets(child, offsets, `${prefix} `, childPath, false);
      continue;
    }

    // Own-property check: a bare `offsets[childPath]` on a plain object would resolve
    // inherited members for files named e.g. `constructor` or `toString`, producing a
    // bogus annotation for a file that has no recorded offset.
    const offset = Object.prototype.hasOwnProperty.call(offsets, childPath) ? offsets[childPath] : undefined;
    const offsetSuffix = offset ? formatFileOffsetAnnotation(offset) : '';
    result += `${prefix}${child.name}${offsetSuffix}\n`;
  }

  return result;
};

/**
 * Builds the file tree for `files` (plus any `emptyDirPaths`) and renders it with
 * file offset annotations, trimming surrounding whitespace from the result.
 *
 * @param files File paths to include in the tree
 * @param offsets Map of file paths to their line ranges in the output file
 * @param emptyDirPaths Optional paths to empty directories
 * @returns The trimmed, annotated tree string
 */
export const generateTreeStringWithFileOffsets = (
  files: string[],
  offsets: Record<string, FileLineOffset>,
  emptyDirPaths: string[] = [],
): string => treeToStringWithFileOffsets(generateFileTree(files, emptyDirPaths), offsets).trim();

/**
 * Generates a tree string with file offset annotations, adding root directory labels
 * when more than one root is involved.
 *
 * With exactly one root, the result is the plain annotated tree for that root's files.
 * In every other case the work is delegated to `generateMultiRootSections`, which
 * produces one section per root (each labeled with [rootLabel]/).
 *
 * @param filesByRoot Array of root directories with their files
 * @param offsets Map of file paths to their line ranges in the output file
 * @param emptyDirPaths Optional paths to empty directories
 */
export const generateTreeStringWithRootsAndFileOffsets = (
  filesByRoot: FilesByRoot[],
  offsets: Record<string, FileLineOffset>,
  emptyDirPaths: string[] = [],
): string => {
  const hasSingleRoot = filesByRoot.length === 1;

  if (!hasSingleRoot) {
    // Labeled sections; each root's tree is rendered with the shared offsets map.
    return generateMultiRootSections(filesByRoot, (tree, prefix) => treeToStringWithFileOffsets(tree, offsets, prefix));
  }

  // Only one root: no labels, just the flat annotated tree.
  const [soleRoot] = filesByRoot;
  return generateTreeStringWithFileOffsets(soleRoot.files, offsets, emptyDirPaths);
};
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated

/**
* Represents files grouped by their root directory.
*/
Expand Down
89 changes: 89 additions & 0 deletions src/core/output/fileOffsets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/**
 * Line range of a single file's block within the rendered output.
 * Line numbers are 1-indexed and the range is inclusive on both ends.
 */
export interface FileLineOffset {
/** First line of the range (1-indexed). */
start: number;
/** Last line of the range (1-indexed, inclusive). */
end: number;
}

/**
 * Scans a rendered output string and returns the line range (1-indexed, inclusive)
 * for each file's block, keyed by the path captured from the block's header line.
 *
 * Detection is purely line-based and depends on `style`:
 * - `xml`: from the `<file path="...">` line through the next line that is exactly `</file>`.
 * - `markdown`: from a `## File: <path>` heading through the line before the next such
 *   heading, or through the last line of `output` for the final file.
 * - `plain`: from three lines after the separator that precedes a `File: <path>` header
 *   through the line before the next file's separator, or the last line of `output`.
 *
 * Any other style yields an empty record.
 * JSON output is structured and does not use this function.
 *
 * NOTE(review): marker lines are matched wherever they occur in `output`, so file content
 * that itself contains a matching line would shift the reported ranges — confirm whether
 * callers need to guard against this.
 *
 * @param output The rendered output text to scan
 * @param style Output style name selecting the marker format (`xml`, `markdown`, or `plain`)
 * @returns Map of file path to its line range within `output`
 */
export const computeFileLineOffsets = (output: string, style: string): Record<string, FileLineOffset> => {
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated
const offsets: Record<string, FileLineOffset> = {};
// Element i of `lines` corresponds to 1-indexed line number i + 1.
const lines = output.split('\n');
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated

if (style === 'xml') {
// currentPath is non-null only while an opening <file> line has been seen without its closing line.
let currentPath: string | null = null;
let currentStart = 0;

for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const lineNum = i + 1;

// The whole line must be the opening tag; the capture is everything between `path="` and the trailing `">`.
const startMatch = line.match(/^<file path="(.+)">$/);
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated
if (startMatch) {
currentPath = startMatch[1];
currentStart = lineNum;
} else if (line === '</file>' && currentPath !== null) {
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated
// The recorded range includes both the opening and the closing tag lines.
offsets[currentPath] = { start: currentStart, end: lineNum };
currentPath = null;
}
Comment on lines +87 to +92

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 XML offset scanning falsely matches </file> or </files> appearing inside file content

In computeFileLineOffsets for XML style, the scanner matches line.trim() === '</file>' (line 87) and line.trim() === '</files>' (line 90) without verifying these are actual structural tags rather than file content. Since the non-parsable XML style embeds file content directly without escaping (src/core/output/outputStyles/xmlStyle.ts:49), any packed file whose content contains a line that is exactly </file> or </files> (with optional whitespace) will cause the scanner to either close the current file block prematurely or stop scanning entirely. This can occur when packing XML templates, test fixtures, or documentation that references these tags.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

}
} else if (style === 'markdown') {
// First pass: collect every `## File: <path>` heading together with its line number.
const fileStarts: Array<{ path: string; line: number }> = [];

for (let i = 0; i < lines.length; i++) {
const match = lines[i].match(/^## File: (.+)$/);
if (match) {
fileStarts.push({ path: match[1], line: i + 1 });
}
Comment on lines +105 to +109

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Markdown offset scanning prematurely terminates when file content contains lines starting with #

In computeFileLineOffsets, the Markdown scanner at line 105 uses line.startsWith('# ') to detect the end of the # Files section. However, file content is embedded raw in the Markdown output (inside code block delimiters), and lines within file content that begin with # (e.g., Python/shell/Ruby comments like # import os, or Markdown H1 headings) will match this check. This causes the scanner to break out of the loop prematurely, resulting in missing offset annotations for all files listed after the first file whose content contains a # line, and an incorrect end-line for the file containing the # line. Since # comments are extremely common (Python, shell, Ruby, Perl, YAML comments, Markdown headings), this effectively breaks --show-file-offsets for virtually all real repositories when using --style markdown.

Prompt for agents
The Markdown offset scanner in computeFileLineOffsets (fileOffsets.ts, around line 99-110) prematurely stops scanning when it encounters a line starting with '# ' inside file content. The root cause is that file content is embedded raw inside Markdown code blocks (backtick delimiters), and the scanner doesn't track whether it is inside a code block or not.

The fix should track code-block state: maintain a boolean flag that toggles when encountering the markdownCodeBlockDelimiter (lines matching a backtick-only pattern). When inside a code block, skip the '# ' top-level heading check. Only check for section boundaries when NOT inside a code block.

Alternatively, the scanner could count the '## File:' headers it finds against the known processedFiles count and stop only when all files have been accounted for, rather than relying on section boundary detection.

The relevant template is in outputStyles/markdownStyle.ts lines 44-50 — each file's content is between markdownCodeBlockDelimiter lines.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

}

// Second pass: each file runs up to the line before the next heading; the last one runs to the final line.
for (let j = 0; j < fileStarts.length; j++) {
const { path, line } = fileStarts[j];
const endLine = j + 1 < fileStarts.length ? fileStarts[j + 1].line - 1 : lines.length;
offsets[path] = { start: line, end: endLine };
}
Comment thread
nuthalapativarun marked this conversation as resolved.
} else if (style === 'plain') {
// Plain format: "================" then "File: path" then "================" then content
// End of content = line before next "================" separator
const SEPARATOR = '================';
Comment thread
nuthalapativarun marked this conversation as resolved.
// NOTE(review): fileSeparatorLines is filled below but never read in this function.
const fileSeparatorLines: number[] = [];
// `line` here is the line number of the separator directly above the "File: " header.
const fileHeaderLines: Array<{ path: string; line: number }> = [];

for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const lineNum = i + 1;

if (line === SEPARATOR) {
fileSeparatorLines.push(lineNum);
// Check if next line is a File: header
if (i + 1 < lines.length && lines[i + 1].startsWith('File: ')) {
const filePath = lines[i + 1].slice('File: '.length);
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated
fileHeaderLines.push({ path: filePath, line: lineNum });
}
}
}

for (let j = 0; j < fileHeaderLines.length; j++) {
const { path, line } = fileHeaderLines[j];
// Content starts after: separator → File: header → separator → content
const contentStart = line + 3;
// Content ends before the next file separator, or at the last line
const nextSeparatorLine = j + 1 < fileHeaderLines.length ? fileHeaderLines[j + 1].line - 1 : lines.length;
offsets[path] = { start: contentStart, end: nextSeparatorLine };
}
}
Comment thread
nuthalapativarun marked this conversation as resolved.
Outdated

// Styles not handled above fall through with no entries.
return offsets;
};

/**
 * Builds the bracketed line-range suffix appended to a file entry in the tree.
 * The result starts with a single space so it can be concatenated directly after a name.
 * Example: " [lines 42–78]"
 *
 * @param offset The line range to describe
 * @returns The annotation text, including its leading space
 */
export const formatFileOffsetAnnotation = (offset: FileLineOffset): string => {
  const { start, end } = offset;
  const range = `${start}–${end}`;
  return ` [lines ${range}]`;
};
71 changes: 57 additions & 14 deletions src/core/output/outputGenerate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,17 @@ import Handlebars from 'handlebars';
import type { RepomixConfigMerged } from '../../config/configSchema.js';
import { RepomixError } from '../../shared/errorHandle.js';
import { listDirectories, listFiles, searchFiles } from '../file/fileSearch.js';
import { type FilesByRoot, generateTreeString, generateTreeStringWithRoots } from '../file/fileTreeGenerate.js';
import {
type FilesByRoot,
generateTreeString,
generateTreeStringWithFileOffsets,
generateTreeStringWithRoots,
generateTreeStringWithRootsAndFileOffsets,
} from '../file/fileTreeGenerate.js';
import type { ProcessedFile } from '../file/fileTypes.js';
import type { GitDiffResult } from '../git/gitDiffHandle.js';
import type { GitLogResult } from '../git/gitLogHandle.js';
import { computeFileLineOffsets } from './fileOffsets.js';
import type { OutputGeneratorContext, RenderContext } from './outputGeneratorTypes.js';
import { sortOutputFiles } from './outputSort.js';
import {
Expand Down Expand Up @@ -251,6 +258,31 @@ Please try:
}
};

/**
 * Dispatches rendering to the generator that matches `config.output.style`.
 *
 * - `json` uses the parsable JSON generator.
 * - `xml` uses the parsable XML generator when `config.output.parsableStyle` is set,
 *   otherwise the Handlebars generator.
 * - `markdown` and `plain` use the Handlebars generator.
 *
 * @param config Merged configuration; only `output.style` and `output.parsableStyle` are read here
 * @param renderContext Context handed to the selected generator
 * @param sortedProcessedFiles Processed files, passed through to the Handlebars generator
 * @param deps Generator implementations to dispatch to
 * @returns The rendered output string
 * @throws RepomixError when the style is not one of the cases above
 */
const renderOutput = async (
  config: RepomixConfigMerged,
  renderContext: RenderContext,
  sortedProcessedFiles: ProcessedFile[],
  deps: {
    generateHandlebarOutput: typeof generateHandlebarOutput;
    generateParsableXmlOutput: typeof generateParsableXmlOutput;
    generateParsableJsonOutput: typeof generateParsableJsonOutput;
  },
): Promise<string> => {
  const { style, parsableStyle } = config.output;

  if (style === 'json') {
    return deps.generateParsableJsonOutput(renderContext);
  }

  if (style === 'xml' && parsableStyle) {
    return deps.generateParsableXmlOutput(renderContext);
  }

  // Non-parsable XML shares the template-based path with markdown and plain.
  if (style === 'xml' || style === 'markdown' || style === 'plain') {
    return deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles);
  }

  throw new RepomixError(`Unsupported output style: ${style}`);
};

export const generateOutput = async (
rootDirs: string[],
config: RepomixConfigMerged,
Expand Down Expand Up @@ -281,21 +313,32 @@ export const generateOutput = async (
filePathsByRoot,
emptyDirPaths,
);
const renderContext = createRenderContext(outputGeneratorContext);

switch (config.output.style) {
case 'xml':
return config.output.parsableStyle
? deps.generateParsableXmlOutput(renderContext)
: deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles);
case 'json':
return deps.generateParsableJsonOutput(renderContext);
case 'markdown':
case 'plain':
return deps.generateHandlebarOutput(config, renderContext, sortedProcessedFiles);
default:
throw new RepomixError(`Unsupported output style: ${config.output.style}`);
// When showFileOffsets is enabled, do a two-pass render:
// 1. Render without offsets to discover file line positions
// 2. Annotate the tree string with those positions and re-render
// The tree section has the same number of lines in both passes (only line content changes),
// so the file block positions remain stable between passes.
if (config.output.showFileOffsets && config.output.directoryStructure && config.output.files) {
const firstPassContext = createRenderContext(outputGeneratorContext);
const firstPassOutput = await renderOutput(config, firstPassContext, sortedProcessedFiles, deps);

const offsets = computeFileLineOffsets(firstPassOutput, config.output.style);

const annotatedTree = filePathsByRoot
? generateTreeStringWithRootsAndFileOffsets(filePathsByRoot, offsets, emptyDirPaths)
: generateTreeStringWithFileOffsets(allFilePaths, offsets, emptyDirPaths);
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Outdated

const annotatedContext = createRenderContext({
...outputGeneratorContext,
treeString: annotatedTree,
});

return renderOutput(config, annotatedContext, sortedProcessedFiles, deps);
Comment thread
nuthalapativarun marked this conversation as resolved.
}
Comment thread
nuthalapativarun marked this conversation as resolved.

const renderContext = createRenderContext(outputGeneratorContext);
return renderOutput(config, renderContext, sortedProcessedFiles, deps);
};

export const buildOutputGeneratorContext = async (
Expand Down
2 changes: 2 additions & 0 deletions tests/config/configSchema.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ describe('configSchema', () => {
copyToClipboard: true,
includeFullDirectoryStructure: false,
tokenCountTree: '100',
showFileOffsets: false,
git: {
sortByChanges: true,
sortByChangesMaxCommits: 100,
Expand Down Expand Up @@ -220,6 +221,7 @@ describe('configSchema', () => {
copyToClipboard: false,
includeFullDirectoryStructure: false,
tokenCountTree: false,
showFileOffsets: false,
git: {
sortByChanges: true,
sortByChangesMaxCommits: 100,
Expand Down
1 change: 1 addition & 0 deletions tests/core/metrics/calculateGitDiffMetrics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ describe('calculateGitDiffMetrics', () => {
includeEmptyDirectories: false,
includeFullDirectoryStructure: false,
tokenCountTree: false,
showFileOffsets: false,
git: {
sortByChanges: true,
sortByChangesMaxCommits: 100,
Expand Down
1 change: 1 addition & 0 deletions tests/core/metrics/calculateGitLogMetrics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ describe('calculateGitLogMetrics', () => {
includeEmptyDirectories: false,
includeFullDirectoryStructure: false,
tokenCountTree: false,
showFileOffsets: false,
git: {
sortByChanges: true,
sortByChangesMaxCommits: 100,
Expand Down
2 changes: 2 additions & 0 deletions tests/core/output/flagFullDirectoryStructure.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const createMockConfig = (overrides: Partial<RepomixConfigMerged> = {}): Repomix
includeEmptyDirectories: false,
includeFullDirectoryStructure: true,
tokenCountTree: false,
showFileOffsets: false,
git: {
sortByChanges: false,
sortByChangesMaxCommits: 10,
Expand Down Expand Up @@ -143,6 +144,7 @@ describe('includeEmptyDirectories with pre-computed emptyDirPaths', () => {
includeEmptyDirectories: true,
includeFullDirectoryStructure: false,
tokenCountTree: false,
showFileOffsets: false,
git: {
sortByChanges: false,
sortByChangesMaxCommits: 10,
Expand Down
1 change: 1 addition & 0 deletions tests/core/output/outputStyles/jsonStyle.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const createMockConfig = (overrides: Partial<RepomixConfigMerged> = {}): Repomix
includeEmptyDirectories: false,
includeFullDirectoryStructure: false,
tokenCountTree: false,
showFileOffsets: false,
git: {
sortByChanges: false,
sortByChangesMaxCommits: 10,
Expand Down
Loading