yamadashy · nuthalapativarun · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
@@ -628,6 +628,7 @@ Instruction
 | `--split-output <size>` | Split output into multiple numbered files (e.g., `repomix-output.1.xml`); size like `500kb`, `2mb`, or `1.5mb` |
 | `--include-empty-directories` | Include folders with no files in directory structure |
 | `--include-full-directory-structure` | Show complete directory tree in output, including files not matched by `--include` patterns |
+| `--show-file-offsets` | Annotate each file in the directory structure with its line range in the output (e.g., `[lines 42–78]`); works with XML, Markdown, and plain text styles |
 | `--no-git-sort-by-changes` | Don't sort files by git change frequency (default: most changed files first) |
 | `--include-diffs` | Add git diff section showing working tree and staged changes |
 | `--include-logs` | Add git commit history with messages and changed files |
@@ -1363,6 +1364,7 @@ Here's an explanation of the configuration options:
 | `output.tokenCountTree`          | Whether to display file tree with token count summaries. Can be boolean or number (minimum token count threshold)           | `false`                |
 | `output.includeEmptyDirectories` | Whether to include empty directories in the repository structure                                                             | `false`                |
 | `output.includeFullDirectoryStructure` | When using `include` patterns, whether to display the complete directory tree (respecting ignore patterns) while still processing only the included files. Provides full repository context for AI analysis | `false`                |
+| `output.showFileOffsets`         | Whether to annotate each file in the directory structure with its line range in the output (e.g., `[lines 42–78]`). Works with XML, Markdown, and plain text styles | `false`                |
 | `output.git.sortByChanges`       | Whether to sort files by git change count (files with more changes appear at the bottom)                                     | `true`                 |
 | `output.git.sortByChangesMaxCommits` | Maximum number of commits to analyze for git changes                                                                     | `100`                  |
 | `output.git.includeDiffs`       | Whether to include git diffs in the output (includes both work tree and staged changes separately)                          | `false`                |

@@ -337,6 +337,13 @@ export const buildCliConfig = (options: CliOptions): RepomixConfigCli => {
     };
   }
 
+  if (options.showFileOffsets) {
+    cliConfig.output = {
+      ...cliConfig.output,
+      showFileOffsets: options.showFileOffsets,
+    };
+  }
+
   // Skill generation
   if (options.skillGenerate !== undefined) {
     cliConfig.skillGenerate = options.skillGenerate;

@@ -124,6 +124,10 @@ export const run = async () => {
         '--include-full-directory-structure',
         'Show entire repository tree in the Directory Structure section, even when using --include patterns',
       )
+      .option(
+        '--show-file-offsets',
+        'Annotate each file in the directory structure with its line range in the output (e.g., [lines 42–78])',
+      )
       .option(
         '--no-git-sort-by-changes',
         "Don't sort files by git change frequency (default: most changed files first)",

@@ -54,6 +54,7 @@ export interface CliOptions extends OptionValues {
   // Token Count Options
   tokenCountEncoding?: string;
   tokenCountTree?: boolean | number;
+  showFileOffsets?: boolean;
 
   // MCP
   mcp?: boolean;

@@ -42,6 +42,7 @@ export const repomixConfigBaseSchema = z.object({
       includeFullDirectoryStructure: z.boolean().optional(),
       splitOutput: z.number().int().min(1).optional(),
       tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).optional(),
+      showFileOffsets: z.boolean().optional(),
       git: z
         .object({
           sortByChanges: z.boolean().optional(),
@@ -103,6 +104,7 @@ export const repomixConfigDefaultSchema = z.object({
     includeFullDirectoryStructure: z.boolean().default(false),
     splitOutput: z.number().int().min(1).optional(),
     tokenCountTree: z.union([z.boolean(), z.number(), z.string()]).default(false),
+    showFileOffsets: z.boolean().default(false),
     git: z.object({
       sortByChanges: z.boolean().default(true),
       sortByChangesMaxCommits: z.number().int().min(1).default(100),

@@ -55,7 +55,7 @@ const addPathToTree = (root: TreeNode, path: string, isDirectory: boolean): void
   }
 };
 
-const sortTreeNodes = (node: TreeNode) => {
+export const sortTreeNodes = (node: TreeNode) => {
   node.children.sort((a, b) => {
     if (a.isDirectory === b.isDirectory) {
       return a.name.localeCompare(b.name);
@@ -150,7 +150,7 @@ export interface FilesByRoot {
  * contamination, which would require additional complexity. For most use cases,
  * empty directories are less important in multi-root scenarios.
  */
-const generateMultiRootSections = (
+export const generateMultiRootSections = (
   filesByRoot: FilesByRoot[],
   treeToStringFn: (tree: TreeNode, prefix: string) => string,
 ): string => {

@@ -0,0 +1,71 @@
+import type { FileLineOffset } from '../output/fileOffsets.js';
+import { formatFileOffsetAnnotation } from '../output/fileOffsets.js';
+import type { FilesByRoot, TreeNode } from './fileTreeGenerate.js';
+import { generateFileTree, generateMultiRootSections, sortTreeNodes } from './fileTreeGenerate.js';
+
+/**
+ * Renders a file tree as an indented string, appending a line-range annotation
+ * (as produced by `formatFileOffsetAnnotation`) to every file that has an entry
+ * in `offsets`.
+ *
+ * Directories are written as `name/` and their children are indented by two more
+ * spaces. Files without a recorded offset are written without any suffix.
+ *
+ * @param node The tree node whose children are rendered
+ * @param offsets Map of file paths to their line ranges in the output file
+ * @param prefix Current indentation prefix
+ * @param currentPath Slash-joined path of `node`, used to build the lookup keys
+ * @param _isRoot Internal flag; true only on the outermost call, which passes the
+ *   tree to `sortTreeNodes` before rendering (this mutates the tree in place)
+ * @returns One `\n`-terminated line per descendant of `node`
+ */
+export const treeToStringWithFileOffsets = (
+  node: TreeNode,
+  offsets: Record<string, FileLineOffset>,
+  prefix = '',
+  currentPath = '',
+  _isRoot = true,
+): string => {
+  if (_isRoot) {
+    sortTreeNodes(node);
+  }
+  let result = '';
+
+  for (const child of node.children) {
+    const childPath = currentPath ? `${currentPath}/${child.name}` : child.name;
+
+    if (child.isDirectory) {
+      result += `${prefix}${child.name}/\n`;
+      result += treeToStringWithFileOffsets(child, offsets, `${prefix}  `, childPath, false);
+      continue;
+    }
+
+    // Only honour own keys: a plain-object lookup would otherwise resolve inherited
+    // members for files named e.g. "constructor" or "toString" and print
+    // "[lines undefined–undefined]".
+    const offset = Object.prototype.hasOwnProperty.call(offsets, childPath) ? offsets[childPath] : undefined;
+    const offsetSuffix = offset ? formatFileOffsetAnnotation(offset) : '';
+    result += `${prefix}${child.name}${offsetSuffix}\n`;
+  }
+
+  return result;
+};
+
+/**
+ * Builds a file tree from the given paths and renders it with line-range
+ * annotations, with surrounding whitespace trimmed from the result.
+ *
+ * @param files File paths to place in the tree
+ * @param offsets Map of file paths to their line ranges in the output file
+ * @param emptyDirPaths Optional paths to empty directories
+ */
+export const generateTreeStringWithFileOffsets = (
+  files: string[],
+  offsets: Record<string, FileLineOffset>,
+  emptyDirPaths: string[] = [],
+): string => {
+  const annotated = treeToStringWithFileOffsets(generateFileTree(files, emptyDirPaths), offsets);
+  return annotated.trim();
+};
+
+/**
+ * Generates the annotated tree string for one or more root directories.
+ *
+ * With exactly one root the result is the plain, unlabeled tree with offsets.
+ * Otherwise the roots are handed to `generateMultiRootSections`, which renders
+ * each tree through the offset-aware renderer; `emptyDirPaths` is not forwarded
+ * on that path.
+ *
+ * @param filesByRoot Array of root directories with their files
+ * @param offsets Map of file paths to their line ranges in the output file
+ * @param emptyDirPaths Optional paths to empty directories (single-root case only)
+ */
+export const generateTreeStringWithRootsAndFileOffsets = (
+  filesByRoot: FilesByRoot[],
+  offsets: Record<string, FileLineOffset>,
+  emptyDirPaths: string[] = [],
+): string => {
+  if (filesByRoot.length !== 1) {
+    // Anything other than a single root: labeled sections, each rendered with offsets
+    const renderSection = (tree: TreeNode, prefix: string): string =>
+      treeToStringWithFileOffsets(tree, offsets, prefix);
+    return generateMultiRootSections(filesByRoot, renderSection);
+  }
+
+  // Single root: flat tree without a root label
+  return generateTreeStringWithFileOffsets(filesByRoot[0].files, offsets, emptyDirPaths);
+};
@@ -0,0 +1,168 @@
+import type { RepomixOutputStyle } from '../../config/configSchema.js';
+
+/** Line range occupied by one file's block in the rendered output. */
+export interface FileLineOffset {
+  /** First line of the range (1-indexed). */
+  start: number;
+  /** Last line of the range (1-indexed, inclusive). */
+  end: number;
+}
+
+/**
+ * Lazily yields each line of `s` together with its 1-indexed line number.
+ *
+ * Lines are located with indexOf('\n') and sliced one at a time, so no array of
+ * all lines is ever built. A trailing newline produces a final empty line, and
+ * an empty string yields a single empty line.
+ */
+function* iterLines(s: string): Generator<{ line: string; lineNum: number }> {
+  let lineNum = 1;
+  let start = 0;
+  for (;;) {
+    const newlineAt = s.indexOf('\n', start);
+    if (newlineAt === -1) {
+      // Last (or only) line: everything from `start` to the end of the string
+      yield { line: s.slice(start), lineNum };
+      return;
+    }
+    yield { line: s.slice(start, newlineAt), lineNum };
+    start = newlineAt + 1;
+    lineNum += 1;
+  }
+}
+
+/**
+ * Counts the lines in a string as "number of newline characters plus one",
+ * so an empty string counts as one line and a trailing newline adds a final
+ * empty line.
+ */
+const countLines = (s: string): number => {
+  let total = 1;
+  for (let at = s.indexOf('\n'); at !== -1; at = s.indexOf('\n', at + 1)) {
+    total += 1;
+  }
+  return total;
+};
+
+/**
+ * Locates the 1-indexed line on which the files section of the output begins.
+ *
+ * Callers restrict their scan to lines at or after this one so that marker-like
+ * text earlier in the output is not mistaken for a file boundary.
+ *
+ * - xml: the line whose trimmed text is `<files>`
+ * - markdown: the line that is exactly `# Files`
+ * - plain: the 64-character `=` separator immediately above a `Files` line
+ *
+ * Returns 1 when no such marker is found, which makes callers scan everything.
+ */
+const findFilesSectionStartLine = (output: string, style: RepomixOutputStyle): number => {
+  const longSeparator = '='.repeat(64);
+  let previous = '';
+
+  for (const { line, lineNum } of iterLines(output)) {
+    switch (style) {
+      case 'xml':
+        if (line.trim() === '<files>') return lineNum;
+        break;
+      case 'markdown':
+        if (line === '# Files') return lineNum;
+        break;
+      case 'plain':
+        // The section starts on the separator line, i.e. one line above "Files"
+        if (previous === longSeparator && line === 'Files') return lineNum - 1;
+        break;
+      default:
+        break;
+    }
+    previous = line;
+  }
+
+  return 1; // fallback: scan entire output
+};
+
+/**
+ * Scans a rendered output string and returns the line range (1-indexed, inclusive)
+ * for each file's block.
+ *
+ * Scanning starts at the files section of the output (see findFilesSectionStartLine)
+ * to avoid false matches from earlier sections. What the range covers depends on
+ * the style:
+ *
+ * - xml: from the `<file path="...">` line through the matching `</file>` line
+ * - markdown: from the `## File: ...` heading through the line before the next
+ *   file heading (or the end of the files section)
+ * - plain: from the first content line (three lines below the separator that
+ *   precedes `File: ...`) through the line before the next file's separator
+ *
+ * For markdown, fenced code blocks are tracked and heading-like lines inside them
+ * are ignored, so file content such as `# a comment` or `## File: x` does not end
+ * the scan or create a phantom entry.
+ *
+ * Any other style (e.g. JSON) yields an empty map.
+ *
+ * Uses indexOf-based line iteration to avoid duplicating the entire output string
+ * as an array of lines.
+ *
+ * @param output The fully rendered output text
+ * @param style The output style that `output` was rendered in
+ * @returns Map from file path (as written in the output) to its line range
+ */
+export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle): Record<string, FileLineOffset> => {
+  const offsets: Record<string, FileLineOffset> = {};
+  const sectionStartLine = findFilesSectionStartLine(output, style);
+
+  if (style === 'xml') {
+    // Allow optional surrounding whitespace; [^"]+ stops at the first closing quote.
+    // Built once here rather than once per scanned line.
+    const fileOpenTag = /^\s*<file path="([^"]+)">\s*$/;
+    let currentPath: string | null = null;
+    let currentStart = 0;
+
+    for (const { line, lineNum } of iterLines(output)) {
+      if (lineNum < sectionStartLine) continue;
+
+      const startMatch = fileOpenTag.exec(line);
+      if (startMatch) {
+        currentPath = startMatch[1];
+        currentStart = lineNum;
+        continue;
+      }
+
+      const trimmed = line.trim();
+      if (trimmed === '</file>' && currentPath !== null) {
+        offsets[currentPath] = { start: currentStart, end: lineNum };
+        currentPath = null;
+      } else if (trimmed === '</files>') {
+        break;
+      }
+    }
+  } else if (style === 'markdown') {
+    const fileHeading = /^## File: (.+)$/;
+    // A fence is a run of at least three backticks or tildes at the start of a line;
+    // a closing fence has nothing but whitespace after the run.
+    const fenceOpen = /^(`{3,}|~{3,})/;
+    const fenceClose = /^(`{3,}|~{3,})\s*$/;
+    const fileStarts: Array<{ path: string; line: number }> = [];
+    // Default to end of output; narrowed to line before next top-level heading if one exists
+    let sectionEndLine = countLines(output);
+    // The fence run that opened the current code block, or '' when outside any block
+    let openFence = '';
+
+    for (const { line, lineNum } of iterLines(output)) {
+      if (lineNum < sectionStartLine) continue;
+
+      if (openFence !== '') {
+        // Inside file content: only a closing fence of the same character and at
+        // least the same length is meaningful; everything else is opaque content.
+        const closing = fenceClose.exec(line);
+        if (closing && closing[1][0] === openFence[0] && closing[1].length >= openFence.length) {
+          openFence = '';
+        }
+        continue;
+      }
+
+      const opening = fenceOpen.exec(line);
+      if (opening) {
+        openFence = opening[1];
+        continue;
+      }
+
+      const match = fileHeading.exec(line);
+      if (match) {
+        fileStarts.push({ path: match[1].trim(), line: lineNum });
+      } else if (line.startsWith('# ') && lineNum > sectionStartLine) {
+        // Hit the next top-level section — record boundary and stop scanning
+        sectionEndLine = lineNum - 1;
+        break;
+      }
+    }
+
+    for (let j = 0; j < fileStarts.length; j++) {
+      const { path, line } = fileStarts[j];
+      const endLine = j + 1 < fileStarts.length ? fileStarts[j + 1].line - 1 : sectionEndLine;
+      offsets[path] = { start: line, end: endLine };
+    }
+  } else if (style === 'plain') {
+    // Plain format: "================" then "File: path" then "================" then content
+    // End of content = line before next "================" separator
+    // The short separator is exactly 16 '=' characters (matches PLAIN_SEPARATOR in plainStyle.ts)
+    // The long separator (64 '=') marks the end of the Files section (e.g. End of Codebase footer)
+    const SEPARATOR = '================';
+    const LONG_SEPARATOR = '='.repeat(64);
+    const fileHeaderLines: Array<{ path: string; line: number }> = [];
+    // Default to end of output; narrowed when a long separator signals the next section
+    let sectionEndLine = countLines(output);
+    let prevLine = '';
+
+    for (const { line, lineNum } of iterLines(output)) {
+      if (lineNum < sectionStartLine) {
+        prevLine = line;
+        continue;
+      }
+
+      // A long separator after at least one file entry signals the end of the Files section
+      if (line === LONG_SEPARATOR && fileHeaderLines.length > 0) {
+        sectionEndLine = lineNum - 1;
+        break;
+      }
+
+      if (prevLine === SEPARATOR && line.startsWith('File: ')) {
+        const filePath = line.slice('File: '.length).trim();
+        // lineNum - 1 is the separator line number
+        fileHeaderLines.push({ path: filePath, line: lineNum - 1 });
+      }
+      prevLine = line;
+    }
+
+    for (let j = 0; j < fileHeaderLines.length; j++) {
+      const { path, line } = fileHeaderLines[j];
+      // Content starts after: separator → File: header → separator → content
+      const contentStart = line + 3;
+      // Content ends before the next file separator, or at the files section boundary
+      const nextSeparatorLine = j + 1 < fileHeaderLines.length ? fileHeaderLines[j + 1].line - 1 : sectionEndLine;
+      offsets[path] = { start: contentStart, end: nextSeparatorLine };
+    }
+  }
+
+  return offsets;
+};
+
+/**
+ * Renders a line range as the suffix appended to a file name in the tree.
+ * The result starts with a space and uses an en dash between the two numbers,
+ * e.g. " [lines 42–78]".
+ */
+export const formatFileOffsetAnnotation = (offset: FileLineOffset): string => {
+  const { start, end } = offset;
+  return ' [lines ' + start + '–' + end + ']';
+};