-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
feat(output): Add --show-file-offsets option to annotate directory tree with line ranges #1464
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
bc06014
8f367c3
279fb7a
96d1c4d
dfed326
efb81f7
b9ea60d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| import type { FileLineOffset } from '../output/fileOffsets.js'; | ||
| import { formatFileOffsetAnnotation } from '../output/fileOffsets.js'; | ||
| import type { FilesByRoot, TreeNode } from './fileTreeGenerate.js'; | ||
| import { generateFileTree, generateMultiRootSections, sortTreeNodes } from './fileTreeGenerate.js'; | ||
|
|
||
/**
 * Renders a file tree as text, appending a line-range annotation to every file
 * that has its own entry in `offsets`.
 *
 * Directories are emitted as `name/` followed by their children one indent step
 * deeper. Files are emitted as `name` plus the suffix returned by
 * `formatFileOffsetAnnotation`, or with no suffix when no offset is recorded.
 *
 * @param node The tree node whose children are rendered
 * @param offsets Map of file paths to their line ranges in the output file
 * @param prefix Indentation prefix for the current depth
 * @param currentPath Slash-joined path of `node`, used to build the lookup key of each child
 * @param _isRoot Internal flag: true only for the outermost call, which invokes `sortTreeNodes`
 * @returns One newline-terminated line per rendered entry
 */
export const treeToStringWithFileOffsets = (
  node: TreeNode,
  offsets: Record<string, FileLineOffset>,
  prefix = '',
  currentPath = '',
  _isRoot = true,
): string => {
  if (_isRoot) {
    // Only the outermost call sorts; recursive calls pass _isRoot = false.
    sortTreeNodes(node);
  }

  let result = '';

  for (const child of node.children) {
    const childPath = currentPath ? `${currentPath}/${child.name}` : child.name;

    if (child.isDirectory) {
      result += `${prefix}${child.name}/\n`;
      // NOTE(review): the indent step is reproduced exactly as shown in the reviewed
      // diff; confirm it matches the step used by the non-annotated tree renderer.
      result += treeToStringWithFileOffsets(child, offsets, `${prefix} `, childPath, false);
      continue;
    }

    // Own-property check: a plain-object lookup would otherwise resolve inherited
    // members (e.g. a file named "constructor" or "toString") and produce a bogus
    // annotation for a file that has no recorded offset.
    const hasOffset = Object.prototype.hasOwnProperty.call(offsets, childPath);
    const offsetSuffix = hasOffset ? formatFileOffsetAnnotation(offsets[childPath]) : '';
    result += `${prefix}${child.name}${offsetSuffix}\n`;
  }

  return result;
};
|
|
||
/**
 * Builds a file tree from flat path lists and renders it with line-range annotations.
 *
 * @param files File paths to place in the tree
 * @param offsets Map of file paths to their line ranges in the output file
 * @param emptyDirPaths Optional paths to empty directories, forwarded to `generateFileTree`
 * @returns The rendered tree with surrounding whitespace trimmed
 */
export const generateTreeStringWithFileOffsets = (
  files: string[],
  offsets: Record<string, FileLineOffset>,
  emptyDirPaths: string[] = [],
): string => {
  const rendered = treeToStringWithFileOffsets(generateFileTree(files, emptyDirPaths), offsets);
  return rendered.trim();
};
|
|
||
/**
 * Generates an annotated tree string for one or several root directories.
 *
 * - Exactly one root: delegates to `generateTreeStringWithFileOffsets`, producing the
 *   flat, unlabeled tree.
 * - Any other number of roots: delegates to `generateMultiRootSections`, rendering each
 *   section's tree through `treeToStringWithFileOffsets`.
 *
 * Note that `emptyDirPaths` is only forwarded on the single-root path.
 *
 * @param filesByRoot Array of root directories with their files
 * @param offsets Map of file paths to their line ranges in the output file
 * @param emptyDirPaths Optional paths to empty directories
 */
export const generateTreeStringWithRootsAndFileOffsets = (
  filesByRoot: FilesByRoot[],
  offsets: Record<string, FileLineOffset>,
  emptyDirPaths: string[] = [],
): string => {
  const hasSingleRoot = filesByRoot.length === 1;

  if (!hasSingleRoot) {
    // Labeled sections, one per root; each section tree is rendered with annotations.
    const renderSection = (tree: TreeNode, prefix: string): string =>
      treeToStringWithFileOffsets(tree, offsets, prefix);
    return generateMultiRootSections(filesByRoot, renderSection);
  }

  // Single root: plain annotated tree without a root label.
  return generateTreeStringWithFileOffsets(filesByRoot[0].files, offsets, emptyDirPaths);
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,168 @@ | ||
| import type { RepomixOutputStyle } from '../../config/configSchema.js'; | ||
|
|
||
/**
 * Line range occupied by one file inside the rendered output.
 * Both bounds are 1-indexed and inclusive.
 */
export interface FileLineOffset {
  /** First line of the range. */
  start: number;
  /** Last line of the range. */
  end: number;
}
|
|
||
/**
 * Lazily yields each '\n'-delimited line of `s` together with its 1-indexed line number.
 *
 * Lines are located with `indexOf('\n')` and sliced one at a time, so no array of all
 * lines is ever built. The final segment is always yielded, even when it is empty
 * (an empty string yields a single empty line; a trailing '\n' yields a last empty line).
 */
function* iterLines(s: string): Generator<{ line: string; lineNum: number }> {
  let start = 0;

  for (let lineNum = 1; ; lineNum++) {
    const newlineAt = s.indexOf('\n', start);

    if (newlineAt === -1) {
      // No further newline: whatever remains is the last line.
      yield { line: s.slice(start), lineNum };
      return;
    }

    yield { line: s.slice(start, newlineAt), lineNum };
    start = newlineAt + 1;
  }
}
|
|
||
/**
 * Counts the lines of a string as "number of '\n' characters plus one",
 * so an empty string counts as one line and a trailing newline adds an empty last line.
 */
const countLines = (s: string): number => {
  let total = 1;
  for (let at = s.indexOf('\n'); at !== -1; at = s.indexOf('\n', at + 1)) {
    total += 1;
  }
  return total;
};
|
|
||
/**
 * Locates the 1-indexed line on which the files section of the output begins.
 *
 * Offset scanning starts from this line so that marker-like text appearing earlier in
 * the output is not mistaken for a file boundary.
 *
 * Section markers per style:
 * - xml: a line that is `<files>` once trimmed
 * - markdown: a line that is exactly `# Files`
 * - plain: a line that is exactly `Files` directly after a 64-character `=` separator;
 *   the separator's line number is returned
 *
 * @returns The marker line, or 1 when no marker is found (scan the entire output)
 */
const findFilesSectionStartLine = (output: string, style: RepomixOutputStyle): number => {
  const longSeparator = '='.repeat(64);
  let previous = '';

  for (const { line, lineNum } of iterLines(output)) {
    switch (style) {
      case 'xml':
        if (line.trim() === '<files>') return lineNum;
        break;
      case 'markdown':
        if (line === '# Files') return lineNum;
        break;
      case 'plain':
        // The section starts on the separator line, one above the "Files" title.
        if (previous === longSeparator && line === 'Files') return lineNum - 1;
        break;
      default:
        break;
    }
    previous = line;
  }

  return 1; // fallback: scan entire output
};
|
|
||
/** Opening tag of a file block in XML output; the path is everything between the quotes. */
const XML_FILE_OPEN_TAG = /^\s*<file path="([^"]+)">\s*$/;
/** Per-file heading in Markdown output. */
const MARKDOWN_FILE_HEADING = /^## File: (.+)$/;
/** A line that opens a backtick code fence (three or more backticks, optional info string). */
const MARKDOWN_FENCE_OPEN = /^(`{3,})/;
/** A line consisting only of backticks (candidate closing fence). */
const MARKDOWN_FENCE_CLOSE = /^(`+)\s*$/;
/** Short separator framing each "File: path" header in plain output (16 '=' characters). */
const PLAIN_FILE_SEPARATOR = '================';
/** Long separator (64 '=' characters) that frames top-level sections in plain output. */
const PLAIN_LONG_SEPARATOR = '='.repeat(64);
/** Prefix of the header line naming a file in plain output. */
const PLAIN_FILE_PREFIX = 'File: ';

/**
 * XML scanner: a file spans from its `<file path="...">` line to its `</file>` line.
 *
 * While a file block is open, only `</file>` is treated as structural. A
 * `<file path="...">` or `</files>` line inside a file body therefore no longer
 * replaces the open file or aborts the scan. A literal `</file>` line inside a file
 * body still ends that block early; this cannot be told apart by line scanning alone.
 */
const scanXmlFileOffsets = (output: string, sectionStartLine: number): Record<string, FileLineOffset> => {
  const offsets: Record<string, FileLineOffset> = {};
  let openPath: string | null = null;
  let openLine = 0;

  for (const { line, lineNum } of iterLines(output)) {
    if (lineNum < sectionStartLine) continue;

    const trimmed = line.trim();

    if (openPath !== null) {
      if (trimmed === '</file>') {
        offsets[openPath] = { start: openLine, end: lineNum };
        openPath = null;
      }
      continue;
    }

    const opening = XML_FILE_OPEN_TAG.exec(line);
    if (opening) {
      openPath = opening[1];
      openLine = lineNum;
    } else if (trimmed === '</files>') {
      // End of the files section.
      break;
    }
  }

  return offsets;
};

/**
 * Markdown scanner: a file spans from its `## File: path` heading to the line before
 * the next file heading, or to the end of the files section for the last file.
 *
 * Headings are only recognised outside backtick code fences. Without this, any file
 * body containing a line that starts with `# ` (for example a Python or shell comment)
 * would be taken for the next top-level section and end the scan, and a body line
 * starting with `## File: ` would register a phantom file.
 *
 * Assumes each file body is wrapped in a backtick fence that is longer than any run of
 * backticks inside the body, so the first all-backtick line at least as long as the
 * opening fence is the real closing fence — TODO confirm against the Markdown template.
 * If the output contains no fences, behaviour is identical to heading-only scanning.
 */
const scanMarkdownFileOffsets = (output: string, sectionStartLine: number): Record<string, FileLineOffset> => {
  const offsets: Record<string, FileLineOffset> = {};
  const fileStarts: Array<{ path: string; line: number }> = [];
  // Default to end of output; narrowed to the line before the next top-level heading.
  let sectionEndLine = countLines(output);
  // Length of the currently open fence; 0 means "not inside a fenced block".
  let openFenceLength = 0;

  for (const { line, lineNum } of iterLines(output)) {
    if (lineNum < sectionStartLine) continue;

    if (openFenceLength > 0) {
      const closing = MARKDOWN_FENCE_CLOSE.exec(line);
      if (closing && closing[1].length >= openFenceLength) {
        openFenceLength = 0;
      }
      continue; // everything inside a fence is file content
    }

    const fence = MARKDOWN_FENCE_OPEN.exec(line);
    if (fence) {
      openFenceLength = fence[1].length;
      continue;
    }

    const heading = MARKDOWN_FILE_HEADING.exec(line);
    if (heading) {
      fileStarts.push({ path: heading[1].trim(), line: lineNum });
    } else if (line.startsWith('# ') && lineNum > sectionStartLine) {
      // Next top-level section: record the boundary and stop scanning.
      sectionEndLine = lineNum - 1;
      break;
    }
  }

  fileStarts.forEach(({ path, line }, index) => {
    const next = fileStarts[index + 1];
    offsets[path] = { start: line, end: next ? next.line - 1 : sectionEndLine };
  });

  return offsets;
};

/**
 * Plain-text scanner. Each file is introduced by three lines:
 * short separator, `File: path`, short separator; content follows.
 *
 * A file's range starts on the first content line (header separator line + 3) and ends
 * on the line before the next file's leading separator, or at the section boundary for
 * the last file. The section boundary is the line before the first long separator seen
 * after at least one file header, or the end of the output when there is none.
 *
 * Plain output has no content delimiters, so a file body that itself contains a short
 * separator followed by a `File: ` line, or a long separator line, is still
 * misinterpreted as structure.
 */
const scanPlainFileOffsets = (output: string, sectionStartLine: number): Record<string, FileLineOffset> => {
  const offsets: Record<string, FileLineOffset> = {};
  const fileHeaders: Array<{ path: string; line: number }> = [];
  let sectionEndLine = countLines(output);
  let prevLine = '';

  for (const { line, lineNum } of iterLines(output)) {
    if (lineNum >= sectionStartLine) {
      if (line === PLAIN_LONG_SEPARATOR && fileHeaders.length > 0) {
        sectionEndLine = lineNum - 1;
        break;
      }

      if (prevLine === PLAIN_FILE_SEPARATOR && line.startsWith(PLAIN_FILE_PREFIX)) {
        // Record the line of the separator that precedes the "File:" header.
        fileHeaders.push({ path: line.slice(PLAIN_FILE_PREFIX.length).trim(), line: lineNum - 1 });
      }
    }
    // prevLine is tracked before the section start as well, so the very first
    // in-section line can still be paired with the line above it.
    prevLine = line;
  }

  fileHeaders.forEach(({ path, line }, index) => {
    const next = fileHeaders[index + 1];
    // separator -> "File:" header -> separator -> content
    const contentStart = line + 3;
    offsets[path] = { start: contentStart, end: next ? next.line - 1 : sectionEndLine };
  });

  return offsets;
};

/**
 * Scans a rendered output string and returns the line range (1-indexed, inclusive)
 * recorded for each file.
 *
 * Scanning starts at the files section (see `findFilesSectionStartLine`) so that
 * marker-like text earlier in the output is ignored. Lines are walked with
 * `iterLines`, which avoids materialising the output as an array of lines.
 *
 * Range semantics differ by style:
 * - xml: from the `<file path="...">` line through the `</file>` line
 * - markdown: from the `## File:` heading to the line before the next file heading
 *   (or the end of the files section)
 * - plain: from the first content line to the line before the next file's separator
 *   (or the end of the files section)
 *
 * Any other style (e.g. JSON, which is structured) yields an empty map.
 *
 * @param output The fully rendered output text
 * @param style The output style that `output` was rendered in
 * @returns Map from file path to its line range; if a path occurs more than once,
 *   the last occurrence wins
 */
export const computeFileLineOffsets = (output: string, style: RepomixOutputStyle): Record<string, FileLineOffset> => {
  const sectionStartLine = findFilesSectionStartLine(output, style);

  switch (style) {
    case 'xml':
      return scanXmlFileOffsets(output, sectionStartLine);
    case 'markdown':
      return scanMarkdownFileOffsets(output, sectionStartLine);
    case 'plain':
      return scanPlainFileOffsets(output, sectionStartLine);
    default:
      return {};
  }
};
|
|
||
/**
 * Renders a line range as the suffix appended to a file name in the directory tree.
 * The result starts with a space and joins the bounds with an en dash,
 * e.g. `{ start: 42, end: 78 }` becomes " [lines 42–78]".
 */
export const formatFileOffsetAnnotation = (offset: FileLineOffset): string => {
  const { start, end } = offset;
  return ' [lines ' + `${start}` + '–' + `${end}` + ']';
};
Uh oh!
There was an error while loading. Please reload this page.