diff --git a/package-lock.json b/package-lock.json index 4dfd6928b..9d02a295a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@clack/prompts": "^0.11.0", "@modelcontextprotocol/sdk": "^1.24.3", + "@repomix/strip-comments": "^2.4.2", "@repomix/tree-sitter-wasms": "^0.1.15", "@secretlint/core": "^11.2.5", "@secretlint/secretlint-rule-preset-recommend": "^11.2.5", @@ -30,7 +31,6 @@ "log-update": "^7.0.2", "minimatch": "^10.1.1", "picocolors": "^1.1.1", - "strip-comments": "^2.0.1", "tiktoken": "^1.0.22", "tinypool": "^2.0.0", "web-tree-sitter": "^0.25.10", @@ -1089,6 +1089,15 @@ "node": ">=14" } }, + "node_modules/@repomix/strip-comments": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/@repomix/strip-comments/-/strip-comments-2.4.2.tgz", + "integrity": "sha512-7a18ODb043eszMBr6mpVWz802xIRMzdmptarVxTtnMIW7ZQzba/v8jLp3kcHUHb76uRkyJRPpGSwdm7+8GmsEA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/@repomix/tree-sitter-wasms": { "version": "0.1.15", "resolved": "https://registry.npmjs.org/@repomix/tree-sitter-wasms/-/tree-sitter-wasms-0.1.15.tgz", @@ -5099,14 +5108,6 @@ "node": ">=8" } }, - "node_modules/strip-comments": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/strip-comments/-/strip-comments-2.0.1.tgz", - "integrity": "sha512-ZprKx+bBLXv067WTCALv8SSz5l2+XhpYCsVtSqlMnkAXMWDq+/ekVbl1ghqP9rUHTzv6sm/DwCOiYutU/yp1fw==", - "engines": { - "node": ">=10" - } - }, "node_modules/strip-final-newline": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-4.0.0.tgz", diff --git a/package.json b/package.json index 4fa53f9ab..e47b0705c 100644 --- a/package.json +++ b/package.json @@ -75,6 +75,7 @@ "dependencies": { "@clack/prompts": "^0.11.0", "@modelcontextprotocol/sdk": "^1.24.3", + "@repomix/strip-comments": "^2.4.2", "@repomix/tree-sitter-wasms": "^0.1.15", "@secretlint/core": "^11.2.5", "@secretlint/secretlint-rule-preset-recommend": "^11.2.5", @@ -94,7 +95,6 @@ "log-update": "^7.0.2", "minimatch": "^10.1.1", "picocolors": "^1.1.1", - "strip-comments": "^2.0.1", "tiktoken": "^1.0.22", "tinypool": "^2.0.0", "web-tree-sitter": "^0.25.10", diff --git a/src/core/file/fileManipulate.ts b/src/core/file/fileManipulate.ts index ee3ec92a7..db14d59a4 100644 --- a/src/core/file/fileManipulate.ts +++ b/src/core/file/fileManipulate.ts @@ -1,5 +1,5 @@ import path from 'node:path'; -import strip from 'strip-comments'; +import strip from '@repomix/strip-comments'; export interface FileManipulator { removeComments(content: string): string; @@ -42,269 +42,6 @@ class StripCommentsManipulator extends BaseManipulator { } } -class CppManipulator extends BaseManipulator { - removeComments(content: string): string { - let result = strip(content, { - language: 'c', - preserveNewlines: true, - }); - - result = result - .split('\n') - .map((line) => { - const tripleSlashIndex = line.indexOf('///'); - if (tripleSlashIndex !== -1) { - return line.substring(0, tripleSlashIndex).trimEnd(); - } - return line; - }) - .join('\n'); - - return rtrimLines(result); - } -} - -enum GoParserState { - Normal = 0, - InLineComment = 1, - InBlockComment = 2, - InDoubleQuoteString = 3, - InRawString = 4, - InRuneLiteral = 5, -} - -class GoManipulator extends BaseManipulator { - removeComments(content: string): string { - if (!content) return ''; - - let state: GoParserState = GoParserState.Normal; - let result = ''; - let i = 0; - let hasNonWhitespaceOnLine = false; // Track if line has non-whitespace content - - while (i < content.length) { - const char = content[i]; - const nextChar = i + 1 < content.length ? content[i + 1] : null; - - switch (state) { - case GoParserState.Normal: - if (char === '/' && nextChar === '/') { - // Go directive handling - if (!hasNonWhitespaceOnLine) { - if (content.startsWith('//go:', i)) { - // Preserve //go: directives - const lineEnd = content.indexOf('\n', i); - if (lineEnd === -1) { - result += content.substring(i); - i = content.length; - } else { - result += content.substring(i, lineEnd + 1); - i = lineEnd + 1; - hasNonWhitespaceOnLine = false; - } - continue; - } - } - state = GoParserState.InLineComment; - i += 2; // skip '//' - continue; - } - if (char === '/' && nextChar === '*') { - state = GoParserState.InBlockComment; - i += 2; // skip '/*' - continue; - } - result += char; - if (char !== ' ' && char !== '\t' && char !== '\n') { - hasNonWhitespaceOnLine = true; - } - if (char === '"') { - state = GoParserState.InDoubleQuoteString; - } else if (char === '`') { - state = GoParserState.InRawString; - } else if (char === "'") { - state = GoParserState.InRuneLiteral; - } - break; - - case GoParserState.InLineComment: - // Skip text within line comments until newline - if (char === '\n') { - result += char; - state = GoParserState.Normal; - hasNonWhitespaceOnLine = false; - } - // Skip all other characters - break; - - case GoParserState.InBlockComment: - // Go block comments do not nest - first */ closes the comment - if (char === '*' && nextChar === '/') { - state = GoParserState.Normal; - i += 2; // skip '*/' - continue; - } - if (char === '\n') { - // Preserve newlines in block comments to maintain line structure - result += char; - hasNonWhitespaceOnLine = false; - } - // Skip all other characters within block comments - break; - - case GoParserState.InDoubleQuoteString: - result += char; - if (char === '\\' && nextChar !== null) { - // Handle escape sequences - result += nextChar; - i += 2; - continue; - } - if (char === '"') { - state = GoParserState.Normal; - } - break; - - case GoParserState.InRawString: - result += char; - if (char === '`') { - state = GoParserState.Normal; - } - break; - - case GoParserState.InRuneLiteral: - result += char; - if (char === '\\' && nextChar !== null) { - // Handle escape sequences - result += nextChar; - i += 2; - continue; - } - if (char === "'") { - state = GoParserState.Normal; - } - break; - } - - if (char === '\n') { - hasNonWhitespaceOnLine = false; - } - i++; - } - return rtrimLines(result); - } -} - -class PythonManipulator extends BaseManipulator { - removeDocStrings(content: string): string { - if (!content) return ''; - const lines = content.split('\n'); - - let result = ''; - - let buffer = ''; - let quoteType: '' | "'" | '"' = ''; - let tripleQuotes = 0; - - const doubleQuoteRegex = /^\s*(? { - return pairs.some(([start, end]) => hashIndex > start && hashIndex < end); - }; - - let result = ''; - const pairs: [number, number][] = []; - let prevQuote = 0; - while (prevQuote < content.length) { - const openingQuote = content.slice(prevQuote + 1).search(/(? = { '.c': new StripCommentsManipulator('c'), '.h': new StripCommentsManipulator('c'), - '.hpp': new CppManipulator(), - '.cpp': new CppManipulator(), - '.cc': new CppManipulator(), - '.cxx': new CppManipulator(), + '.hpp': new StripCommentsManipulator('cpp'), + '.cpp': new StripCommentsManipulator('cpp'), + '.cc': new StripCommentsManipulator('cpp'), + '.cxx': new StripCommentsManipulator('cpp'), '.cs': new StripCommentsManipulator('csharp'), '.css': new StripCommentsManipulator('css'), '.dart': new StripCommentsManipulator('c'), - '.go': new GoManipulator(), + '.go': new StripCommentsManipulator('go'), '.html': new StripCommentsManipulator('html'), '.java': new StripCommentsManipulator('java'), '.js': new StripCommentsManipulator('javascript'), @@ -336,6 +73,7 @@ const manipulators: Record = { '.kt': new StripCommentsManipulator('c'), '.less': new StripCommentsManipulator('less'), '.php': new StripCommentsManipulator('php'), + '.py': new StripCommentsManipulator('python'), '.rb': new StripCommentsManipulator('ruby'), '.rs': new StripCommentsManipulator('c'), '.sass': new StripCommentsManipulator('sass'), @@ -350,8 +88,6 @@ const manipulators: Record = { '.yaml': new StripCommentsManipulator('perl'), '.yml': new StripCommentsManipulator('perl'), - '.py': new PythonManipulator(), - '.vue': new CompositeManipulator( new StripCommentsManipulator('html'), new StripCommentsManipulator('css'), diff --git a/tests/core/file/fileManipulate.test.ts b/tests/core/file/fileManipulate.test.ts index e53301755..5583a0f0b 100644 --- a/tests/core/file/fileManipulate.test.ts +++ b/tests/core/file/fileManipulate.test.ts @@ -295,12 +295,17 @@ describe('fileManipulate', () => { Another docstring """ `, + // preserveNewlines keeps newlines for line number preservation expected: ` def test(): + + return True + + `, }, { @@ -314,11 +319,14 @@ describe('fileManipulate', () => { docstring """ `, + // preserveNewlines keeps newlines for line number preservation expected: ` var = """ string variable """ + + `, }, { @@ -373,8 +381,13 @@ describe('fileManipulate', () => { ''' """ `, + // preserveNewlines keeps newlines for line number preservation expected: ` + + + + `, }, { @@ -391,9 +404,16 @@ describe('fileManipulate', () => { """ return True `, + // preserveNewlines keeps newlines for line number preservation expected: ` def func(): + + + + + + return True `, }, @@ -461,13 +481,18 @@ describe('fileManipulate', () => { # Another comment return x `, + // preserveNewlines keeps newlines for line number preservation expected: ` def func(): + + x = 5 + + return x `, }, @@ -554,9 +579,13 @@ describe('fileManipulate', () => { """ return True `, + // preserveNewlines keeps newlines for line number preservation expected: ` def func(): + + + return True `, },