From 4b6fc1a9b5521f1dfb1fa8bacf0628b84aed90c2 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sat, 30 Aug 2025 19:38:36 +0900 Subject: [PATCH 1/6] feat(core): Add GoManipulator to preserve Go directives in comment removal Implement custom Go language comment processor that preserves important //go: directives (build tags, generate commands) while removing regular comments. This addresses the issue where strip-comments library was removing all comments including critical Go compiler directives. Features: - Preserve //go:build, //go:generate and other //go: directives - Remove regular // and /* */ comments appropriately - Handle nested block comments (Go-specific feature) - Protect comment-like text within strings, raw strings, and rune literals - Comprehensive test coverage for edge cases The GoManipulator uses a state machine to accurately parse Go syntax and distinguish between directives that must be preserved and comments that should be removed. --- src/core/file/fileManipulate.ts | 146 ++++++++++++++++++++++++- tests/core/file/fileManipulate.test.ts | 98 +++++++++++++++++ 2 files changed, 243 insertions(+), 1 deletion(-) diff --git a/src/core/file/fileManipulate.ts b/src/core/file/fileManipulate.ts index 12dd2831c..10a02cee9 100644 --- a/src/core/file/fileManipulate.ts +++ b/src/core/file/fileManipulate.ts @@ -64,6 +64,150 @@ class CppManipulator extends BaseManipulator { } } +class GoManipulator extends BaseManipulator { + removeComments(content: string): string { + if (!content) return ''; + + enum State { + Normal = 0, + InLineComment = 1, + InBlockComment = 2, + InDoubleQuoteString = 3, + InRawString = 4, + InRuneLiteral = 5, + } + + let state: State = State.Normal; + let result = ''; + let lineStart = 0; + let i = 0; + let blockCommentDepth = 0; // Track nested block comments + + while (i < content.length) { + const char = content[i]; + const nextChar = i + 1 < content.length ? content[i + 1] : null; + const isAtLineStart = i === lineStart; + + switch (state) { + case State.Normal: + if (char === '/' && nextChar === '/') { + // //go: Directive handling + if (isAtLineStart || content.substring(lineStart, i).trim() === '') { + const restOfLine = content.substring(i); + if (restOfLine.startsWith('//go:')) { + // Preserve //go: directives + const lineEnd = content.indexOf('\n', i); + if (lineEnd === -1) { + result += content.substring(i); + i = content.length; + } else { + result += content.substring(i, lineEnd + 1); + i = lineEnd + 1; + lineStart = i; + } + continue; + } + } + state = State.InLineComment; + i += 2; // skip '//' + continue; + } + if (char === '/' && nextChar === '*') { + state = State.InBlockComment; + blockCommentDepth = 1; + i += 2; // skip '/*' + continue; + } + if (char === '"') { + result += char; + state = State.InDoubleQuoteString; + } else if (char === '`') { + result += char; + state = State.InRawString; + } else if (char === "'") { + result += char; + state = State.InRuneLiteral; + } else { + result += char; + } + break; + + case State.InLineComment: + // Skip text within line comments until newline + if (char === '\n') { + result += char; + state = State.Normal; + lineStart = i + 1; + } + // Skip all other characters + break; + + case State.InBlockComment: + // Handle nested block comments (Go supports them) + if (char === '/' && nextChar === '*') { + blockCommentDepth++; + i += 2; + continue; + } + if (char === '*' && nextChar === '/') { + blockCommentDepth--; + if (blockCommentDepth === 0) { + state = State.Normal; + } + i += 2; + continue; + } + if (char === '\n') { + // Preserve newlines in block comments to maintain line structure + result += char; + lineStart = i + 1; + } + // Skip all other characters within block comments + break; + + case State.InDoubleQuoteString: + result += char; + if (char === '\\' && nextChar !== null) { + // Handle escape sequences + result += nextChar; + i += 2; + continue; + } + if (char === '"') { + state = State.Normal; + } + break; + + case State.InRawString: + result += char; + if (char === '`') { + state = State.Normal; + } + break; + + case State.InRuneLiteral: + result += char; + if (char === '\\' && nextChar !== null) { + // Handle escape sequences + result += nextChar; + i += 2; + continue; + } + if (char === "'") { + state = State.Normal; + } + break; + } + + if (char === '\n') { + lineStart = i + 1; + } + i++; + } + return rtrimLines(result); + } +} + class PythonManipulator extends BaseManipulator { removeDocStrings(content: string): string { if (!content) return ''; @@ -197,7 +341,7 @@ const manipulators: Record = { '.cs': new StripCommentsManipulator('csharp'), '.css': new StripCommentsManipulator('css'), '.dart': new StripCommentsManipulator('c'), - '.go': new StripCommentsManipulator('c'), + '.go': new GoManipulator(), '.html': new StripCommentsManipulator('html'), '.java': new StripCommentsManipulator('java'), '.js': new StripCommentsManipulator('javascript'), diff --git a/tests/core/file/fileManipulate.test.ts b/tests/core/file/fileManipulate.test.ts index d9a465f2c..2cc7405d1 100644 --- a/tests/core/file/fileManipulate.test.ts +++ b/tests/core/file/fileManipulate.test.ts @@ -731,6 +731,104 @@ describe('fileManipulate', () => { fmt.Println("Hello") } `, + }, + { + name: 'Go directives preservation', + ext: '.go', + input: `//go:build linux +//go:generate something + +package main + +import "fmt" + +func main() { + // Regular comment + s1 := "String with // not a comment" + s2 := \`raw string with + // this is not a comment + /* neither is this */\` + + r := '/' // rune literal + + /* + Multi-line comment + */ + + fmt.Println("Hello") // end of line comment +}`, + expected: `//go:build linux +//go:generate something + +package main + +import "fmt" + +func main() { + + s1 := "String with // not a comment" + s2 := \`raw string with + // this is not a comment + /* neither is this */\` + + r := '/' + + + + + + fmt.Println("Hello") +}`, + }, + { + name: 'Go nested block comments', + ext: '.go', + input: `package main + +/* Outer comment + /* Inner comment */ + Still in outer comment +*/ + +func main() { + fmt.Println("Hello") +}`, + expected: `package main + + + + + + +func main() { + fmt.Println("Hello") +}`, + }, + { + name: 'Go mixed directives and comments', + ext: '.go', + input: `//go:build linux +// This is a comment, not a directive +//go:generate stringer -type=Color +// Another comment +package main`, + expected: `//go:build linux + +//go:generate stringer -type=Color + +package main`, + }, + { + name: 'Go string literals with comments', + ext: '.go', + input: `s := "This is a string with \\"escaped\\" quotes // not a comment" +// This is a comment +r1 := '\\'' // Escaped single quote +r2 := '\\\\' // Backslash`, + expected: `s := "This is a string with \\"escaped\\" quotes // not a comment" + +r1 := '\\'' +r2 := '\\\\'`, }, { name: 'Kotlin comment removal', From c97e5dc54cd342ae60aec74c0232b29850429f57 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sat, 30 Aug 2025 22:42:16 +0900 Subject: [PATCH 2/6] refactor(core): Optimize GoManipulator performance and code quality Address code review feedback: - Move GoParserState enum to module level to avoid re-declaration overhead - Optimize string operations by using content.startsWith() instead of substring - Improve code readability by hoisting duplicate result += char statements - Fix comment capitalization for consistency These changes improve performance by reducing unnecessary string operations and memory allocations while maintaining the same functionality. --- src/core/file/fileManipulate.ts | 63 +++++++++++++++------------------ 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/src/core/file/fileManipulate.ts b/src/core/file/fileManipulate.ts index 10a02cee9..e71a98438 100644 --- a/src/core/file/fileManipulate.ts +++ b/src/core/file/fileManipulate.ts @@ -64,20 +64,20 @@ class CppManipulator extends BaseManipulator { } } +enum GoParserState { + Normal = 0, + InLineComment = 1, + InBlockComment = 2, + InDoubleQuoteString = 3, + InRawString = 4, + InRuneLiteral = 5, +} + class GoManipulator extends BaseManipulator { removeComments(content: string): string { if (!content) return ''; - enum State { - Normal = 0, - InLineComment = 1, - InBlockComment = 2, - InDoubleQuoteString = 3, - InRawString = 4, - InRuneLiteral = 5, - } - - let state: State = State.Normal; + let state: GoParserState = GoParserState.Normal; let result = ''; let lineStart = 0; let i = 0; @@ -89,12 +89,11 @@ class GoManipulator extends BaseManipulator { const isAtLineStart = i === lineStart; switch (state) { - case State.Normal: + case GoParserState.Normal: if (char === '/' && nextChar === '/') { - // //go: Directive handling + // Go directive handling if (isAtLineStart || content.substring(lineStart, i).trim() === '') { - const restOfLine = content.substring(i); - if (restOfLine.startsWith('//go:')) { + if (content.startsWith('//go:', i)) { // Preserve //go: directives const lineEnd = content.indexOf('\n', i); if (lineEnd === -1) { @@ -108,41 +107,37 @@ class GoManipulator extends BaseManipulator { continue; } } - state = State.InLineComment; + state = GoParserState.InLineComment; i += 2; // skip '//' continue; } if (char === '/' && nextChar === '*') { - state = State.InBlockComment; + state = GoParserState.InBlockComment; blockCommentDepth = 1; i += 2; // skip '/*' continue; } + result += char; if (char === '"') { - result += char; - state = State.InDoubleQuoteString; + state = GoParserState.InDoubleQuoteString; } else if (char === '`') { - result += char; - state = State.InRawString; + state = GoParserState.InRawString; } else if (char === "'") { - result += char; - state = State.InRuneLiteral; - } else { - result += char; + state = GoParserState.InRuneLiteral; } break; - case State.InLineComment: + case GoParserState.InLineComment: // Skip text within line comments until newline if (char === '\n') { result += char; - state = State.Normal; + state = GoParserState.Normal; lineStart = i + 1; } // Skip all other characters break; - case State.InBlockComment: + case GoParserState.InBlockComment: // Handle nested block comments (Go supports them) if (char === '/' && nextChar === '*') { blockCommentDepth++; @@ -152,7 +147,7 @@ class GoManipulator extends BaseManipulator { if (char === '*' && nextChar === '/') { blockCommentDepth--; if (blockCommentDepth === 0) { - state = State.Normal; + state = GoParserState.Normal; } i += 2; continue; @@ -165,7 +160,7 @@ class GoManipulator extends BaseManipulator { // Skip all other characters within block comments break; - case State.InDoubleQuoteString: + case GoParserState.InDoubleQuoteString: result += char; if (char === '\\' && nextChar !== null) { // Handle escape sequences @@ -174,18 +169,18 @@ class GoManipulator extends BaseManipulator { continue; } if (char === '"') { - state = State.Normal; + state = GoParserState.Normal; } break; - case State.InRawString: + case GoParserState.InRawString: result += char; if (char === '`') { - state = State.Normal; + state = GoParserState.Normal; } break; - case State.InRuneLiteral: + case GoParserState.InRuneLiteral: result += char; if (char === '\\' && nextChar !== null) { // Handle escape sequences @@ -194,7 +189,7 @@ class GoManipulator extends BaseManipulator { continue; } if (char === "'") { - state = State.Normal; + state = GoParserState.Normal; } break; } From ecd416ffaa2d6b6ddcf339b44dc18bb83826dcb6 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sat, 30 Aug 2025 23:25:19 +0900 Subject: [PATCH 3/6] refactor(core): Remove unused lineStart variable from GoManipulator Remove the unused lineStart variable and its assignments from the GoManipulator class. After optimization with hasNonWhitespaceOnLine tracking, lineStart is no longer needed for functionality. This resolves the final lint warning about unused variables. --- src/core/file/fileManipulate.ts | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/core/file/fileManipulate.ts b/src/core/file/fileManipulate.ts index e71a98438..fbe29ce40 100644 --- a/src/core/file/fileManipulate.ts +++ b/src/core/file/fileManipulate.ts @@ -79,20 +79,19 @@ class GoManipulator extends BaseManipulator { let state: GoParserState = GoParserState.Normal; let result = ''; - let lineStart = 0; let i = 0; let blockCommentDepth = 0; // Track nested block comments + let hasNonWhitespaceOnLine = false; // Track if line has non-whitespace content while (i < content.length) { const char = content[i]; const nextChar = i + 1 < content.length ? content[i + 1] : null; - const isAtLineStart = i === lineStart; switch (state) { case GoParserState.Normal: if (char === '/' && nextChar === '/') { // Go directive handling - if (isAtLineStart || content.substring(lineStart, i).trim() === '') { + if (!hasNonWhitespaceOnLine) { if (content.startsWith('//go:', i)) { // Preserve //go: directives const lineEnd = content.indexOf('\n', i); @@ -102,7 +101,7 @@ class GoManipulator extends BaseManipulator { } else { result += content.substring(i, lineEnd + 1); i = lineEnd + 1; - lineStart = i; + hasNonWhitespaceOnLine = false; } continue; } @@ -118,6 +117,9 @@ class GoManipulator extends BaseManipulator { continue; } result += char; + if (char !== ' ' && char !== '\t' && char !== '\n') { + hasNonWhitespaceOnLine = true; + } if (char === '"') { state = GoParserState.InDoubleQuoteString; } else if (char === '`') { @@ -132,13 +134,13 @@ class GoManipulator extends BaseManipulator { if (char === '\n') { result += char; state = GoParserState.Normal; - lineStart = i + 1; + hasNonWhitespaceOnLine = false; } // Skip all other characters break; case GoParserState.InBlockComment: - // Handle nested block comments (Go supports them) + // Handle nested block comment sequences for robustness (Go block comments do not nest per spec) if (char === '/' && nextChar === '*') { blockCommentDepth++; i += 2; @@ -155,7 +157,7 @@ class GoManipulator extends BaseManipulator { if (char === '\n') { // Preserve newlines in block comments to maintain line structure result += char; - lineStart = i + 1; + hasNonWhitespaceOnLine = false; } // Skip all other characters within block comments break; @@ -195,7 +197,7 @@ class GoManipulator extends BaseManipulator { } if (char === '\n') { - lineStart = i + 1; + hasNonWhitespaceOnLine = false; } i++; } From 23a0f00005e9e47389cb01c5655fe38bb69699c5 Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sun, 31 Aug 2025 00:05:04 +0900 Subject: [PATCH 4/6] fix(core): Correct Go block comment parsing to match language spec Go block comments do not nest according to the language specification. The first */ sequence should close the comment, regardless of any /* sequences within it. This change removes the blockCommentDepth tracking and ensures correct parsing behavior for Go code containing sequences like /* comment with /* nested */ part */. Updated test expectations to reflect the correct Go language behavior. --- src/core/file/fileManipulate.ts | 18 ++++-------------- tests/core/file/fileManipulate.test.ts | 4 ++-- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/core/file/fileManipulate.ts b/src/core/file/fileManipulate.ts index fbe29ce40..a18317823 100644 --- a/src/core/file/fileManipulate.ts +++ b/src/core/file/fileManipulate.ts @@ -80,7 +80,6 @@ class GoManipulator extends BaseManipulator { let state: GoParserState = GoParserState.Normal; let result = ''; let i = 0; - let blockCommentDepth = 0; // Track nested block comments let hasNonWhitespaceOnLine = false; // Track if line has non-whitespace content while (i < content.length) { @@ -112,7 +111,6 @@ class GoManipulator extends BaseManipulator { } if (char === '/' && nextChar === '*') { state = GoParserState.InBlockComment; - blockCommentDepth = 1; i += 2; // skip '/*' continue; } @@ -140,18 +138,10 @@ class GoManipulator extends BaseManipulator { break; case GoParserState.InBlockComment: - // Handle nested block comment sequences for robustness (Go block comments do not nest per spec) - if (char === '/' && nextChar === '*') { - blockCommentDepth++; - i += 2; - continue; - } + // Go block comments do not nest - first */ closes the comment if (char === '*' && nextChar === '/') { - blockCommentDepth--; - if (blockCommentDepth === 0) { - state = GoParserState.Normal; - } - i += 2; + state = GoParserState.Normal; + i += 2; // skip '*/' continue; } if (char === '\n') { @@ -377,4 +367,4 @@ const manipulators: Record = { export const getFileManipulator = (filePath: string): FileManipulator | null => { const ext = path.extname(filePath); return manipulators[ext] || null; -}; +}; \ No newline at end of file diff --git a/tests/core/file/fileManipulate.test.ts b/tests/core/file/fileManipulate.test.ts index 2cc7405d1..433340267 100644 --- a/tests/core/file/fileManipulate.test.ts +++ b/tests/core/file/fileManipulate.test.ts @@ -797,8 +797,8 @@ func main() { - - + Still in outer comment +*/ func main() { fmt.Println("Hello") From 13a9bbb366fd3daee442430c8677d0d8b3f8d29c Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sun, 31 Aug 2025 00:10:24 +0900 Subject: [PATCH 5/6] style(core): Add missing newline at end of fileManipulate.ts Fixed formatting issue identified by Biome linter to ensure consistent code style across the project. --- src/core/file/fileManipulate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/file/fileManipulate.ts b/src/core/file/fileManipulate.ts index a18317823..ee3ec92a7 100644 --- a/src/core/file/fileManipulate.ts +++ b/src/core/file/fileManipulate.ts @@ -367,4 +367,4 @@ const manipulators: Record = { export const getFileManipulator = (filePath: string): FileManipulator | null => { const ext = path.extname(filePath); return manipulators[ext] || null; -}; \ No newline at end of file +}; From 575ae2bca402d036bc191e3bb52dc892b4c1f94c Mon Sep 17 00:00:00 2001 From: Kazuki Yamada Date: Sun, 31 Aug 2025 00:39:05 +0900 Subject: [PATCH 6/6] test(core): Remove misleading Go nested block comments test Removed the Go nested block comments test case as it was unnecessary and potentially misleading. Go block comments do not nest according to the language specification, so testing this behavior is not needed and could cause confusion about the expected behavior. The remaining tests adequately cover Go comment parsing functionality. --- tests/core/file/fileManipulate.test.ts | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/tests/core/file/fileManipulate.test.ts b/tests/core/file/fileManipulate.test.ts index 433340267..e53301755 100644 --- a/tests/core/file/fileManipulate.test.ts +++ b/tests/core/file/fileManipulate.test.ts @@ -777,30 +777,6 @@ func main() { - fmt.Println("Hello") -}`, - }, - { - name: 'Go nested block comments', - ext: '.go', - input: `package main - -/* Outer comment - /* Inner comment */ - Still in outer comment -*/ - -func main() { - fmt.Println("Hello") -}`, - expected: `package main - - - - Still in outer comment -*/ - -func main() { fmt.Println("Hello") }`, },