diff --git a/go/cmd/dolt/commands/sql_statement_scanner.go b/go/cmd/dolt/commands/sql_statement_scanner.go index 5789e4779f4..9a760814c25 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner.go +++ b/go/cmd/dolt/commands/sql_statement_scanner.go @@ -38,6 +38,10 @@ const ( dQuote = '"' backslash = '\\' backtick = '`' + hyphen = '-' + asterisk = '*' + slash = '/' + newline = '\n' ) const delimPrefixLen = 10 @@ -76,6 +80,22 @@ type qState struct { seenNonWhitespaceChar bool // whether we have encountered a non-whitespace character since we returned the last token numConsecutiveDelimiterMatches int // the consecutive number of characters that have been matched to the delimiter statementStartLine int + insideBlockComment bool + insideLineComment bool +} + +func (qs qState) insideComment() bool { + return qs.insideLineComment || qs.insideBlockComment +} + +func (qs qState) insideQuote() bool { + return qs.quoteChar != 0 +} + +// ignoreDelimiters reports whether delimiters should be ignored. Inside a comment or a quote, delimiters, +// including comment delimiters, should be ignored. +func (qs qState) ignoreDelimiters() bool { + return qs.insideComment() || qs.insideQuote() } func (s *StreamScanner) Scan() bool { @@ -267,7 +287,7 @@ func (s *StreamScanner) seekDelimiter() (error, bool) { } // check if we've matched the delimiter string - if s.state.quoteChar == 0 && s.buf[i] == s.delimiter[s.state.numConsecutiveDelimiterMatches] { + if !s.state.ignoreDelimiters() && s.buf[i] == s.delimiter[s.state.numConsecutiveDelimiterMatches] { s.state.numConsecutiveDelimiterMatches++ if s.state.numConsecutiveDelimiterMatches == len(s.delimiter) { s.state.end = s.i - len(s.delimiter) + 1 @@ -281,11 +301,34 @@ func (s *StreamScanner) seekDelimiter() (error, bool) { } switch s.buf[i] { - case '\n': + case newline: + s.state.insideLineComment = false s.lineNum++ + case hyphen: + // If inside quote or already inside comment, ignore. 
Otherwise, if previous character is also a hyphen, + // i.e. "--", begin line comment. NOTE(review): MySQL only treats "--" as a comment when whitespace or a control character follows; confirm that input such as 5--3 is acceptable here. + if !s.state.ignoreDelimiters() && s.state.lastChar == hyphen { + s.state.insideLineComment = true + } + case asterisk: + // If inside quote or already inside comment, ignore. Otherwise, if previous character is a slash, i.e. + // "/*", begin block comment. + if !s.state.ignoreDelimiters() && s.state.lastChar == slash { + s.state.insideBlockComment = true + } + case slash: + // If previous character is an asterisk, i.e. "*/", end block comment. NOTE(review): this check is unconditional, so the asterisk of a comment opener presumably also counts and the three-character sequence slash, asterisk, slash would open and immediately close the comment; confirm against how lastChar is updated. + if s.state.lastChar == asterisk { + s.state.insideBlockComment = false + } case backslash: s.state.numConsecutiveBackslashes++ case sQuote, dQuote, backtick: + // Skip quote handling while inside a comment. + if s.state.insideComment() { + break + } + prevNumConsecutiveBackslashes := s.state.numConsecutiveBackslashes s.state.numConsecutiveBackslashes = 0 @@ -295,7 +338,7 @@ func (s *StreamScanner) seekDelimiter() (error, bool) { } // currently in a quoted string - if s.state.quoteChar != 0 { + if s.state.insideQuote() { if i+1 >= s.fill { // require lookahead or EOF if err := s.read(); err != nil { diff --git a/go/cmd/dolt/commands/sql_statement_scanner_test.go b/go/cmd/dolt/commands/sql_statement_scanner_test.go index 46bd62fb046..07ce7ad2a89 100755 --- a/go/cmd/dolt/commands/sql_statement_scanner_test.go +++ b/go/cmd/dolt/commands/sql_statement_scanner_test.go @@ -204,6 +204,57 @@ insert into foo values (1,2,3)|`, }, lineNums: []int{1, 2}, }, + { + // https://github.com/dolthub/dolt/issues/10694 + input: `-- ' +-- can have intermediate comments +CALL dolt_commit('-m', 'message', '--allow-empty'); +CALL dolt_checkout('main');`, + statements: []string{ + `-- ' +-- can have intermediate comments +CALL dolt_commit('-m', 'message', '--allow-empty')`, + "CALL dolt_checkout('main')", + }, + lineNums: []int{1, 4}, + }, + { + input: `/* block comment with lone quote ' +*/ +-- can have intermediate comments +CALL dolt_commit('-m', 'message', '--allow-empty'); 
+CALL dolt_checkout('main');`, + statements: []string{ + `/* block comment with lone quote ' +*/ +-- can have intermediate comments +CALL dolt_commit('-m', 'message', '--allow-empty')`, + "CALL dolt_checkout('main')", + }, + lineNums: []int{1, 5}, + }, + { + input: `select * /* -- ignore line comment inside block comment */ from xy; +select x from xy; -- select y from xy; +select * /* ignore multi-line comment with ; +comment; +comment; +*/ from foo; +select '-- ignore line comment +in quote';`, + statements: []string{ + "select * /* -- ignore line comment inside block comment */ from xy", + "select x from xy", + `-- select y from xy; +select * /* ignore multi-line comment with ; +comment; +comment; +*/ from foo`, + `select '-- ignore line comment +in quote'`, + }, + lineNums: []int{1, 2, 2, 7}, + }, } for _, tt := range testcases {