diff --git a/go/vt/sqlparser/ast.go b/go/vt/sqlparser/ast.go index e0107dd1cfe..71120eeaa07 100644 --- a/go/vt/sqlparser/ast.go +++ b/go/vt/sqlparser/ast.go @@ -89,6 +89,9 @@ func Parse(sql string) (Statement, error) { tokenizer := NewStringTokenizer(sql) if yyParsePooled(tokenizer) != 0 { if tokenizer.partialDDL != nil { + if typ, val := tokenizer.Scan(); typ != 0 { + return nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val)) + } log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError) tokenizer.ParseTree = tokenizer.partialDDL return tokenizer.ParseTree, nil diff --git a/go/vt/sqlparser/parse_next_test.go b/go/vt/sqlparser/parse_next_test.go index 543854cadf5..c1dd85f5498 100644 --- a/go/vt/sqlparser/parse_next_test.go +++ b/go/vt/sqlparser/parse_next_test.go @@ -139,7 +139,7 @@ func TestParseNextEdgeCases(t *testing.T) { input: "select 1 from a; update a set b = 2 ; ", want: []string{"select 1 from a", "update a set b = 2"}, }, { - name: "Handle ForceEOF statements", + name: "Handle SkipToEnd statements", input: "set character set utf8; select 1 from a", want: []string{"set charset 'utf8'", "select 1 from a"}, }, { diff --git a/go/vt/sqlparser/parse_test.go b/go/vt/sqlparser/parse_test.go index 7266725c1d2..c914da29d89 100644 --- a/go/vt/sqlparser/parse_test.go +++ b/go/vt/sqlparser/parse_test.go @@ -2237,6 +2237,40 @@ func TestErrors(t *testing.T) { } } +// TestSkipToEnd tests that the skip to end functionality +// does not skip past a ';'. If any tokens exist after that, Parse +// should return an error. +func TestSkipToEnd(t *testing.T) { + testcases := []struct { + input string + output string + }{{ + // This is the case where the partial ddl will be reset + // because of a premature ';'. + input: "create table a(id; select * from t", + output: "syntax error at position 19", + }, { + // Partial DDL should get reset for valid DDLs also. + input: "create table a(id int); select * from t", + output: "syntax error at position 31 near 'select'", + }, { + // Partial DDL does not get reset here. But we allow the + // DDL only if there are no new tokens after skipping to end. + input: "create table a bb cc; select * from t", + output: "extra characters encountered after end of DDL: 'select'", + }, { + // Test that we don't step at ';' inside strings. + input: "create table a bb 'a;'; select * from t", + output: "extra characters encountered after end of DDL: 'select'", + }} + for _, tcase := range testcases { + _, err := Parse(tcase.input) + if err == nil || err.Error() != tcase.output { + t.Errorf("%s: %v, want %s", tcase.input, err, tcase.output) + } + } +} + // Benchmark run on 6/23/17, prior to improvements: // BenchmarkParse1-4 100000 16334 ns/op // BenchmarkParse2-4 30000 44121 ns/op diff --git a/go/vt/sqlparser/sql.go b/go/vt/sqlparser/sql.go index ab3c25e1be4..a760613d5a9 100644 --- a/go/vt/sqlparser/sql.go +++ b/go/vt/sqlparser/sql.go @@ -30,11 +30,11 @@ func decNesting(yylex interface{}) { yylex.(*Tokenizer).nesting-- } -// forceEOF forces the lexer to end prematurely. Not all SQL statements -// are supported by the Parser, thus calling forceEOF will make the lexer +// skipToEnd forces the lexer to end prematurely. Not all SQL statements +// are supported by the Parser, thus calling skipToEnd will make the lexer // return EOF early. -func forceEOF(yylex interface{}) { - yylex.(*Tokenizer).ForceEOF = true +func skipToEnd(yylex interface{}) { + yylex.(*Tokenizer).SkipToEnd = true } //line sql.y:53 @@ -6446,25 +6446,25 @@ yydefault: yyDollar = yyS[yypt-0 : yypt+1] //line sql.y:3322 { - forceEOF(yylex) + skipToEnd(yylex) } case 830: yyDollar = yyS[yypt-0 : yypt+1] //line sql.y:3327 { - forceEOF(yylex) + skipToEnd(yylex) } case 831: yyDollar = yyS[yypt-1 : yypt+1] //line sql.y:3331 { - forceEOF(yylex) + skipToEnd(yylex) } case 832: yyDollar = yyS[yypt-1 : yypt+1] //line sql.y:3335 { - forceEOF(yylex) + skipToEnd(yylex) } } goto yystack /* stack new state and value */ diff --git a/go/vt/sqlparser/sql.y b/go/vt/sqlparser/sql.y index 75b31d113ef..47e2247891a 100644 --- a/go/vt/sqlparser/sql.y +++ b/go/vt/sqlparser/sql.y @@ -41,11 +41,11 @@ func decNesting(yylex interface{}) { yylex.(*Tokenizer).nesting-- } -// forceEOF forces the lexer to end prematurely. Not all SQL statements -// are supported by the Parser, thus calling forceEOF will make the lexer +// skipToEnd forces the lexer to end prematurely. Not all SQL statements +// are supported by the Parser, thus calling skipToEnd will make the lexer // return EOF early. -func forceEOF(yylex interface{}) { - yylex.(*Tokenizer).ForceEOF = true +func skipToEnd(yylex interface{}) { + yylex.(*Tokenizer).SkipToEnd = true } %} @@ -267,7 +267,7 @@ func forceEOF(yylex interface{}) { %type charset_value %type table_id reserved_table_id table_alias as_opt_id %type as_opt -%type force_eof ddl_force_eof +%type skip_to_end ddl_skip_to_end %type charset %type set_session_or_global show_session_or_global %type convert_type @@ -557,16 +557,16 @@ create_statement: $1.OptLike = $2 $$ = $1 } -| CREATE constraint_opt INDEX ID using_opt ON table_name ddl_force_eof +| CREATE constraint_opt INDEX ID using_opt ON table_name ddl_skip_to_end { // Change this to an alter statement $$ = &DDL{Action: AlterStr, Table: $7, NewName:$7} } -| CREATE VIEW table_name ddl_force_eof +| CREATE VIEW table_name ddl_skip_to_end { $$ = &DDL{Action: CreateStr, NewName: $3.ToViewName()} } -| CREATE OR REPLACE VIEW table_name ddl_force_eof +| CREATE OR REPLACE VIEW table_name ddl_skip_to_end { $$ = &DDL{Action: CreateStr, NewName: $5.ToViewName()} } @@ -578,11 +578,11 @@ create_statement: Params: $5, }} } -| CREATE DATABASE not_exists_opt ID ddl_force_eof +| CREATE DATABASE not_exists_opt ID ddl_skip_to_end { $$ = &DBDDL{Action: CreateStr, DBName: string($4)} } -| CREATE SCHEMA not_exists_opt ID ddl_force_eof +| CREATE SCHEMA not_exists_opt ID ddl_skip_to_end { $$ = &DBDDL{Action: CreateStr, DBName: string($4)} } @@ -1288,15 +1288,15 @@ table_opt_value: } alter_statement: - ALTER ignore_opt TABLE table_name non_add_drop_or_rename_operation force_eof + ALTER ignore_opt TABLE table_name non_add_drop_or_rename_operation skip_to_end { $$ = &DDL{Action: AlterStr, Table: $4, NewName: $4} } -| ALTER ignore_opt TABLE table_name ADD alter_object_type force_eof +| ALTER ignore_opt TABLE table_name ADD alter_object_type skip_to_end { $$ = &DDL{Action: AlterStr, Table: $4, NewName: $4} } -| ALTER ignore_opt TABLE table_name DROP alter_object_type force_eof +| ALTER ignore_opt TABLE table_name DROP alter_object_type skip_to_end { $$ = &DDL{Action: AlterStr, Table: $4, NewName: $4} } @@ -1328,12 +1328,12 @@ alter_statement: // Change this to a rename statement $$ = &DDL{Action: RenameStr, Table: $4, NewName: $7} } -| ALTER ignore_opt TABLE table_name RENAME index_opt force_eof +| ALTER ignore_opt TABLE table_name RENAME index_opt skip_to_end { // Rename an index can just be an alter $$ = &DDL{Action: AlterStr, Table: $4, NewName: $4} } -| ALTER VIEW table_name ddl_force_eof +| ALTER VIEW table_name ddl_skip_to_end { $$ = &DDL{Action: AlterStr, Table: $3.ToViewName(), NewName: $3.ToViewName()} } @@ -1396,12 +1396,12 @@ drop_statement: } $$ = &DDL{Action: DropStr, Table: $4, IfExists: exists} } -| DROP INDEX ID ON table_name ddl_force_eof +| DROP INDEX ID ON table_name ddl_skip_to_end { // Change this to an alter statement $$ = &DDL{Action: AlterStr, Table: $5, NewName: $5} } -| DROP VIEW exists_opt table_name ddl_force_eof +| DROP VIEW exists_opt table_name ddl_skip_to_end { var exists bool if $3 != 0 { @@ -1434,60 +1434,60 @@ analyze_statement: } show_statement: - SHOW BINARY ID ddl_force_eof /* SHOW BINARY LOGS */ + SHOW BINARY ID ddl_skip_to_end /* SHOW BINARY LOGS */ { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW CHARACTER SET ddl_force_eof +| SHOW CHARACTER SET ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW CREATE DATABASE ddl_force_eof +| SHOW CREATE DATABASE ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } /* Rule to handle SHOW CREATE EVENT, SHOW CREATE FUNCTION, etc. */ -| SHOW CREATE ID ddl_force_eof +| SHOW CREATE ID ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW CREATE PROCEDURE ddl_force_eof +| SHOW CREATE PROCEDURE ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW CREATE TABLE ddl_force_eof +| SHOW CREATE TABLE ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW CREATE TRIGGER ddl_force_eof +| SHOW CREATE TRIGGER ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW CREATE VIEW ddl_force_eof +| SHOW CREATE VIEW ddl_skip_to_end { $$ = &Show{Type: string($2) + " " + string($3)} } -| SHOW DATABASES ddl_force_eof +| SHOW DATABASES ddl_skip_to_end { $$ = &Show{Type: string($2)} } -| SHOW INDEX ddl_force_eof +| SHOW INDEX ddl_skip_to_end { $$ = &Show{Type: string($2)} } -| SHOW KEYS ddl_force_eof +| SHOW KEYS ddl_skip_to_end { $$ = &Show{Type: string($2)} } -| SHOW PROCEDURE ddl_force_eof +| SHOW PROCEDURE ddl_skip_to_end { $$ = &Show{Type: string($2)} } -| SHOW show_session_or_global STATUS ddl_force_eof +| SHOW show_session_or_global STATUS ddl_skip_to_end { $$ = &Show{Scope: $2, Type: string($3)} } -| SHOW TABLE ddl_force_eof +| SHOW TABLE ddl_skip_to_end { $$ = &Show{Type: string($2)} } @@ -1506,7 +1506,7 @@ show_statement: $$ = &Show{Type: $3, ShowTablesOpt: showTablesOpt} } } -| SHOW show_session_or_global VARIABLES ddl_force_eof +| SHOW show_session_or_global VARIABLES ddl_skip_to_end { $$ = &Show{Scope: $2, Type: string($3)} } @@ -1554,7 +1554,7 @@ show_statement: * SHOW BINARY LOGS * SHOW INVALID */ -| SHOW ID ddl_force_eof +| SHOW ID ddl_skip_to_end { $$ = &Show{Type: string($2)} } @@ -1664,37 +1664,37 @@ rollback_statement: } other_statement: - DESC force_eof + DESC skip_to_end { $$ = &OtherRead{} } -| DESCRIBE force_eof +| DESCRIBE skip_to_end { $$ = &OtherRead{} } -| EXPLAIN force_eof +| EXPLAIN skip_to_end { $$ = &OtherRead{} } -| REPAIR force_eof +| REPAIR skip_to_end { $$ = &OtherAdmin{} } -| OPTIMIZE force_eof +| OPTIMIZE skip_to_end { $$ = &OtherAdmin{} } -| LOCK TABLES force_eof +| LOCK TABLES skip_to_end { $$ = &OtherAdmin{} } -| UNLOCK TABLES force_eof +| UNLOCK TABLES skip_to_end { $$ = &OtherAdmin{} } flush_statement: - FLUSH force_eof + FLUSH skip_to_end { $$ = &DDL{Action: FlushStr} } @@ -3318,20 +3318,20 @@ closeb: decNesting(yylex) } -force_eof: +skip_to_end: { - forceEOF(yylex) + skipToEnd(yylex) } -ddl_force_eof: +ddl_skip_to_end: { - forceEOF(yylex) + skipToEnd(yylex) } | openb { - forceEOF(yylex) + skipToEnd(yylex) } | reserved_sql_id { - forceEOF(yylex) + skipToEnd(yylex) } diff --git a/go/vt/sqlparser/token.go b/go/vt/sqlparser/token.go index 2b128d57a7f..fe55428f703 100644 --- a/go/vt/sqlparser/token.go +++ b/go/vt/sqlparser/token.go @@ -37,7 +37,7 @@ type Tokenizer struct { InStream io.Reader AllowComments bool SkipSpecialComments bool - ForceEOF bool + SkipToEnd bool lastChar uint16 Position int lastToken []byte @@ -427,6 +427,10 @@ func KeywordString(id int) string { // Lex returns the next token form the Tokenizer. // This function is used by go yacc. func (tkn *Tokenizer) Lex(lval *yySymType) int { + if tkn.SkipToEnd { + return tkn.skipStatement() + } + typ, val := tkn.Scan() for typ == COMMENT { if tkn.AllowComments { @@ -434,6 +438,13 @@ func (tkn *Tokenizer) Lex(lval *yySymType) int { } typ, val = tkn.Scan() } + if typ == 0 || typ == ';' || typ == LEX_ERROR { + // If encounter end of statement or invalid token, + // we should not accept partially parsed DDLs. They + // should instead result in parser errors. See the + // Parse function to see how this is handled. + tkn.partialDDL = nil + } lval.bytes = val tkn.lastToken = val return typ @@ -450,9 +461,7 @@ func (tkn *Tokenizer) Error(err string) { tkn.LastError = errors.New(buf.String()) // Try and re-sync to the next statement - if tkn.lastChar != ';' { - tkn.skipStatement() - } + tkn.skipStatement() } // Scan scans the tokenizer for the next token and returns @@ -474,11 +483,6 @@ func (tkn *Tokenizer) Scan() (int, []byte) { tkn.next() } - if tkn.ForceEOF { - tkn.skipStatement() - return 0, nil - } - tkn.skipBlank() switch ch := tkn.lastChar; { case isLetter(ch): @@ -504,14 +508,21 @@ func (tkn *Tokenizer) Scan() (int, []byte) { return tkn.scanNumber(false) case ch == ':': return tkn.scanBindVar() - case ch == ';' && tkn.multi: + case ch == ';': + if tkn.multi { + // In multi mode, ';' is treated as EOF. So, we don't advance. + // Repeated calls to Scan will keep returning 0 until ParseNext + // forces the advance. + return 0, nil + } + tkn.next() + return ';', nil + case ch == eofChar: return 0, nil default: tkn.next() switch ch { - case eofChar: - return 0, nil - case '=', ',', ';', '(', ')', '+', '*', '%', '^', '~': + case '=', ',', '(', ')', '+', '*', '%', '^', '~': return int(ch), nil case '&': if tkn.lastChar == '&' { @@ -612,12 +623,14 @@ func (tkn *Tokenizer) Scan() (int, []byte) { } } -// skipStatement scans until the EOF, or end of statement is encountered. -func (tkn *Tokenizer) skipStatement() { - ch := tkn.lastChar - for ch != ';' && ch != eofChar { - tkn.next() - ch = tkn.lastChar +// skipStatement scans until end of statement. +func (tkn *Tokenizer) skipStatement() int { + tkn.SkipToEnd = false + for { + typ, _ := tkn.Scan() + if typ == 0 || typ == ';' || typ == LEX_ERROR { + return typ + } } } @@ -929,7 +942,7 @@ func (tkn *Tokenizer) reset() { tkn.specialComment = nil tkn.posVarIndex = 0 tkn.nesting = 0 - tkn.ForceEOF = false + tkn.SkipToEnd = false } func isLetter(ch uint16) bool {