From 464cd03a546ccb72e1a2efe70900df03872e3fb9 Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 8 Feb 2020 10:45:20 +0800 Subject: [PATCH 01/26] Support nested bracketed comments --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/parser/PlanParserSuite.scala | 48 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 08d5ff53bf2e2..4630e94602e31 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1794,7 +1794,7 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' ~[+] .*? '*/' -> channel(HIDDEN) + : '/*' ~[+] ~'/'*? BRACKETED_COMMENT? ~'/'*? '*/' -> channel(HIDDEN) ; WS diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 875096f615241..1c3f0bfc69994 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -55,6 +55,54 @@ class PlanParserSuite extends AnalysisTest { With(plan, ctes) } + test("single comment") { + val plan = table("a").select(star()) + assertEqual("-- single comment\nSELECT * FROM a", plan) + } + + test("bracketed comment case one") { + val plan = table("a").select(star()) + assertEqual("/* This is an example of SQL which should not execute:\n" + + " * select 'multi-line';\n" + + " */\n" + + "SELECT * FROM a", plan) + } + + test("bracketed comment case two") { + val plan = table("a").select(star()) + assertEqual("/*\n" + + "SELECT 'trailing' as x1; -- inside block comment\n" + + "*/\n" + + "SELECT * FROM a", plan) + } + + test("nexted bracketed comment case one") { + val plan = table("a").select(star()) + assertEqual("/* This block comment surrounds a query which itself has a block comment...\n" + + "SELECT /* embedded single line */ 'embedded' AS x2;\n" + + "*/\n" + + "SELECT * FROM a", plan) + } + + test("nexted bracketed comment case two") { + val plan = table("a").select(star()) + assertEqual("SELECT -- continued after the following block comments...\n" + + "/* Deeply nested comment.\n" + + " This includes a single apostrophe to make sure we aren't decoding this part as a " + + "string.\n" + + "SELECT 'deep nest' AS n1;\n" + + "/* Second level of nesting...\n" + + "SELECT 'deeper nest' as n2;\n" + + "/* Third level of nesting...\n" + + "SELECT 'deepest nest' as n3;\n" + + "*/\n" + + "Hoo boy. Still two deep...\n" + + "*/\n" + + "Now just one deep...\n" + + "*/\n" + + "* FROM a", plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) From c782bacc785eee3441cc8e58a5bf85114a57a60f Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 8 Feb 2020 16:12:49 +0800 Subject: [PATCH 02/26] Fix bug --- .../main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4630e94602e31..5b708a09ee313 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1794,7 +1794,7 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' ~[+] ~'/'*? BRACKETED_COMMENT? ~'/'*? '*/' -> channel(HIDDEN) + : '/*' ~[+] ( ~'/' | ~'*' '/' ~'*' )*? BRACKETED_COMMENT? ( ~'/' | ~'*' '/' ~'*' )*? '*/' -> channel(HIDDEN) ; WS From 0a21b5eb526b72bdf36840b92ad0b2efb8c6f888 Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 8 Feb 2020 17:38:00 +0800 Subject: [PATCH 03/26] Optimize code --- .../sql/catalyst/parser/PlanParserSuite.scala | 65 +++++++++++-------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 1c3f0bfc69994..daf6ec765650f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -62,45 +62,56 @@ class PlanParserSuite extends AnalysisTest { test("bracketed comment case one") { val plan = table("a").select(star()) - assertEqual("/* This is an example of SQL which should not execute:\n" + - " * select 'multi-line';\n" + - " */\n" + - "SELECT * FROM a", plan) + assertEqual( + """ + |/* This is an example of SQL which should not execute: + | * select 'multi-line'; + | */ + |SELECT * FROM a + """.stripMargin, plan) } test("bracketed comment case two") { val plan = table("a").select(star()) - assertEqual("/*\n" + - "SELECT 'trailing' as x1; -- inside block comment\n" + - "*/\n" + - "SELECT * FROM a", plan) + assertEqual( + """ + |/* + |SELECT 'trailing' as x1; -- inside block comment + |*/ + |SELECT * FROM a + """.stripMargin, plan) } test("nexted bracketed comment case one") { val plan = table("a").select(star()) - assertEqual("/* This block comment surrounds a query which itself has a block comment...\n" + - "SELECT /* embedded single line */ 'embedded' AS x2;\n" + - "*/\n" + - "SELECT * FROM a", plan) + assertEqual( + """ + |/* This block comment surrounds a query which itself has a block comment... + |SELECT /* embedded single line */ 'embedded' AS x2; + |*/ + |SELECT * FROM a + """.stripMargin, plan) } test("nexted bracketed comment case two") { val plan = table("a").select(star()) - assertEqual("SELECT -- continued after the following block comments...\n" + - "/* Deeply nested comment.\n" + - " This includes a single apostrophe to make sure we aren't decoding this part as a " + - "string.\n" + - "SELECT 'deep nest' AS n1;\n" + - "/* Second level of nesting...\n" + - "SELECT 'deeper nest' as n2;\n" + - "/* Third level of nesting...\n" + - "SELECT 'deepest nest' as n3;\n" + - "*/\n" + - "Hoo boy. Still two deep...\n" + - "*/\n" + - "Now just one deep...\n" + - "*/\n" + - "* FROM a", plan) + assertEqual( + """ + |SELECT -- continued after the following block comments... + |/* Deeply nested comment. + | This includes a single apostrophe to make sure we aren't decoding this part as a string. + |SELECT 'deep nest' AS n1; + |/* Second level of nesting... + |SELECT 'deeper nest' as n2; + |/* Third level of nesting... + |SELECT 'deepest nest' as n3; + |*/ + |Hoo boy. Still two deep... + |*/ + |Now just one deep... + |*/ + |* FROM a + """.stripMargin, plan) } test("case insensitive") { From 1b9d1da8bd7942c7a37ca1fff6cc67d0113784da Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 8 Feb 2020 19:03:40 +0800 Subject: [PATCH 04/26] Add test cases --- .../resources/sql-tests/inputs/comments.sql | 65 +++++++++++++++++++ .../sql-tests/results/comments.sql.out | 12 ++++ 2 files changed, 77 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/comments.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/comments.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql new file mode 100644 index 0000000000000..f2aea03cf9010 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -0,0 +1,65 @@ +-- Test comments. +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) +AS testData(a, b); + +-- bracketed comment case one +-- /* This is the first example of bracketed comment. +-- SELECT * FROM testData; +-- */ +-- SELECT * FROM testData; + +-- bracketed comment case two +-- /* This is the second example of bracketed comment. +-- SELECT '/', * FROM testData; +-- */ +-- SELECT '/', * FROM testData; + +-- bracketed comment case three +-- /* This is the third example of bracketed comment. +-- *SELECT '*', * FROM testData; +-- */ +-- SELECT '*', * FROM testData; + +-- nested bracketed comment case one +-- /* This is the first example of nested bracketed comment. +-- /* I am a nested bracketed comment.*/ +-- */ +-- SELECT * FROM testData; + +-- nested bracketed comment case two +-- /* This is the second example of nested bracketed comment. +-- /* I am a nested bracketed comment. +-- */ +-- */ +-- SELECT * FROM testData; + +-- nested bracketed comment case three +-- /* +-- * This is the third example of nested bracketed comment. +-- /* +-- * I am a nested bracketed comment. +-- */ +-- */ +-- SELECT * FROM testData; + +-- nested bracketed comment case four +-- /* +-- * This is the four example of nested bracketed comment. +-- SELECT /* I am a nested bracketed comment.*/ * FROM testData; +-- */ +-- SELECT * FROM testData; + +-- nested bracketed comment case five +-- SELECT * /* +-- * This is the five example of nested bracketed comment. +-- /* I am a second level of nested bracketed comment. +-- /* I am a third level of nested bracketed comment. +-- Other information of third level. +-- SELECT * FROM testData; +-- */ +-- Other information of second level. +-- */ +-- Other information of first level. +-- */ +-- FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out new file mode 100644 index 0000000000000..90f8d1f5c3cec --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out @@ -0,0 +1,12 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 1 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) +AS testData(a, b) +-- !query schema +struct<> +-- !query output + From 3d036886df60ec77f66e35142156803569577935 Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 8 Feb 2020 20:56:04 +0800 Subject: [PATCH 05/26] Update jira comment. --- .../test/resources/sql-tests/inputs/postgreSQL/comments.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql index 6725ce45e72a5..eeb3b4475529c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql @@ -11,13 +11,13 @@ SELECT /* embedded single line */ 'embedded' AS `second`; SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line SELECT 'before multi-line' AS fourth; --- [SPARK-28880] ANSI SQL: Bracketed comments +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files /* This is an example of SQL which should not execute: * select 'multi-line'; */ SELECT 'after multi-line' AS fifth; --- [SPARK-28880] ANSI SQL: Bracketed comments +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- -- Nested comments -- From 596d905b3cdf0aa6ba84ae545e0a458abf91bb98 Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 8 Feb 2020 21:01:22 +0800 Subject: [PATCH 06/26] Update jira comment. --- sql/core/src/test/resources/sql-tests/inputs/comments.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql index f2aea03cf9010..5da8c22499ca5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -3,30 +3,35 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) AS testData(a, b); +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case one -- /* This is the first example of bracketed comment. -- SELECT * FROM testData; -- */ -- SELECT * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case two -- /* This is the second example of bracketed comment. -- SELECT '/', * FROM testData; -- */ -- SELECT '/', * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case three -- /* This is the third example of bracketed comment. -- *SELECT '*', * FROM testData; -- */ -- SELECT '*', * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case one -- /* This is the first example of nested bracketed comment. -- /* I am a nested bracketed comment.*/ -- */ -- SELECT * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case two -- /* This is the second example of nested bracketed comment. -- /* I am a nested bracketed comment. @@ -34,6 +39,7 @@ AS testData(a, b); -- */ -- SELECT * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case three -- /* -- * This is the third example of nested bracketed comment. @@ -43,6 +49,7 @@ AS testData(a, b); -- */ -- SELECT * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case four -- /* -- * This is the four example of nested bracketed comment. @@ -50,6 +57,7 @@ AS testData(a, b); -- */ -- SELECT * FROM testData; +-- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case five -- SELECT * /* -- * This is the five example of nested bracketed comment. From 7760f9181259ee3493fb5c02d1d2b9b6dd70f2bb Mon Sep 17 00:00:00 2001 From: beliefer Date: Sun, 9 Feb 2020 19:20:49 +0800 Subject: [PATCH 07/26] release comment --- .../resources/sql-tests/inputs/comments.sql | 90 +++---- .../sql-tests/results/comments.sql.out | 235 +++++++++++++++++- 2 files changed, 279 insertions(+), 46 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql index 5da8c22499ca5..ceff69a85df91 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -5,69 +5,69 @@ AS testData(a, b); -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case one --- /* This is the first example of bracketed comment. --- SELECT * FROM testData; --- */ --- SELECT * FROM testData; +/* This is the first example of bracketed comment. +SELECT * FROM testData; +*/ +SELECT * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case two --- /* This is the second example of bracketed comment. --- SELECT '/', * FROM testData; --- */ --- SELECT '/', * FROM testData; +/* This is the second example of bracketed comment. +SELECT '/', * FROM testData; +*/ +SELECT '/', * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case three --- /* This is the third example of bracketed comment. --- *SELECT '*', * FROM testData; --- */ --- SELECT '*', * FROM testData; +/* This is the third example of bracketed comment. + *SELECT '*', * FROM testData; + */ +SELECT '*', * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case one --- /* This is the first example of nested bracketed comment. --- /* I am a nested bracketed comment.*/ --- */ --- SELECT * FROM testData; +/* This is the first example of nested bracketed comment. +/* I am a nested bracketed comment.*/ +*/ +SELECT * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case two --- /* This is the second example of nested bracketed comment. --- /* I am a nested bracketed comment. --- */ --- */ --- SELECT * FROM testData; +/* This is the second example of nested bracketed comment. +/* I am a nested bracketed comment. + */ + */ +SELECT * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case three --- /* --- * This is the third example of nested bracketed comment. --- /* --- * I am a nested bracketed comment. --- */ --- */ --- SELECT * FROM testData; +/* + * This is the third example of nested bracketed comment. + /* + * I am a nested bracketed comment. + */ + */ +SELECT * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case four --- /* --- * This is the four example of nested bracketed comment. --- SELECT /* I am a nested bracketed comment.*/ * FROM testData; --- */ --- SELECT * FROM testData; +/* + * This is the four example of nested bracketed comment. +SELECT /* I am a nested bracketed comment.*/ * FROM testData; + */ +SELECT * FROM testData; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case five --- SELECT * /* --- * This is the five example of nested bracketed comment. --- /* I am a second level of nested bracketed comment. --- /* I am a third level of nested bracketed comment. --- Other information of third level. --- SELECT * FROM testData; --- */ --- Other information of second level. --- */ --- Other information of first level. --- */ --- FROM testData; +SELECT * /* + * This is the five example of nested bracketed comment. +/* I am a second level of nested bracketed comment. +/* I am a third level of nested bracketed comment. +Other information of third level. +SELECT * FROM testData; +*/ +Other information of second level. +*/ +Other information of first level. +*/ +FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out index 90f8d1f5c3cec..d7073eea1ae50 100644 --- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 1 +-- Number of queries: 14 -- !query @@ -10,3 +10,236 @@ AS testData(a, b) struct<> -- !query output + + +-- !query +/* This is the first example of bracketed comment. +SELECT * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +/* This is the first example of bracketed comment. +^^^ +SELECT * FROM testData + + +-- !query +*/ +SELECT * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +*/ +^^^ +SELECT * FROM testData + + +-- !query +/* This is the second example of bracketed comment. +SELECT '/', * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +/* This is the second example of bracketed comment. +^^^ +SELECT '/', * FROM testData + + +-- !query +*/ +SELECT '/', * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +*/ +^^^ +SELECT '/', * FROM testData + + +-- !query +/* This is the third example of bracketed comment. + *SELECT '*', * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +/* This is the third example of bracketed comment. +^^^ + *SELECT '*', * FROM testData + + +-- !query +*/ +SELECT '*', * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +*/ +^^^ +SELECT '*', * FROM testData + + +-- !query +/* This is the first example of nested bracketed comment. +/* I am a nested bracketed comment.*/ +*/ +SELECT * FROM testData +-- !query schema +struct +-- !query output +1 1 +1 1 +1 2 +1 NULL +2 1 +NULL 2 +NULL NULL + + +-- !query +/* This is the second example of nested bracketed comment. +/* I am a nested bracketed comment. + */ + */ +SELECT * FROM testData +-- !query schema +struct +-- !query output +1 1 +1 1 +1 2 +1 NULL +2 1 +NULL 2 +NULL NULL + + +-- !query +/* + * This is the third example of nested bracketed comment. + /* + * I am a nested bracketed comment. + */ + */ +SELECT * FROM testData +-- !query schema +struct +-- !query output +1 1 +1 1 +1 2 +1 NULL +2 1 +NULL 2 +NULL NULL + + +-- !query +/* + * This is the four example of nested bracketed comment. +SELECT /* I am a nested bracketed comment.*/ * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +/* +^^^ + * This is the four example of nested bracketed comment. +SELECT /* I am a nested bracketed comment.*/ * FROM testData + + +-- !query +*/ +SELECT * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +*/ +^^^ +SELECT * FROM testData + + +-- !query +SELECT * /* + * This is the five example of nested bracketed comment. +/* I am a second level of nested bracketed comment. +/* I am a third level of nested bracketed comment. +Other information of third level. +SELECT * FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input 'the' expecting {, ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 2, pos 11) + +== SQL == +SELECT * /* + * This is the five example of nested bracketed comment. +-----------^^^ +/* I am a second level of nested bracketed comment. +/* I am a third level of nested bracketed comment. +Other information of third level. +SELECT * FROM testData + + +-- !query +*/ +Other information of second level. +*/ +Other information of first level. +*/ +FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +*/ +^^^ +Other information of second level. +*/ +Other information of first level. +*/ +FROM testData From efe98fad056dbedf43a41256a2e4245c3e3372dd Mon Sep 17 00:00:00 2001 From: beliefer Date: Mon, 10 Feb 2020 10:35:14 +0800 Subject: [PATCH 08/26] Update test cases. --- .../resources/sql-tests/inputs/comments.sql | 29 +++--- .../sql-tests/results/comments.sql.out | 92 +++++++------------ 2 files changed, 45 insertions(+), 76 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql index ceff69a85df91..75c047c401bd6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -1,35 +1,32 @@ -- Test comments. -CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES -(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) -AS testData(a, b); -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case one /* This is the first example of bracketed comment. -SELECT * FROM testData; +SELECT 'ommented out content' AS first; */ -SELECT * FROM testData; +SELECT 'selected content' AS first; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case two /* This is the second example of bracketed comment. -SELECT '/', * FROM testData; +SELECT '/', 'ommented out content' AS second; */ -SELECT '/', * FROM testData; +SELECT '/', 'selected content' AS second; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- bracketed comment case three /* This is the third example of bracketed comment. - *SELECT '*', * FROM testData; + *SELECT '*', 'ommented out content' AS third; */ -SELECT '*', * FROM testData; +SELECT '*', 'selected content' AS third; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case one /* This is the first example of nested bracketed comment. /* I am a nested bracketed comment.*/ */ -SELECT * FROM testData; +SELECT 'selected content' AS four; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case two @@ -37,7 +34,7 @@ SELECT * FROM testData; /* I am a nested bracketed comment. */ */ -SELECT * FROM testData; +SELECT 'selected content' AS five; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case three @@ -47,7 +44,7 @@ SELECT * FROM testData; * I am a nested bracketed comment. */ */ -SELECT * FROM testData; +SELECT 'selected content' AS six; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case four @@ -55,19 +52,19 @@ SELECT * FROM testData; * This is the four example of nested bracketed comment. SELECT /* I am a nested bracketed comment.*/ * FROM testData; */ -SELECT * FROM testData; +SELECT 'selected content' AS seven; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case five -SELECT * /* +SELECT /* * This is the five example of nested bracketed comment. /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT * FROM testData; +SELECT 'ommented out content' AS eight; */ Other information of second level. */ Other information of first level. */ -FROM testData; +'selected content' AS eight; diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out index d7073eea1ae50..3268f74400be9 100644 --- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out @@ -1,20 +1,10 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 14 - - --- !query -CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES -(1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) -AS testData(a, b) --- !query schema -struct<> --- !query output - +-- Number of queries: 13 -- !query /* This is the first example of bracketed comment. -SELECT * FROM testData +SELECT 'ommented out content' AS first -- !query schema struct<> -- !query output @@ -25,12 +15,12 @@ mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR' == SQL == /* This is the first example of bracketed comment. ^^^ -SELECT * FROM testData +SELECT 'ommented out content' AS first -- !query */ -SELECT * FROM testData +SELECT 'selected content' AS first -- !query schema struct<> -- !query output @@ -41,12 +31,12 @@ extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR == SQL == */ ^^^ -SELECT * FROM testData +SELECT 'selected content' AS first -- !query /* This is the second example of bracketed comment. -SELECT '/', * FROM testData +SELECT '/', 'ommented out content' AS second -- !query schema struct<> -- !query output @@ -57,12 +47,12 @@ mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR' == SQL == /* This is the second example of bracketed comment. ^^^ -SELECT '/', * FROM testData +SELECT '/', 'ommented out content' AS second -- !query */ -SELECT '/', * FROM testData +SELECT '/', 'selected content' AS second -- !query schema struct<> -- !query output @@ -73,12 +63,12 @@ extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR == SQL == */ ^^^ -SELECT '/', * FROM testData +SELECT '/', 'selected content' AS second -- !query /* This is the third example of bracketed comment. - *SELECT '*', * FROM testData + *SELECT '*', 'ommented out content' AS third -- !query schema struct<> -- !query output @@ -89,12 +79,12 @@ mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR' == SQL == /* This is the third example of bracketed comment. ^^^ - *SELECT '*', * FROM testData + *SELECT '*', 'ommented out content' AS third -- !query */ -SELECT '*', * FROM testData +SELECT '*', 'selected content' AS third -- !query schema struct<> -- !query output @@ -105,24 +95,18 @@ extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR == SQL == */ ^^^ -SELECT '*', * FROM testData +SELECT '*', 'selected content' AS third -- !query /* This is the first example of nested bracketed comment. /* I am a nested bracketed comment.*/ */ -SELECT * FROM testData +SELECT 'selected content' AS four -- !query schema -struct +struct -- !query output -1 1 -1 1 -1 2 -1 NULL -2 1 -NULL 2 -NULL NULL +selected content -- !query @@ -130,17 +114,11 @@ NULL NULL /* I am a nested bracketed comment. */ */ -SELECT * FROM testData +SELECT 'selected content' AS five -- !query schema -struct +struct -- !query output -1 1 -1 1 -1 2 -1 NULL -2 1 -NULL 2 -NULL NULL +selected content -- !query @@ -150,17 +128,11 @@ NULL NULL * I am a nested bracketed comment. */ */ -SELECT * FROM testData +SELECT 'selected content' AS six -- !query schema -struct +struct -- !query output -1 1 -1 1 -1 2 -1 NULL -2 1 -NULL 2 -NULL NULL +selected content -- !query @@ -183,7 +155,7 @@ SELECT /* I am a nested bracketed comment.*/ * FROM testData -- !query */ -SELECT * FROM testData +SELECT 'selected content' AS seven -- !query schema struct<> -- !query output @@ -194,31 +166,31 @@ extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR == SQL == */ ^^^ -SELECT * FROM testData +SELECT 'selected content' AS seven -- !query -SELECT * /* +SELECT /* * This is the five example of nested bracketed comment. /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT * FROM testData +SELECT 'ommented out content' AS eight -- !query schema struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input 'the' expecting {, ',', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'FROM', 'GROUP', 'HAVING', 'INTERSECT', 'LATERAL', 'LIMIT', 'ORDER', 'MINUS', 'SORT', 'UNION', 'WHERE', 'WINDOW', '-'}(line 2, pos 11) +no viable alternative at input 'SELECT /'(line 1, pos 7) == SQL == -SELECT * /* +SELECT /* +-------^^^ * This is the five example of nested bracketed comment. ------------^^^ /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT * FROM testData +SELECT 'ommented out content' AS eight -- !query @@ -227,7 +199,7 @@ Other information of second level. */ Other information of first level. */ -FROM testData +'selected content' AS eight -- !query schema struct<> -- !query output @@ -242,4 +214,4 @@ Other information of second level. */ Other information of first level. */ -FROM testData +'selected content' AS eight From a2241aea0c600f209f5cba8427bb74fd70123de7 Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 13 Feb 2020 09:46:23 +0800 Subject: [PATCH 09/26] Optimize code --- .../sql/catalyst/parser/PlanParserSuite.scala | 4 +-- .../resources/sql-tests/inputs/comments.sql | 18 ++++++------ .../sql-tests/results/comments.sql.out | 28 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index daf6ec765650f..66d10b06dd077 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -82,7 +82,7 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } - test("nexted bracketed comment case one") { + test("nested bracketed comment case one") { val plan = table("a").select(star()) assertEqual( """ @@ -93,7 +93,7 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } - test("nexted bracketed comment case two") { + test("nested bracketed comment case two") { val plan = table("a").select(star()) assertEqual( """ diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql index 75c047c401bd6..f668508ce8d67 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -26,7 +26,7 @@ SELECT '*', 'selected content' AS third; /* This is the first example of nested bracketed comment. /* I am a nested bracketed comment.*/ */ -SELECT 'selected content' AS four; +SELECT 'selected content' AS fourth; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case two @@ -34,7 +34,7 @@ SELECT 'selected content' AS four; /* I am a nested bracketed comment. */ */ -SELECT 'selected content' AS five; +SELECT 'selected content' AS fifth; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files -- nested bracketed comment case three @@ -44,24 +44,24 @@ SELECT 'selected content' AS five; * I am a nested bracketed comment. */ */ -SELECT 'selected content' AS six; +SELECT 'selected content' AS sixth; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files --- nested bracketed comment case four +-- nested bracketed comment case fourth /* - * This is the four example of nested bracketed comment. + * This is the fourth example of nested bracketed comment. SELECT /* I am a nested bracketed comment.*/ * FROM testData; */ -SELECT 'selected content' AS seven; +SELECT 'selected content' AS seventh; -- [SPARK-30758] Spark SQL can't display bracketed comments well in generated golden files --- nested bracketed comment case five +-- nested bracketed comment case fifth SELECT /* - * This is the five example of nested bracketed comment. + * This is the fifth example of nested bracketed comment. /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT 'ommented out content' AS eight; +SELECT 'ommented out content' AS eighth; */ Other information of second level. */ diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out index 3268f74400be9..dae173bf9e4b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out @@ -102,9 +102,9 @@ SELECT '*', 'selected content' AS third /* This is the first example of nested bracketed comment. /* I am a nested bracketed comment.*/ */ -SELECT 'selected content' AS four +SELECT 'selected content' AS fourth -- !query schema -struct +struct -- !query output selected content @@ -114,9 +114,9 @@ selected content /* I am a nested bracketed comment. */ */ -SELECT 'selected content' AS five +SELECT 'selected content' AS fifth -- !query schema -struct +struct -- !query output selected content @@ -128,16 +128,16 @@ selected content * I am a nested bracketed comment. */ */ -SELECT 'selected content' AS six +SELECT 'selected content' AS sixth -- !query schema -struct +struct -- !query output selected content -- !query /* - * This is the four example of nested bracketed comment. + * This is the fourth example of nested bracketed comment. SELECT /* I am a nested bracketed comment.*/ * FROM testData -- !query schema struct<> @@ -149,13 +149,13 @@ mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR' == SQL == /* ^^^ - * This is the four example of nested bracketed comment. + * This is the fourth example of nested bracketed comment. SELECT /* I am a nested bracketed comment.*/ * FROM testData -- !query */ -SELECT 'selected content' AS seven +SELECT 'selected content' AS seventh -- !query schema struct<> -- !query output @@ -166,16 +166,16 @@ extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR == SQL == */ ^^^ -SELECT 'selected content' AS seven +SELECT 'selected content' AS seventh -- !query SELECT /* - * This is the five example of nested bracketed comment. + * This is the fifth example of nested bracketed comment. /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT 'ommented out content' AS eight +SELECT 'ommented out content' AS eighth -- !query schema struct<> -- !query output @@ -186,11 +186,11 @@ no viable alternative at input 'SELECT /'(line 1, pos 7) == SQL == SELECT /* -------^^^ - * This is the five example of nested bracketed comment. + * This is the fifth example of nested bracketed comment. /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT 'ommented out content' AS eight +SELECT 'ommented out content' AS eighth -- !query From f2e7fb50e088203a3e3f45780a702e5afdd3a5e0 Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 14 Feb 2020 17:34:34 +0800 Subject: [PATCH 10/26] Support / used in nested bracketed comments. --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/catalyst/parser/PlanParserSuite.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 08e6fd52d69ec..4cc7e2d2c0f51 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1801,7 +1801,7 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' ~[+] ( ~'/' | ~'*' '/' ~'*' )*? BRACKETED_COMMENT? ( ~'/' | ~'*' '/' ~'*' )*? '*/' -> channel(HIDDEN) + : '/*' ~[+] ~'/'*? ( ~[*/] '/' ~[*/] )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~[*/] '/' ~[*/] )*? ~'/'*? '*/' -> channel(HIDDEN) ; WS diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 66d10b06dd077..b4ef09d797de8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -114,6 +114,18 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } + test("nested bracketed comment case three") { + val plan = table("a").select(star()) + assertEqual( + """ + |/* This block comment surrounds a query which itself has a block comment... + |//* I am a nested bracketed comment. + |*/ + |*/ + |SELECT * FROM a + """.stripMargin, plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) From 1b188d792a1a2d67ec70415d8c22a21b5314a8ad Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 14 Feb 2020 19:40:54 +0800 Subject: [PATCH 11/26] Optimize antlr4 --- .../main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4cc7e2d2c0f51..82861042bc4a0 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1801,7 +1801,7 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' ~[+] ~'/'*? ( ~[*/] '/' ~[*/] )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~[*/] '/' ~[*/] )*? ~'/'*? '*/' -> channel(HIDDEN) + : '/*' ~[+] ~'/'*? ( ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'*' '/' ~'*' )*? ~'/'*? '*/' -> channel(HIDDEN) ; WS From e1d3b3bbefebdcf14a27aeb2b5a1a051893d4001 Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 14 Feb 2020 20:52:56 +0800 Subject: [PATCH 12/26] Add comment for bracketed comment. --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 82861042bc4a0..21ab46905e82a 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1800,6 +1800,21 @@ BRACKETED_EMPTY_COMMENT : '/**/' -> channel(HIDDEN) ; +// The bracketed comment consists of six main parts, from left to right are: +// "/*", ~[+], left part, nested bracketed comment, right part, "*/". +// Ⅰ. '/*' is used to match the beginning of a bracketed comment. +// Ⅱ. ~[+] is used to avoid conflicts with hints. +// Ⅲ. The left part is a bit difficult to understand, so let's focus on it. +// The left part is composed of three parts, non-backslash part, backslash part, non-asterisk part. +// 1. The non-backslash part is used to match substrings that do not contain a backslash in the comment. +// 2. The backslash part is used to match substrings containing backslashes in comments. This part must avoid +// asterisks before and after the backslash, otherwise it will conflict with the beginning and end of the +// bracketed comment. +// 3. The non-asterisk part. An asterisk is not allowed in this section, otherwise it will conflict with the +// beginning of the nested bracketed comment or the end of the current bracketed comment. +// IV. The sub-comment of the current bracketed comment, also known as a nested bracketed comment. +// V. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. +// VI. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT : '/*' ~[+] ~'/'*? ( ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'*' '/' ~'*' )*? ~'/'*? '*/' -> channel(HIDDEN) ; From 812b16dab7b7b5b956e5cdc951e0871e6d3838b4 Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 14 Feb 2020 21:08:30 +0800 Subject: [PATCH 13/26] Update comments --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 21ab46905e82a..4cea07591bc02 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1805,16 +1805,16 @@ BRACKETED_EMPTY_COMMENT // Ⅰ. '/*' is used to match the beginning of a bracketed comment. // Ⅱ. ~[+] is used to avoid conflicts with hints. // Ⅲ. The left part is a bit difficult to understand, so let's focus on it. -// The left part is composed of three parts, non-backslash part, backslash part, non-asterisk part. -// 1. The non-backslash part is used to match substrings that do not contain a backslash in the comment. -// 2. The backslash part is used to match substrings containing backslashes in comments. This part must avoid -// asterisks before and after the backslash, otherwise it will conflict with the beginning and end of the +// The left part is composed of three parts, non-slash part, slash part, non-asterisk part. +// 1. The non-slash part is used to match substrings that do not contain a slash in the comment. +// 2. The slash part is used to match substrings containing slashes in comments. This part must avoid +// asterisks before and after the slash, otherwise it will conflict with the beginning and end of the // bracketed comment. // 3. The non-asterisk part. An asterisk is not allowed in this section, otherwise it will conflict with the // beginning of the nested bracketed comment or the end of the current bracketed comment. -// IV. The sub-comment of the current bracketed comment, also known as a nested bracketed comment. -// V. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. -// VI. '*/' is used to match the end of a bracketed comment. +// Ⅳ. The sub-comment of the current bracketed comment, also known as a nested bracketed comment. +// Ⅴ. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. +// Ⅵ. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT : '/*' ~[+] ~'/'*? ( ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'*' '/' ~'*' )*? ~'/'*? '*/' -> channel(HIDDEN) ; From f5a817f594473677990e15676d59a3dde1201ce0 Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 14 Feb 2020 23:11:00 +0800 Subject: [PATCH 14/26] Update comments --- .../spark/sql/catalyst/parser/SqlBase.g4 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4cea07591bc02..adc6cec28f979 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1802,19 +1802,19 @@ BRACKETED_EMPTY_COMMENT // The bracketed comment consists of six main parts, from left to right are: // "/*", ~[+], left part, nested bracketed comment, right part, "*/". -// Ⅰ. '/*' is used to match the beginning of a bracketed comment. -// Ⅱ. ~[+] is used to avoid conflicts with hints. -// Ⅲ. The left part is a bit difficult to understand, so let's focus on it. +// 1. '/*' is used to match the beginning of a bracketed comment. +// 2. ~[+] is used to avoid conflicts with hints. +// 3. The left part is a bit difficult to understand, so let's focus on it. // The left part is composed of three parts, non-slash part, slash part, non-asterisk part. -// 1. The non-slash part is used to match substrings that do not contain a slash in the comment. -// 2. The slash part is used to match substrings containing slashes in comments. This part must avoid +// 3-1. The non-slash part is used to match substrings that do not contain a slash in the comment. +// 3-2. The slash part is used to match substrings containing slashes in comments. This part must avoid // asterisks before and after the slash, otherwise it will conflict with the beginning and end of the // bracketed comment. -// 3. The non-asterisk part. An asterisk is not allowed in this section, otherwise it will conflict with the +// 3-3. The non-asterisk part. An asterisk is not allowed in this section, otherwise it will conflict with the // beginning of the nested bracketed comment or the end of the current bracketed comment. -// Ⅳ. The sub-comment of the current bracketed comment, also known as a nested bracketed comment. -// Ⅴ. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. -// Ⅵ. '*/' is used to match the end of a bracketed comment. +// 4. The sub-comment of the current bracketed comment, also known as a nested bracketed comment. +// 5. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. +// 6. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT : '/*' ~[+] ~'/'*? ( ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'*' '/' ~'*' )*? ~'/'*? '*/' -> channel(HIDDEN) ; From 99d918708474e798696f38e8b85464603903efa1 Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 14 Feb 2020 23:38:29 +0800 Subject: [PATCH 15/26] Optimize code --- .../main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index adc6cec28f979..4c332958300f6 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1816,7 +1816,7 @@ BRACKETED_EMPTY_COMMENT // 5. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. // 6. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT - : '/*' ~[+] ~'/'*? ( ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'*' '/' ~'*' )*? ~'/'*? '*/' -> channel(HIDDEN) + : '/*' ~[+] ( ~'/' | ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'/' | ~'*' '/' ~'*' )*? '*/' -> channel(HIDDEN) ; WS From 8e89a87d90aff36b45e018ed9a10cdaaa1007995 Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 15 Feb 2020 11:04:02 +0800 Subject: [PATCH 16/26] Improve code --- .../spark/sql/catalyst/parser/SqlBase.g4 | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4c332958300f6..190bb7ab32ec5 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1800,24 +1800,9 @@ BRACKETED_EMPTY_COMMENT : '/**/' -> channel(HIDDEN) ; -// The bracketed comment consists of six main parts, from left to right are: -// "/*", ~[+], left part, nested bracketed comment, right part, "*/". -// 1. '/*' is used to match the beginning of a bracketed comment. -// 2. ~[+] is used to avoid conflicts with hints. -// 3. The left part is a bit difficult to understand, so let's focus on it. -// The left part is composed of three parts, non-slash part, slash part, non-asterisk part. -// 3-1. The non-slash part is used to match substrings that do not contain a slash in the comment. -// 3-2. The slash part is used to match substrings containing slashes in comments. This part must avoid -// asterisks before and after the slash, otherwise it will conflict with the beginning and end of the -// bracketed comment. -// 3-3. The non-asterisk part. An asterisk is not allowed in this section, otherwise it will conflict with the -// beginning of the nested bracketed comment or the end of the current bracketed comment. -// 4. The sub-comment of the current bracketed comment, also known as a nested bracketed comment. -// 5. The right part has the same effect as the left part, except that the matching direction is exactly the opposite. -// 6. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT - : '/*' ~[+] ( ~'/' | ~'*' '/' ~'*' )*? ~'*'+ BRACKETED_COMMENT? ~'*'+ ( ~'/' | ~'*' '/' ~'*' )*? '*/' -> channel(HIDDEN) - ; + : '/*' ~[+] (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + ; WS : [ \r\n\t]+ -> channel(HIDDEN) From 8118ab545d80ff32a73b71353ec6d788e18f5448 Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 15 Feb 2020 11:24:26 +0800 Subject: [PATCH 17/26] Update code --- .../antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 190bb7ab32ec5..17835b3b7b24e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1801,8 +1801,8 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' ~[+] (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) - ; + : '/*' ~[+] (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + ; WS : [ \r\n\t]+ -> channel(HIDDEN) From 116e9b80846c8a4a87deafd58c77053296c0d702 Mon Sep 17 00:00:00 2001 From: beliefer Date: Mon, 17 Feb 2020 17:13:20 +0800 Subject: [PATCH 18/26] Support empty nested bracketed comments. --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/parser/PlanParserSuite.scala | 9 ++++ .../resources/sql-tests/inputs/comments.sql | 40 +++++++++++------- .../sql-tests/results/comments.sql.out | 42 +++++++++++++------ 4 files changed, 66 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 17835b3b7b24e..19fe2547f2039 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1797,7 +1797,7 @@ SIMPLE_COMMENT ; BRACKETED_EMPTY_COMMENT - : '/**/' -> channel(HIDDEN) + : '/*' BRACKETED_EMPTY_COMMENT? '*/' -> channel(HIDDEN) ; BRACKETED_COMMENT diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index b4ef09d797de8..eb93ae873f723 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -126,6 +126,15 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } + test("nested bracketed comment case four") { + val plan = table("a").select(star()) + assertEqual( + """ + |/*/**/*/ + |SELECT * FROM a + """.stripMargin, plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) diff --git a/sql/core/src/test/resources/sql-tests/inputs/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/comments.sql index ceceb72b811da..19f11de22dfd1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comments.sql @@ -1,6 +1,6 @@ -- Test comments. --- bracketed comment case one +-- the first case of bracketed comment --QUERY-DELIMITER-START /* This is the first example of bracketed comment. SELECT 'ommented out content' AS first; @@ -8,7 +8,7 @@ SELECT 'ommented out content' AS first; SELECT 'selected content' AS first; --QUERY-DELIMITER-END --- bracketed comment case two +-- the second case of bracketed comment --QUERY-DELIMITER-START /* This is the second example of bracketed comment. SELECT '/', 'ommented out content' AS second; @@ -16,7 +16,7 @@ SELECT '/', 'ommented out content' AS second; SELECT '/', 'selected content' AS second; --QUERY-DELIMITER-END --- bracketed comment case three +-- the third case of bracketed comment --QUERY-DELIMITER-START /* This is the third example of bracketed comment. *SELECT '*', 'ommented out content' AS third; @@ -24,24 +24,30 @@ SELECT '/', 'selected content' AS second; SELECT '*', 'selected content' AS third; --QUERY-DELIMITER-END --- nested bracketed comment case one +-- the first case of empty bracketed comment +--QUERY-DELIMITER-START +/**/ +SELECT 'selected content' AS fourth; +--QUERY-DELIMITER-END + +-- the first case of nested bracketed comment --QUERY-DELIMITER-START /* This is the first example of nested bracketed comment. /* I am a nested bracketed comment.*/ */ -SELECT 'selected content' AS fourth; +SELECT 'selected content' AS fifth; --QUERY-DELIMITER-END --- nested bracketed comment case two +-- the second case of nested bracketed comment --QUERY-DELIMITER-START /* This is the second example of nested bracketed comment. /* I am a nested bracketed comment. */ */ -SELECT 'selected content' AS fifth; +SELECT 'selected content' AS sixth; --QUERY-DELIMITER-END --- nested bracketed comment case three +-- the third case of nested bracketed comment --QUERY-DELIMITER-START /* * This is the third example of nested bracketed comment. @@ -49,30 +55,36 @@ SELECT 'selected content' AS fifth; * I am a nested bracketed comment. */ */ -SELECT 'selected content' AS sixth; +SELECT 'selected content' AS seventh; --QUERY-DELIMITER-END --- nested bracketed comment case fourth +-- the fourth case of nested bracketed comment --QUERY-DELIMITER-START /* * This is the fourth example of nested bracketed comment. SELECT /* I am a nested bracketed comment.*/ * FROM testData; */ -SELECT 'selected content' AS seventh; +SELECT 'selected content' AS eighth; --QUERY-DELIMITER-END --- nested bracketed comment case fifth +-- the fifth case of nested bracketed comment --QUERY-DELIMITER-START SELECT /* * This is the fifth example of nested bracketed comment. /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT 'ommented out content' AS eighth; +SELECT 'ommented out content' AS ninth; */ Other information of second level. */ Other information of first level. */ -'selected content' AS eight; +'selected content' AS ninth; +--QUERY-DELIMITER-END + +-- the first case of empty nested bracketed comment +--QUERY-DELIMITER-START +/*/**/*/ +SELECT 'selected content' AS tenth; --QUERY-DELIMITER-END diff --git a/sql/core/src/test/resources/sql-tests/results/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/comments.sql.out index 9c3c2500dc56d..fd58a33595fe6 100644 --- a/sql/core/src/test/resources/sql-tests/results/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/comments.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 8 +-- Number of queries: 10 -- !query @@ -35,13 +35,22 @@ struct<*:string,third:string> * selected content +-- !query +/**/ +SELECT 'selected content' AS fourth +-- !query schema +struct +-- !query output +selected content + + -- !query /* This is the first example of nested bracketed comment. /* I am a nested bracketed comment.*/ */ -SELECT 'selected content' AS fourth +SELECT 'selected content' AS fifth -- !query schema -struct +struct -- !query output selected content @@ -51,9 +60,9 @@ selected content /* I am a nested bracketed comment. */ */ -SELECT 'selected content' AS fifth +SELECT 'selected content' AS sixth -- !query schema -struct +struct -- !query output selected content @@ -65,9 +74,9 @@ selected content * I am a nested bracketed comment. */ */ -SELECT 'selected content' AS sixth +SELECT 'selected content' AS seventh -- !query schema -struct +struct -- !query output selected content @@ -77,9 +86,9 @@ selected content * This is the fourth example of nested bracketed comment. SELECT /* I am a nested bracketed comment.*/ * FROM testData; */ -SELECT 'selected content' AS seventh +SELECT 'selected content' AS eighth -- !query schema -struct +struct -- !query output selected content @@ -90,14 +99,23 @@ SELECT /* /* I am a second level of nested bracketed comment. /* I am a third level of nested bracketed comment. Other information of third level. -SELECT 'ommented out content' AS eighth; +SELECT 'ommented out content' AS ninth; */ Other information of second level. */ Other information of first level. */ -'selected content' AS eight +'selected content' AS ninth +-- !query schema +struct +-- !query output +selected content + + +-- !query +/*/**/*/ +SELECT 'selected content' AS tenth -- !query schema -struct +struct -- !query output selected content From 25d0863015e881819c67fdeb2e85c47dfb08f098 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 18 Feb 2020 10:03:43 +0800 Subject: [PATCH 19/26] Optimize code --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/catalyst/parser/PlanParserSuite.scala | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 19fe2547f2039..8b4cbcc0b0f8d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1801,7 +1801,7 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' ~[+] (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + : '/*' (BRACKETED_COMMENT .*? | ~[+] (BRACKETED_COMMENT|.)*?) '*/' -> channel(HIDDEN) ; WS diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index eb93ae873f723..897a2d0ccd9d2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -135,6 +135,15 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } + test("nested bracketed comment case five") { + val plan = table("a").select(star()) + assertEqual( + """ + |/*/*abc*/*/ + |SELECT * FROM a + """.stripMargin, plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) From f1806ef7c41cc0fc757a407066ef49a40bb10be9 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 18 Feb 2020 13:49:39 +0800 Subject: [PATCH 20/26] Optimize code --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 4 ++-- .../spark/sql/catalyst/parser/PlanParserSuite.scala | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 8b4cbcc0b0f8d..87fb6ccd9df11 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1797,11 +1797,11 @@ SIMPLE_COMMENT ; BRACKETED_EMPTY_COMMENT - : '/*' BRACKETED_EMPTY_COMMENT? '*/' -> channel(HIDDEN) + : '/*' BRACKETED_EMPTY_COMMENT* '*/' -> channel(HIDDEN) ; BRACKETED_COMMENT - : '/*' (BRACKETED_COMMENT .*? | ~[+] (BRACKETED_COMMENT|.)*?) '*/' -> channel(HIDDEN) + : '/*' (BRACKETED_COMMENT .*? | ~[+] (BRACKETED_COMMENT|.)*?)* '*/' -> channel(HIDDEN) ; WS diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 897a2d0ccd9d2..7382ef64d0fc4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -144,6 +144,15 @@ class PlanParserSuite extends AnalysisTest { """.stripMargin, plan) } + test("nested bracketed comment case six") { + val plan = table("a").select(star()) + assertEqual( + """ + |/*/*foo*//*bar*/*/ + |SELECT * FROM a + """.stripMargin, plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) From 93eb3c5f3d07da70468f55d49ef1853d663a8288 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 19 Feb 2020 00:02:13 +0800 Subject: [PATCH 21/26] Improve code --- .../main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 87fb6ccd9df11..d9dedca623601 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1801,7 +1801,7 @@ BRACKETED_EMPTY_COMMENT ; BRACKETED_COMMENT - : '/*' (BRACKETED_COMMENT .*? | ~[+] (BRACKETED_COMMENT|.)*?)* '*/' -> channel(HIDDEN) + : '/*' (BRACKETED_COMMENT .*? | ~[+] (BRACKETED_COMMENT|.)*?)*? '*/' -> channel(HIDDEN) ; WS From 20452bcac119694798d5c088201cc4b5aaa26322 Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 20 Feb 2020 12:14:48 +0800 Subject: [PATCH 22/26] Optimize code --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index d9dedca623601..5b8b3e645226c 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1800,8 +1800,17 @@ BRACKETED_EMPTY_COMMENT : '/*' BRACKETED_EMPTY_COMMENT* '*/' -> channel(HIDDEN) ; +// The bracketed comment consists of three main parts, from left to right are: +// '/*', the content of comment, '*/'. +// 1. '/*' is used to match the beginning of a bracketed comment. +// 2. The content of comment is divided into two optional parts by '|'. +// 1. The left part is used to match comments like below: +// '/*/*content of nested bracketed*/*/' or '/*/*content of nested bracketed*/content of bracketed*/' +// 2. The right part is used to avoid conflicts with hints and matches like the comment below: +// '/*content of bracketed/*content of nested bracketed*/*/' or '/*content of bracketed/*content of nested bracketed*/content of bracketed*/' +// 3. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT - : '/*' (BRACKETED_COMMENT .*? | ~[+] (BRACKETED_COMMENT|.)*?)*? '*/' -> channel(HIDDEN) + : '/*' ((BRACKETED_COMMENT .*? (BRACKETED_COMMENT|.)*? | ~[+] (BRACKETED_COMMENT|.)*?)) '*/' -> channel(HIDDEN) ; WS From 0a42a59df69ab262f78a9f6158eb8fad593b5579 Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 20 Feb 2020 14:23:08 +0800 Subject: [PATCH 23/26] Optimize code --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 5b8b3e645226c..6f59bfd6004fc 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1803,14 +1803,14 @@ BRACKETED_EMPTY_COMMENT // The bracketed comment consists of three main parts, from left to right are: // '/*', the content of comment, '*/'. // 1. '/*' is used to match the beginning of a bracketed comment. -// 2. The content of comment is divided into two optional parts by '|'. -// 1. The left part is used to match comments like below: -// '/*/*content of nested bracketed*/*/' or '/*/*content of nested bracketed*/content of bracketed*/' -// 2. The right part is used to avoid conflicts with hints and matches like the comment below: -// '/*content of bracketed/*content of nested bracketed*/*/' or '/*content of bracketed/*content of nested bracketed*/content of bracketed*/' +// 2. The content of comment is consists of two parts. +// 1. The first part is used to match the beginning of the brancketed comment content. +// This matches the nested bracketed comment first and then matches the content of the bracketed comment +// that need to avoid conflicts with hints. +// 2. The second part is used to matches the tail of the brancketed comment content. // 3. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT - : '/*' ((BRACKETED_COMMENT .*? (BRACKETED_COMMENT|.)*? | ~[+] (BRACKETED_COMMENT|.)*?)) '*/' -> channel(HIDDEN) + : '/*' (BRACKETED_COMMENT .*? | ~[+]) (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) ; WS From 660cd3424ef86ee43b1a8b04fad6ce00345049e9 Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 21 Feb 2020 18:34:48 +0800 Subject: [PATCH 24/26] Optimize code --- .../spark/sql/catalyst/parser/SqlBase.g4 | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 6f59bfd6004fc..21c31f4cf380f 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -61,6 +61,19 @@ grammar SqlBase; * When true, the behavior of keywords follows ANSI SQL standard. */ public boolean SQL_standard_keyword_behavior = false; + + /** + * Verify whether current token is a valid hint token (which follows '/*' and is '+'). + * Returns true if the first character is '+'. + */ + public boolean isHint() { + int firstChar = _input.LA(1); + if (firstChar == '+') { + return true; + } else { + return false; + } + } } singleStatement @@ -1800,17 +1813,8 @@ BRACKETED_EMPTY_COMMENT : '/*' BRACKETED_EMPTY_COMMENT* '*/' -> channel(HIDDEN) ; -// The bracketed comment consists of three main parts, from left to right are: -// '/*', the content of comment, '*/'. -// 1. '/*' is used to match the beginning of a bracketed comment. -// 2. The content of comment is consists of two parts. -// 1. The first part is used to match the beginning of the brancketed comment content. -// This matches the nested bracketed comment first and then matches the content of the bracketed comment -// that need to avoid conflicts with hints. -// 2. The second part is used to matches the tail of the brancketed comment content. -// 3. '*/' is used to match the end of a bracketed comment. BRACKETED_COMMENT - : '/*' (BRACKETED_COMMENT .*? | ~[+]) (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) + : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) ; WS From 9c856b79ce4f8606f2b1d3e4a4af6dfabe46cbdb Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 21 Feb 2020 19:03:50 +0800 Subject: [PATCH 25/26] Optimize code --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 21c31f4cf380f..dc7470138a5c7 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -63,12 +63,15 @@ grammar SqlBase; public boolean SQL_standard_keyword_behavior = false; /** - * Verify whether current token is a valid hint token (which follows '/*' and is '+'). + * This method will be called when we see '/*' and try to match it as a bracketed comment. + * If the next character is '+', it should be parsed as hint later, otherwise we cannot match + * it as a bracketed comment. + * * Returns true if the first character is '+'. */ public boolean isHint() { - int firstChar = _input.LA(1); - if (firstChar == '+') { + int nextChar = _input.LA(1); + if (nextChar == '+') { return true; } else { return false; @@ -1809,10 +1812,6 @@ SIMPLE_COMMENT : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) ; -BRACKETED_EMPTY_COMMENT - : '/*' BRACKETED_EMPTY_COMMENT* '*/' -> channel(HIDDEN) - ; - BRACKETED_COMMENT : '/*' {!isHint()}? (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN) ; From 48814c356794ea97191c09416958296be5b01e7f Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 21 Feb 2020 19:32:46 +0800 Subject: [PATCH 26/26] Optimize code --- .../antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index dc7470138a5c7..ee14a01c64777 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -64,10 +64,10 @@ grammar SqlBase; /** * This method will be called when we see '/*' and try to match it as a bracketed comment. - * If the next character is '+', it should be parsed as hint later, otherwise we cannot match + * If the next character is '+', it should be parsed as hint later, and we cannot match * it as a bracketed comment. * - * Returns true if the first character is '+'. + * Returns true if the next character is '+'. */ public boolean isHint() { int nextChar = _input.LA(1);