apache · beliefer · Feb 7, 2020 · Feb 11, 2020 · Feb 11, 2020 · Feb 11, 2020
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -55,6 +55,27 @@ class PlanParserSuite extends AnalysisTest {
     With(plan, ctes)
   }
 
+  test("single comment") {
+    // The leading `--` comment line is expected to leave the parsed plan unchanged.
+    assertEqual("-- single comment\nSELECT * FROM a", table("a").select(star()))
+  }
+
+  test("bracketed comment case one") {
+    // The leading block comment (with a ';' inside it) should not affect the parsed plan.
+    val sqlText = "/* This is an example of SQL which should not execute:\n" +
+      " * select 'multi-line';\n" + " */\n" +
+      "SELECT * FROM a"
+    assertEqual(sqlText, table("a").select(star()))
+  }
+
+  test("bracketed comment case two") {
+    // A whole statement plus a `--` comment sit inside the block comment being skipped.
+    val sqlText = "/*\n" +
+      "SELECT 'trailing' as x1; -- inside block comment\n" + "*/\n" +
+      "SELECT * FROM a"
+    assertEqual(sqlText, table("a").select(star()))
+  }
+
   test("case insensitive") {
     val plan = table("a").select(star())
     assertEqual("sELEct * FroM a", plan)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql
@@ -26,23 +26,23 @@ SELECT 'after multi-line' AS fifth;
 SELECT 'trailing' as x1; -- inside block comment
 */
 
-/* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2;
-*/
+-- /* This block comment surrounds a query which itself has a block comment...
+-- SELECT /* embedded single line */ 'embedded' AS x2;
+-- */
 
 SELECT -- continued after the following block comments...
-/* Deeply nested comment.
-   This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1;
-/* Second level of nesting...
-SELECT 'deeper nest' as n2;
-/* Third level of nesting...
-SELECT 'deepest nest' as n3;
-*/
-Hoo boy. Still two deep...
-*/
-Now just one deep...
-*/
+-- /* Deeply nested comment.
+--    This includes a single apostrophe to make sure we aren't decoding this part as a string.
+-- SELECT 'deep nest' AS n1;
+-- /* Second level of nesting...
+-- SELECT 'deeper nest' as n2;
+-- /* Third level of nesting...
+-- SELECT 'deepest nest' as n3;
+-- */
+-- Hoo boy. Still two deep...
+-- */
+-- Now just one deep...
+-- */
 'deeply nested example' AS sixth;
 
-/* and this is the end of the file */
+-- /* and this is the end of the file */
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 13
+-- Number of queries: 6
 
 
 -- !query
@@ -36,161 +36,24 @@ before multi-line
 
 -- !query
 /* This is an example of SQL which should not execute:
- * select 'multi-line'
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* This is an example of SQL which should not execute:
-^^^
- * select 'multi-line'
-
-
--- !query
-*/
+ * select 'multi-line';
+ */
 SELECT 'after multi-line' AS fifth
 -- !query schema
-struct<>
+struct<fifth:string>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-SELECT 'after multi-line' AS fifth
+after multi-line
 
 
 -- !query
 /*
-SELECT 'trailing' as x1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/*
-^^^
-SELECT 'trailing' as x1
-
-
--- !query
-*/
-
-/* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
+SELECT 'trailing' as x1; -- inside block comment
 */
-^^^
 
-/* This block comment surrounds a query which itself has a block comment...
-SELECT /* embedded single line */ 'embedded' AS x2
-
-
--- !query
-*/
 
 SELECT -- continued after the following block comments...
-/* Deeply nested comment.
-   This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-
-SELECT -- continued after the following block comments...
-/* Deeply nested comment.
-   This includes a single apostrophe to make sure we aren't decoding this part as a string.
-SELECT 'deep nest' AS n1
-
-
--- !query
-/* Second level of nesting...
-SELECT 'deeper nest' as n2
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* Second level of nesting...
-^^^
-SELECT 'deeper nest' as n2
-
-
--- !query
-/* Third level of nesting...
-SELECT 'deepest nest' as n3
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-/* Third level of nesting...
-^^^
-SELECT 'deepest nest' as n3
-
-
--- !query
-*/
-Hoo boy. Still two deep...
-*/
-Now just one deep...
-*/
 'deeply nested example' AS sixth
 -- !query schema
-struct<>
+struct<sixth:string>
 -- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
-
-== SQL ==
-*/
-^^^
-Hoo boy. Still two deep...
-*/
-Now just one deep...
-*/
-'deeply nested example' AS sixth
-
-
--- !query
-/* and this is the end of the file */
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-mismatched input '<EOF>' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 37)
-
-== SQL ==
-/* and this is the end of the file */
--------------------------------------^^^
+deeply nested example
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql
 
 import java.io.File
 import java.util.{Locale, TimeZone}
+import java.util.regex.Pattern
 
+import scala.collection.mutable.HashMap
 import scala.util.control.NonFatal
 
 import org.apache.spark.{SparkConf, SparkException}
@@ -262,12 +264,36 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
     }.flatten
 
     // List of SQL queries to run
-    // note: this is not a robust way to split queries using semicolon, but works for now.
-    val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);")
+    // Replace each bracketed comment with a numbered placeholder so that a ';' inside a
+    // comment is not treated as a query separator by the split below.
+    // DOTALL `.*?` is used instead of `(\s|.)*?`: the grouped alternation can overflow the stack.
+    val m = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL)
+      .matcher((importedCode ++ code).mkString("\n"))
+    val multiCommentMap = new HashMap[String, String]()
+    val replaced = new StringBuffer()
+    while (m.find()) {
+      val placeHolder = s"/*${multiCommentMap.size}*/"
+      multiCommentMap(placeHolder) = m.group
+      // Substitute only this occurrence; placeholders contain no '$' or '\' to escape.
+      m.appendReplacement(replaced, placeHolder)
+    }
+    val tempQueries = m.appendTail(replaced).toString.split("(?<=[^\\\\]);")
       .map(_.trim).filter(_ != "").toSeq
       // Fix misplacement when comment is at the end of the query.
       .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "")
 
+    // Restore the original bracketed comments in a single pass over each query, so that
+    // restored comment text is never re-scanned; quoteReplacement keeps '$' and '\' literal.
+    val pattern = Pattern.compile("/\\*[0-9]+\\*/")
+    val queries = tempQueries.map { query =>
+      val (m, restored) = (pattern.matcher(query), new StringBuffer())
+      while (m.find()) {
+        val comment = java.util.regex.Matcher.quoteReplacement(multiCommentMap(m.group))
+        m.appendReplacement(restored, comment)
+      }
+      m.appendTail(restored).toString
+    }
+
     val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6))
     val settings = settingLines.flatMap(_.split(",").map { kv =>
       val (conf, value) = kv.span(_ != '=')