From 2f3a54cab65ca05cfd1a1b3dc458c5a987b9a28f Mon Sep 17 00:00:00 2001 From: beliefer Date: Fri, 7 Feb 2020 10:10:38 +0800 Subject: [PATCH 01/11] Improve bracketed comments tests. --- .../sql/catalyst/parser/PlanParserSuite.scala | 21 +++ .../sql-tests/inputs/postgreSQL/comments.sql | 32 ++-- .../results/postgreSQL/comments.sql.out | 153 +----------------- .../apache/spark/sql/SQLQueryTestSuite.scala | 30 +++- 4 files changed, 73 insertions(+), 163 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 875096f615241..38cb57f392450 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -55,6 +55,27 @@ class PlanParserSuite extends AnalysisTest { With(plan, ctes) } + test("single comment") { + val plan = table("a").select(star()) + assertEqual("-- single comment\nSELECT * FROM a", plan) + } + + test("bracketed comment case one") { + val plan = table("a").select(star()) + assertEqual("/* This is an example of SQL which should not execute:\n" + + " * select 'multi-line';\n" + + " */\n" + + "SELECT * FROM a", plan) + } + + test("bracketed comment case two") { + val plan = table("a").select(star()) + assertEqual("/*\n" + + "SELECT 'trailing' as x1; -- inside block comment\n" + + "*/\n" + + "SELECT * FROM a", plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql index 6725ce45e72a5..778f4e7a04bc5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql @@ -26,23 +26,23 @@ SELECT 'after multi-line' AS fifth; SELECT 'trailing' as x1; -- inside block comment */ -/* This block comment surrounds a query which itself has a block comment... -SELECT /* embedded single line */ 'embedded' AS x2; -*/ +-- /* This block comment surrounds a query which itself has a block comment... +-- SELECT /* embedded single line */ 'embedded' AS x2; +-- */ SELECT -- continued after the following block comments... -/* Deeply nested comment. - This includes a single apostrophe to make sure we aren't decoding this part as a string. -SELECT 'deep nest' AS n1; -/* Second level of nesting... -SELECT 'deeper nest' as n2; -/* Third level of nesting... -SELECT 'deepest nest' as n3; -*/ -Hoo boy. Still two deep... -*/ -Now just one deep... -*/ +-- /* Deeply nested comment. +-- This includes a single apostrophe to make sure we aren't decoding this part as a string. +-- SELECT 'deep nest' AS n1; +-- /* Second level of nesting... +-- SELECT 'deeper nest' as n2; +-- /* Third level of nesting... +-- SELECT 'deepest nest' as n3; +-- */ +-- Hoo boy. Still two deep... +-- */ +-- Now just one deep... +-- */ 'deeply nested example' AS sixth; -/* and this is the end of the file */ +-- /* and this is the end of the file */ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out index 4ea49013a62d1..bb3df1571f8f1 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 13 +-- Number of queries: 6 -- !query @@ -36,161 +36,24 @@ before multi-line -- !query /* This is an example of SQL which should not execute: - * select 'multi-line' --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* This is an example of SQL which should not execute: -^^^ - * select 'multi-line' - - --- !query -*/ + * select 'multi-line'; + */ SELECT 'after multi-line' AS fifth -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -*/ -^^^ -SELECT 'after multi-line' AS fifth +after multi-line -- !query /* -SELECT 'trailing' as x1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* -^^^ -SELECT 'trailing' as x1 - - --- !query -*/ - -/* This block comment surrounds a query which itself has a block comment... -SELECT /* embedded single line */ 'embedded' AS x2 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == +SELECT 'trailing' as x1; -- inside block comment */ -^^^ -/* This block comment surrounds a query which itself has a block comment... -SELECT /* embedded single line */ 'embedded' AS x2 - - --- !query -*/ SELECT -- continued after the following block comments... -/* Deeply nested comment. - This includes a single apostrophe to make sure we aren't decoding this part as a string. -SELECT 'deep nest' AS n1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -*/ -^^^ - -SELECT -- continued after the following block comments... -/* Deeply nested comment. - This includes a single apostrophe to make sure we aren't decoding this part as a string. -SELECT 'deep nest' AS n1 - - --- !query -/* Second level of nesting... -SELECT 'deeper nest' as n2 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* Second level of nesting... -^^^ -SELECT 'deeper nest' as n2 - - --- !query -/* Third level of nesting... -SELECT 'deepest nest' as n3 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* Third level of nesting... -^^^ -SELECT 'deepest nest' as n3 - - --- !query -*/ -Hoo boy. Still two deep... -*/ -Now just one deep... -*/ 'deeply nested example' AS sixth -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -*/ -^^^ -Hoo boy. Still two deep... -*/ -Now just one deep... -*/ -'deeply nested example' AS sixth - - --- !query -/* and this is the end of the file */ --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 37) - -== SQL == -/* and this is the end of the file */ --------------------------------------^^^ +deeply nested example diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 2e5a9e0b4d45d..0b5e6340738d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql import java.io.File import java.util.{Locale, TimeZone} +import java.util.regex.Pattern +import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.spark.{SparkConf, SparkException} @@ -262,12 +264,36 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { }.flatten // List of SQL queries to run - // note: this is not a robust way to split queries using semicolon, but works for now. - val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);") + // Replace bracketed comments with a placeholder + var codeStr = (importedCode ++ code).mkString("\n") + val m = Pattern.compile("/\\*(\\s|.)*?\\*/").matcher(codeStr) + val multiCommentMap = new HashMap[String, String]() + var i = 0 + while(m.find()) { + val group = m.group + val placeHolder = s"/*$i*/" + multiCommentMap(placeHolder) = group + codeStr = codeStr.replace(group, placeHolder) + i += 1 + } + + val tempQueries = codeStr.split("(?<=[^\\\\]);") .map(_.trim).filter(_ != "").toSeq // Fix misplacement when comment is at the end of the query. .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") + // Replace placeholders with original bracketed comments + val pattern = Pattern.compile("/\\*[0-9]+\\*/") + val queries = tempQueries.map { query => + var newQuery = query + val m = pattern.matcher(query) + while(m.find()) { + val group = m.group + newQuery = newQuery.replace(group, multiCommentMap(group)) + } + newQuery + } + val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) val settings = settingLines.flatMap(_.split(",").map { kv => val (conf, value) = kv.span(_ != '=') From 4619ac774ab78c129316da8754a8114a6821dc24 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 11 Feb 2020 23:15:02 +0800 Subject: [PATCH 02/11] Adjust code --- .../sql-tests/inputs/postgreSQL/comments.sql | 36 +++++------ .../results/postgreSQL/comments.sql.out | 46 +++++++++++++- .../apache/spark/sql/SQLQueryTestSuite.scala | 61 ++++++++++--------- 3 files changed, 96 insertions(+), 47 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql index 778f4e7a04bc5..349e0e756af8f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql @@ -11,38 +11,40 @@ SELECT /* embedded single line */ 'embedded' AS `second`; SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line SELECT 'before multi-line' AS fourth; +--QUERY-DELIMITER-START -- [SPARK-28880] ANSI SQL: Bracketed comments /* This is an example of SQL which should not execute: * select 'multi-line'; */ SELECT 'after multi-line' AS fifth; +--QUERY-DELIMITER-END -- [SPARK-28880] ANSI SQL: Bracketed comments -- -- Nested comments -- - +--QUERY-DELIMITER-START /* SELECT 'trailing' as x1; -- inside block comment */ --- /* This block comment surrounds a query which itself has a block comment... --- SELECT /* embedded single line */ 'embedded' AS x2; --- */ +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +*/ SELECT -- continued after the following block comments... --- /* Deeply nested comment. --- This includes a single apostrophe to make sure we aren't decoding this part as a string. --- SELECT 'deep nest' AS n1; --- /* Second level of nesting... --- SELECT 'deeper nest' as n2; --- /* Third level of nesting... --- SELECT 'deepest nest' as n3; --- */ --- Hoo boy. Still two deep... --- */ --- Now just one deep... --- */ +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... +SELECT 'deepest nest' as n3; +*/ +Hoo boy. Still two deep... +*/ +Now just one deep... +*/ 'deeply nested example' AS sixth; - +--QUERY-DELIMITER-END -- /* and this is the end of the file */ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out index bb3df1571f8f1..5c7ef689b601d 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out @@ -50,10 +50,52 @@ after multi-line SELECT 'trailing' as x1; -- inside block comment */ +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +*/ SELECT -- continued after the following block comments... +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... +SELECT 'deepest nest' as n3; +*/ +Hoo boy. Still two deep... +*/ +Now just one deep... +*/ 'deeply nested example' AS sixth -- !query schema -struct +struct<> -- !query output -deeply nested example +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input ''embedded'' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 6, pos 34) + +== SQL == +/* +SELECT 'trailing' as x1; -- inside block comment +*/ + +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +----------------------------------^^^ +*/ + +SELECT -- continued after the following block comments... +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... +SELECT 'deepest nest' as n3; +*/ +Hoo boy. Still two deep... +*/ +Now just one deep... +*/ +'deeply nested example' AS sixth diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 0b5e6340738d0..56c660343ce60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -21,7 +21,7 @@ import java.io.File import java.util.{Locale, TimeZone} import java.util.regex.Pattern -import scala.collection.mutable.HashMap +import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal import org.apache.spark.{SparkConf, SparkException} @@ -250,7 +250,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { protected def runTest(testCase: TestCase): Unit = { val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = input.split("\n").partition(_.trim.startsWith("--")) + val (comments, code) = input.split("\n").partition { line => + val newLine = line.trim + newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") + } // If `--IMPORT` found, load code from another test case file, then insert them // into the head in this test. @@ -263,37 +266,39 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { } }.flatten - // List of SQL queries to run - // Replace bracketed comments with a placeholder - var codeStr = (importedCode ++ code).mkString("\n") - val m = Pattern.compile("/\\*(\\s|.)*?\\*/").matcher(codeStr) - val multiCommentMap = new HashMap[String, String]() - var i = 0 - while(m.find()) { - val group = m.group - val placeHolder = s"/*$i*/" - multiCommentMap(placeHolder) = group - codeStr = codeStr.replace(group, placeHolder) - i += 1 + val tempQueries = if (code.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + val querys = new ArrayBuffer[String] + val otherCodes = new ArrayBuffer[String] + var tempStr = "" + var start = false + for (c <- code) { + if (c.trim.startsWith("--QUERY-DELIMITER-START")) { + start = true + querys ++= otherCodes.toSeq.mkString("\n").split("(?<=[^\\\\]);") + otherCodes.clear() + } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { + start = false + if (tempStr.endsWith(";")) { + tempStr = tempStr.substring(0, tempStr.length - 1) + } + querys += s"\n$tempStr" + tempStr = "" + } else if (start) { + tempStr += s"\n$c" + } else { + otherCodes += c + } + } + querys.toSeq + } else { + (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);").toSeq } - val tempQueries = codeStr.split("(?<=[^\\\\]);") - .map(_.trim).filter(_ != "").toSeq + // List of SQL queries to run + val queries = tempQueries.map(_.trim).filter(_ != "").toSeq // Fix misplacement when comment is at the end of the query. .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") - // Replace placeholders with original bracketed comments - val pattern = Pattern.compile("/\\*[0-9]+\\*/") - val queries = tempQueries.map { query => - var newQuery = query - val m = pattern.matcher(query) - while(m.find()) { - val group = m.group - newQuery = newQuery.replace(group, multiCommentMap(group)) - } - newQuery - } - val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) val settings = settingLines.flatMap(_.split(",").map { kv => val (conf, value) = kv.span(_ != '=') From c743bf34f4b23f28b7d56cf3d10c5d63f6185b49 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 11 Feb 2020 23:33:41 +0800 Subject: [PATCH 03/11] Revert PlanParserSuite --- .../sql/catalyst/parser/PlanParserSuite.scala | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 38cb57f392450..875096f615241 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -55,27 +55,6 @@ class PlanParserSuite extends AnalysisTest { With(plan, ctes) } - test("single comment") { - val plan = table("a").select(star()) - assertEqual("-- single comment\nSELECT * FROM a", plan) - } - - test("bracketed comment case one") { - val plan = table("a").select(star()) - assertEqual("/* This is an example of SQL which should not execute:\n" + - " * select 'multi-line';\n" + - " */\n" + - "SELECT * FROM a", plan) - } - - test("bracketed comment case two") { - val plan = table("a").select(star()) - assertEqual("/*\n" + - "SELECT 'trailing' as x1; -- inside block comment\n" + - "*/\n" + - "SELECT * FROM a", plan) - } - test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) From 7f21b1b9188000b7ff6daa1e17595bb40d690913 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 11 Feb 2020 23:44:44 +0800 Subject: [PATCH 04/11] Add comment. --- .../src/test/resources/sql-tests/inputs/postgreSQL/comments.sql | 2 +- .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql index 349e0e756af8f..1a454179ef79f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql @@ -47,4 +47,4 @@ Now just one deep... */ 'deeply nested example' AS sixth; --QUERY-DELIMITER-END --- /* and this is the end of the file */ +/* and this is the end of the file */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 56c660343ce60..d1a2ba6cae203 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -267,6 +267,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { }.flatten val tempQueries = if (code.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for comments test. val querys = new ArrayBuffer[String] val otherCodes = new ArrayBuffer[String] var tempStr = "" From 71e4c438f086afa73ed3372b5b1fc419cc0cd99b Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 12 Feb 2020 09:18:11 +0800 Subject: [PATCH 05/11] Update how to use --QUERY-DELIMITER --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index d1a2ba6cae203..ccaae42135428 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -64,7 +64,10 @@ import org.apache.spark.tags.ExtendedSQLTest * }}} * * The format for input files is simple: - * 1. A list of SQL queries separated by semicolon. + * 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively + * separate the SQL queries in the test file, please use --QUERY-DELIMITER-START and + * --QUERY-DELIMITER-END. Lines starting with --QUERY-DELIMITER-START and --QUERY-DELIMITER-END + * represent the beginning and end of a query, respectively. * 2. Lines starting with -- are treated as comments and ignored. * 3. Lines starting with --SET are used to specify the configs when running this testing file. You * can set multiple configs in one --SET, using comma to separate them. Or you can use multiple From 5a70f007d0f0c1ae0d98321b816b14d3adeaeb92 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 12 Feb 2020 09:22:31 +0800 Subject: [PATCH 06/11] Update how to use --QUERY-DELIMITER --- .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index ccaae42135428..a483681634dae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -65,7 +65,7 @@ import org.apache.spark.tags.ExtendedSQLTest * * The format for input files is simple: * 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively - * separate the SQL queries in the test file, please use --QUERY-DELIMITER-START and + * separate the SQL queries in the test file(e.g. bracketed comments), please use --QUERY-DELIMITER-START and * --QUERY-DELIMITER-END. Lines starting with --QUERY-DELIMITER-START and --QUERY-DELIMITER-END * represent the beginning and end of a query, respectively. * 2. Lines starting with -- are treated as comments and ignored. From 3f8497ff36b8e934b4709b9ba2cea5eca0cb8751 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 12 Feb 2020 09:24:31 +0800 Subject: [PATCH 07/11] Update how to use --QUERY-DELIMITER --- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index a483681634dae..8aea2a3b8da11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -65,9 +65,10 @@ import org.apache.spark.tags.ExtendedSQLTest * * The format for input files is simple: * 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively - * separate the SQL queries in the test file(e.g. bracketed comments), please use --QUERY-DELIMITER-START and - * --QUERY-DELIMITER-END. Lines starting with --QUERY-DELIMITER-START and --QUERY-DELIMITER-END - * represent the beginning and end of a query, respectively. + * separate the SQL queries in the test file(e.g. bracketed comments), please use + * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with + * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query, + * respectively. * 2. Lines starting with -- are treated as comments and ignored. * 3. Lines starting with --SET are used to specify the configs when running this testing file. You * can set multiple configs in one --SET, using comma to separate them. Or you can use multiple From a5126643a958d3aacfee994a9491e851506bdf54 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 12 Feb 2020 09:28:51 +0800 Subject: [PATCH 08/11] Update how to use --QUERY-DELIMITER --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 8aea2a3b8da11..f981d6c7b83ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -68,7 +68,8 @@ import org.apache.spark.tags.ExtendedSQLTest * separate the SQL queries in the test file(e.g. bracketed comments), please use * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query, - * respectively. + * respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START + * and --QUERY-DELIMITER-END is still separated by semicolons. * 2. Lines starting with -- are treated as comments and ignored. * 3. Lines starting with --SET are used to specify the configs when running this testing file. You * can set multiple configs in one --SET, using comma to separate them. Or you can use multiple From 38005e838fc8fc4c52b6e9bd7a2f73c0e2801e86 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 12 Feb 2020 20:30:09 +0800 Subject: [PATCH 09/11] Optimize code --- .../results/postgreSQL/comments.sql.out | 16 ++++++++++++++- .../apache/spark/sql/SQLQueryTestSuite.scala | 20 +++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out index 5c7ef689b601d..637c5561bd940 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 6 +-- Number of queries: 7 -- !query @@ -99,3 +99,17 @@ Hoo boy. Still two deep... Now just one deep... */ 'deeply nested example' AS sixth + + +-- !query +/* and this is the end of the file */ +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 37) + +== SQL == +/* and this is the end of the file */ +-------------------------------------^^^ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index f981d6c7b83ff..24644b65f3a48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -271,23 +271,24 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { } }.flatten - val tempQueries = if (code.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { - // Although the loop is heavy, only used for comments test. + val allCode = importedCode ++ code + val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for nested comments test. val querys = new ArrayBuffer[String] val otherCodes = new ArrayBuffer[String] var tempStr = "" var start = false - for (c <- code) { + for (c <- allCode) { if (c.trim.startsWith("--QUERY-DELIMITER-START")) { start = true querys ++= otherCodes.toSeq.mkString("\n").split("(?<=[^\\\\]);") otherCodes.clear() } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { start = false - if (tempStr.endsWith(";")) { - tempStr = tempStr.substring(0, tempStr.length - 1) - } - querys += s"\n$tempStr" +// if (tempStr.endsWith(";")) { +// tempStr = tempStr.substring(0, tempStr.length - 1) +// } + querys += s"\n${tempStr.stripSuffix(";")}" tempStr = "" } else if (start) { tempStr += s"\n$c" @@ -295,9 +296,12 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { otherCodes += c } } + if (otherCodes.nonEmpty) { + querys ++= otherCodes.toSeq.mkString("\n").split("(?<=[^\\\\]);") + } querys.toSeq } else { - (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);").toSeq + allCode.mkString("\n").split("(?<=[^\\\\]);").toSeq } // List of SQL queries to run From fa8397e439e141f95df728564d2e3bae1641f313 Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 13 Feb 2020 09:27:42 +0800 Subject: [PATCH 10/11] Optimize code --- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 24644b65f3a48..f83581b81ee5a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -253,6 +253,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { /** Run a test case. */ protected def runTest(testCase: TestCase): Unit = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } val input = fileToString(new File(testCase.inputFile)) val (comments, code) = input.split("\n").partition { line => @@ -281,13 +284,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { for (c <- allCode) { if (c.trim.startsWith("--QUERY-DELIMITER-START")) { start = true - querys ++= otherCodes.toSeq.mkString("\n").split("(?<=[^\\\\]);") + querys ++= splitWithSemicolon(otherCodes.toSeq) otherCodes.clear() } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { start = false -// if (tempStr.endsWith(";")) { -// tempStr = tempStr.substring(0, tempStr.length - 1) -// } querys += s"\n${tempStr.stripSuffix(";")}" tempStr = "" } else if (start) { @@ -297,11 +297,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { } } if (otherCodes.nonEmpty) { - querys ++= otherCodes.toSeq.mkString("\n").split("(?<=[^\\\\]);") + querys ++= splitWithSemicolon(otherCodes.toSeq) } querys.toSeq } else { - allCode.mkString("\n").split("(?<=[^\\\\]);").toSeq + splitWithSemicolon(allCode).toSeq } // List of SQL queries to run From 900cc733397cad9eeeba4a65c016fc5f5008ac33 Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 13 Feb 2020 14:57:22 +0800 Subject: [PATCH 11/11] Update comment --- .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index f83581b81ee5a..d532dd778540e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -276,7 +276,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { val allCode = importedCode ++ code val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { - // Although the loop is heavy, only used for nested comments test. + // Although the loop is heavy, only used for bracketed comments test. val querys = new ArrayBuffer[String] val otherCodes = new ArrayBuffer[String] var tempStr = ""