Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,27 @@ class PlanParserSuite extends AnalysisTest {
With(plan, ctes)
}

test("single comment") {
val plan = table("a").select(star())
assertEqual("-- single comment\nSELECT * FROM a", plan)
}

test("bracketed comment case one") {
val plan = table("a").select(star())
assertEqual("/* This is an example of SQL which should not execute:\n" +
" * select 'multi-line';\n" +
" */\n" +
"SELECT * FROM a", plan)
}

test("bracketed comment case two") {
val plan = table("a").select(star())
assertEqual("/*\n" +
"SELECT 'trailing' as x1; -- inside block comment\n" +
"*/\n" +
"SELECT * FROM a", plan)
}

test("case insensitive") {
val plan = table("a").select(star())
assertEqual("sELEct * FroM a", plan)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,23 @@ SELECT 'after multi-line' AS fifth;
SELECT 'trailing' as x1; -- inside block comment
*/

/* This block comment surrounds a query which itself has a block comment...
SELECT /* embedded single line */ 'embedded' AS x2;
*/
-- /* This block comment surrounds a query which itself has a block comment...
-- SELECT /* embedded single line */ 'embedded' AS x2;
-- */

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spark SQL can't support nested bracketed comments and I will open another PR to support it.

We need to update this file in this PR? If you will work on that, I think its ok to update this file in the next one.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your review. I have a question the nested bracketed comments will throw parsed exception not look good. Should I display the parsed exception into output?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I think better error messages look good if we can fix it easily.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It won't be easy for the time being. So I want comment out temporarily.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@maropu I think we can fix the test cases in 3.0. Regarding #27495, it is an enhancement, we can merge it to master only.

@maropu maropu Feb 9, 2020

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need this change to comment out these tests in branch-3.0? If branch-3.0 doesn't support these nested comments, its better to fix them so that the test could throw an exception for nested comments here instead of just commenting out them?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I will not comment out nested comments and throw exception into golden files.

SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
SELECT 'deep nest' AS n1;
/* Second level of nesting...
SELECT 'deeper nest' as n2;
/* Third level of nesting...
SELECT 'deepest nest' as n3;
*/
Hoo boy. Still two deep...
*/
Now just one deep...
*/
-- /* Deeply nested comment.
-- This includes a single apostrophe to make sure we aren't decoding this part as a string.
-- SELECT 'deep nest' AS n1;
-- /* Second level of nesting...
-- SELECT 'deeper nest' as n2;
-- /* Third level of nesting...
-- SELECT 'deepest nest' as n3;
-- */
-- Hoo boy. Still two deep...
-- */
-- Now just one deep...
-- */
'deeply nested example' AS sixth;

/* and this is the end of the file */
-- /* and this is the end of the file */
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 13
-- Number of queries: 6


-- !query
Expand Down Expand Up @@ -36,161 +36,24 @@ before multi-line

-- !query
/* This is an example of SQL which should not execute:
* select 'multi-line'
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/* This is an example of SQL which should not execute:
^^^
* select 'multi-line'


-- !query
*/
* select 'multi-line';
*/
SELECT 'after multi-line' AS fifth

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, the output is pretty nice.

-- !query schema
struct<>
struct<fifth:string>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
*/
^^^
SELECT 'after multi-line' AS fifth
after multi-line


-- !query
/*
SELECT 'trailing' as x1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/*
^^^
SELECT 'trailing' as x1


-- !query
*/

/* This block comment surrounds a query which itself has a block comment...
SELECT /* embedded single line */ 'embedded' AS x2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
SELECT 'trailing' as x1; -- inside block comment
*/
^^^

/* This block comment surrounds a query which itself has a block comment...
SELECT /* embedded single line */ 'embedded' AS x2


-- !query
*/

SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
SELECT 'deep nest' AS n1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
*/
^^^

SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
SELECT 'deep nest' AS n1


-- !query
/* Second level of nesting...
SELECT 'deeper nest' as n2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/* Second level of nesting...
^^^
SELECT 'deeper nest' as n2


-- !query
/* Third level of nesting...
SELECT 'deepest nest' as n3
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/* Third level of nesting...
^^^
SELECT 'deepest nest' as n3


-- !query
*/
Hoo boy. Still two deep...
*/
Now just one deep...
*/
'deeply nested example' AS sixth
-- !query schema
struct<>
struct<sixth:string>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
*/
^^^
Hoo boy. Still two deep...
*/
Now just one deep...
*/
'deeply nested example' AS sixth


-- !query
/* and this is the end of the file */
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '<EOF>' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 37)

== SQL ==
/* and this is the end of the file */
-------------------------------------^^^
deeply nested example
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package org.apache.spark.sql

import java.io.File
import java.util.{Locale, TimeZone}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.spark.{SparkConf, SparkException}
Expand Down Expand Up @@ -262,12 +264,36 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
}.flatten

// List of SQL queries to run
// note: this is not a robust way to split queries using semicolon, but works for now.
val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);")
Comment thread
cloud-fan marked this conversation as resolved.
// Replace bracketed comments with a placeholder
var codeStr = (importedCode ++ code).mkString("\n")
val m = Pattern.compile("/\\*(\\s|.)*?\\*/").matcher(codeStr)
val multiCommentMap = new HashMap[String, String]()
var i = 0
while(m.find()) {
val group = m.group
val placeHolder = s"/*$i*/"
multiCommentMap(placeHolder) = group
codeStr = codeStr.replace(group, placeHolder)
i += 1
}

val tempQueries = codeStr.split("(?<=[^\\\\]);")
.map(_.trim).filter(_ != "").toSeq
// Fix misplacement when comment is at the end of the query.
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "")

// Replace placeholders with original bracketed comments
val pattern = Pattern.compile("/\\*[0-9]+\\*/")
val queries = tempQueries.map { query =>
var newQuery = query
val m = pattern.matcher(query)
while(m.find()) {
val group = m.group
newQuery = newQuery.replace(group, multiCommentMap(group))
}
newQuery
}

val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6))
val settings = settingLines.flatMap(_.split(",").map { kv =>
val (conf, value) = kv.span(_ != '=')
Expand Down