From b42d28fd8e3942fb15c188236c54c9d95844fe9e Mon Sep 17 00:00:00 2001 From: fwang12 Date: Wed, 6 Jan 2021 12:26:15 +0800 Subject: [PATCH 01/10] [SPARK-33100][FOLLOWUP] Find correct bound for bracketed comment in spark-sql --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 9155eacfa489..6d43abeb289b 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -530,6 +530,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 + var bracketedCommentRightBound = -1 var includingStatement = false val ret = new JArrayList[String] @@ -539,6 +540,10 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) for (index <- 0 until line.length) { + if (index > 0 && index - 1 == bracketedCommentRightBound) { + bracketedCommentLevel -= 1 + } + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped // See the comment above about SPARK-31595 @@ -585,7 +590,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote) { // Ignores '/' in any case of quotes } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { - bracketedCommentLevel -= 1 + bracketedCommentRightBound = index } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { bracketedCommentLevel += 1 } From a3173449c03af8e54106795996e3731dc7694242 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Wed, 6 Jan 2021 21:02:46 +0800 Subject: [PATCH 02/10] complete ut --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6708cf99e7f4..1a96012a0b4e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -577,8 +577,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "/* SELECT 'test';*/ SELECT 'test';" -> "test", ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", "/* SELECT 'test';*/;; SELECT 'test';" -> "test", - "SELECT 'test'; -- SELECT 'test';" -> "", - "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "SELECT 'test'; -- SELECT 'test';" -> "test", + "SELECT 'test'; /* SELECT 'test';*/;" -> "test", "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", "/*\nmulti-line\n*/ SELECT 'test';" -> "test", "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" From ada4d5cfbb3c1667f41f19374f3213640c1bed6e Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 10:39:36 +0800 Subject: [PATCH 03/10] address comment --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 6d43abeb289b..2629e77c5f12 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -530,18 +530,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 - var bracketedCommentRightBound = -1 - var includingStatement = false + var leavingBracketedComment = false + var isStatement = false val ret = new JArrayList[String] def insideBracketedComment: Boolean = bracketedCommentLevel > 0 def insideComment: Boolean = insideSimpleComment || insideBracketedComment - def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + def statementInProgress(index: Int): Boolean = isStatement || (!insideComment && index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) for (index <- 0 until line.length) { - if (index > 0 && index - 1 == bracketedCommentRightBound) { + // judge and reduce bracketed comment level if the flag of leaving bracketed comment is true, + // because that the last character of bracketed comment is still inside the comment and we can + // only mark it and reduce bracketed comment level in next loop + if (leavingBracketedComment) { bracketedCommentLevel -= 1 + leavingBracketedComment = false } if (line.charAt(index) == '\'' && !insideComment) { @@ -573,12 +577,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - if (includingStatement) { + if (isStatement) { // split, do not include ; itself ret.add(line.substring(beginIndex, index)) } beginIndex = index + 1 - includingStatement = false + isStatement = false } } else if (line.charAt(index) == '\n') { // with a new line the inline simple comment should end. @@ -590,7 +594,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote) { // Ignores '/' in any case of quotes } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { - bracketedCommentRightBound = index + leavingBracketedComment = true } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { bracketedCommentLevel += 1 } @@ -602,9 +606,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } - includingStatement = statementBegin(index) + isStatement = statementInProgress(index) } - if (includingStatement) { + if (isStatement) { ret.add(line.substring(beginIndex)) } ret From e1814a68d69b9179e80ee6c0d16d6edd9b95d8c2 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 13:53:09 +0800 Subject: [PATCH 04/10] address nits --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 2629e77c5f12..8606aaab1cae 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -540,9 +540,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) for (index <- 0 until line.length) { - // judge and reduce bracketed comment level if the flag of leaving bracketed comment is true, - // because that the last character of bracketed comment is still inside the comment and we can - // only mark it and reduce bracketed comment level in next loop + // Checks if we need to decrement a bracketed comment level; the last character '/' of + // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true + // in the previous loop and we decrement the level here if needed. if (leavingBracketedComment) { bracketedCommentLevel -= 1 leavingBracketedComment = false @@ -594,6 +594,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote) { // Ignores '/' in any case of quotes } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { + // Decrements `bracketedCommentLevel` at the beginning of the next loop leavingBracketedComment = true } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { bracketedCommentLevel += 1 From 6ee533263faa8d956f05397c23375a718b3264b4 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 15:35:58 +0800 Subject: [PATCH 05/10] retest please --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 1a96012a0b4e..1825fa70af1c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -592,6 +592,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "BroadcastHashJoin" + ) } } From 612a58b0ac8f95963cccd0f305beffed01fa3a6b Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 15:36:10 +0800 Subject: [PATCH 06/10] first time --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 1825fa70af1c..1a96012a0b4e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -592,7 +592,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "BroadcastHashJoin" - ) } } From 39a233344fb0f57d5736cabb789b1ad626408a69 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 15:37:13 +0800 Subject: [PATCH 07/10] retest please --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 1a96012a0b4e..1825fa70af1c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -592,6 +592,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "BroadcastHashJoin" + ) } } From d9d4342a3461dcebc6f6192f293210376bfa90cf Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 15:37:55 +0800 Subject: [PATCH 08/10] second time --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 1825fa70af1c..1a96012a0b4e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -592,7 +592,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "BroadcastHashJoin" - ) } } From bff0fb5ecf014444ccb29a3052fb871ed2ce7627 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 15:53:47 +0800 Subject: [PATCH 09/10] retest please --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 1a96012a0b4e..1825fa70af1c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -592,6 +592,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "BroadcastHashJoin" + ) } } From d348489f93cfad39d6a7102261ff7846d3254765 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 15:54:02 +0800 Subject: [PATCH 10/10] third time --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 1825fa70af1c..1a96012a0b4e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -592,7 +592,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "BroadcastHashJoin" - ) } }