Commit bee07cd

Address gatorsmile's comments
1 parent 6e37517 commit bee07cd

6 files changed (+52, -24 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 4 additions & 4 deletions

@@ -1239,8 +1239,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val attr = ctx.fieldName.getText
     expression(ctx.base) match {
       case unresolved_attr @ UnresolvedAttribute(nameParts) =>
-        matchEscapedIdentifier(ctx.fieldName.getStart.getText) match {
-          case Some(i) if conf.supportQuotedIdentifiers =>
+        ctx.fieldName.getStart.getText match {
+          case escapedIdentifier(i) if conf.supportQuotedRegexColumnName =>
             UnresolvedRegex(i, Some(unresolved_attr.name))
           case _ =>
             UnresolvedAttribute(nameParts :+ attr)
@@ -1255,8 +1255,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    * quoted in ``
    */
   override def visitColumnReference(ctx: ColumnReferenceContext): Expression = withOrigin(ctx) {
-    matchEscapedIdentifier(ctx.getStart.getText) match {
-      case Some(i) if conf.supportQuotedIdentifiers =>
+    ctx.getStart.getText match {
+      case escapedIdentifier(i) if conf.supportQuotedRegexColumnName =>
         UnresolvedRegex(i, None)
       case _ =>
         UnresolvedAttribute.quoted(ctx.getText)
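
For context, a minimal sketch of what this parser change enables (not part of the diff; assumes a SparkSession named `spark` and a hypothetical table `people` with columns such as `name_first` and `name_last`):

// Enable the flag introduced below in SQLConf; it defaults to false.
spark.conf.set("spark.sql.parser.quotedRegexColumnNames", "true")

// With the flag on, the backquoted token matches escapedIdentifier, so the
// parser builds UnresolvedRegex and the analyzer expands it to every column
// whose name matches the pattern.
spark.sql("SELECT `name_.*` FROM people").show()

// With the flag off (the default), the same backquoted token is a plain quoted
// identifier and would have to match a column literally named "name_.*".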

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala

Lines changed: 3 additions & 9 deletions

@@ -177,17 +177,11 @@ object ParserUtils {
     sb.toString()
   }
 
+  /** the column name pattern in quoted regex without qualifier */
   val escapedIdentifier = "`(.+)`".r
 
-  /**
-   * Return the substring extracted using regex
-   */
-  def matchEscapedIdentifier(b: String): Option[String] = {
-    b match {
-      case escapedIdentifier(i) => Some(i)
-      case _ => None
-    }
-  }
+  /** the column name pattern in quoted regex with qualifier */
+  val qualifiedEscapedIdentifier = ("(.+)" + """.""" + "`(.+)`").r
 
   /** Some syntactic sugar which makes it easier to work with optional clauses for LogicalPlans. */
   implicit class EnhancedLogicalPlan(val plan: LogicalPlan) extends AnyVal {
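
A minimal sketch of how the two regex extractors behave (not part of the diff; the input strings are made up):

import org.apache.spark.sql.catalyst.parser.ParserUtils

// Unqualified form: the single group captures the text between the backticks.
"`(_1|_2)`" match {
  case ParserUtils.escapedIdentifier(pattern) => println(pattern)  // prints (_1|_2)
}

// Qualified form: the first group captures the qualifier, the second the pattern.
"t.`(_1|_2)`" match {
  case ParserUtils.qualifiedEscapedIdentifier(qualifier, pattern) =>
    println(s"$qualifier -> $pattern")  // prints t -> (_1|_2)
}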

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 3 additions & 3 deletions

@@ -795,9 +795,9 @@ object SQLConf {
     .intConf
     .createWithDefault(UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD.toInt)
 
-  val SUPPORT_QUOTED_IDENTIFIERS = buildConf("spark.sql.support.quoted.identifiers")
+  val SUPPORT_QUOTED_REGEX_COLUMN_NAME = buildConf("spark.sql.parser.quotedRegexColumnNames")
     .internal()
-    .doc("When true, identifiers specified by regex patterns will be expanded.")
+    .doc("When true, column names specified by quoted regex pattern will be expanded.")
     .booleanConf
     .createWithDefault(false)
 
@@ -1057,7 +1057,7 @@ class SQLConf extends Serializable with Logging {
 
   def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO)
 
-  def supportQuotedIdentifiers: Boolean = getConf(SUPPORT_QUOTED_IDENTIFIERS)
+  def supportQuotedRegexColumnName: Boolean = getConf(SUPPORT_QUOTED_REGEX_COLUMN_NAME)
 
   /** ********************** SQLConf functionality methods ************ */
 
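
A minimal sketch of toggling the renamed flag at the session level (not part of the diff; assumes a SparkSession named `spark`):

import org.apache.spark.sql.internal.SQLConf

// The key constant avoids hard-coding "spark.sql.parser.quotedRegexColumnNames";
// the conf is internal and defaults to false.
spark.conf.set(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key, "true")

// Catalyst code reads the same setting through the typed accessor added above,
// i.e. conf.supportQuotedRegexColumnName, as in AstBuilder and Dataset.col.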

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 8 additions & 5 deletions

@@ -25,9 +25,7 @@ import scala.collection.JavaConverters._
 import scala.language.implicitConversions
 import scala.reflect.runtime.universe.TypeTag
 import scala.util.control.NonFatal
-
 import org.apache.commons.lang3.StringUtils
-
 import org.apache.spark.annotation.{DeveloperApi, Experimental, InterfaceStability}
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.api.java.function._
@@ -40,13 +38,13 @@ import org.apache.spark.sql.catalyst.catalog.CatalogRelation
 import org.apache.spark.sql.catalyst.encoders._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
-import org.apache.spark.sql.catalyst.json.{JacksonGenerator, JSONOptions}
+import org.apache.spark.sql.catalyst.json.{JSONOptions, JacksonGenerator}
 import org.apache.spark.sql.catalyst.optimizer.CombineUnions
-import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.catalyst.parser.{ParseException, ParserUtils}
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, PartitioningCollection}
-import org.apache.spark.sql.catalyst.util.{usePrettyExpression, DateTimeUtils}
+import org.apache.spark.sql.catalyst.util.{DateTimeUtils, usePrettyExpression}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.datasources.LogicalRelation
@@ -1188,6 +1186,11 @@ class Dataset[T] private[sql](
   def col(colName: String): Column = colName match {
     case "*" =>
       Column(ResolvedStar(queryExecution.analyzed.output))
+    case ParserUtils.escapedIdentifier(i) if sqlContext.conf.supportQuotedRegexColumnName =>
+      Column(UnresolvedRegex(i, None))
+    case ParserUtils.qualifiedEscapedIdentifier(i, j)
+      if sqlContext.conf.supportQuotedRegexColumnName =>
+      Column(UnresolvedRegex(j, Some(i)))
     case _ =>
       val expr = resolve(colName)
       Column(expr)
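
A minimal usage sketch of the new Dataset.col branches (not part of the diff; assumes a SparkSession named `spark` with its implicits imported):

import spark.implicits._

spark.conf.set("spark.sql.parser.quotedRegexColumnNames", "true")
val df = Seq(("a", 1), ("b", 2)).toDF("key", "value")

// Unqualified form: matches ParserUtils.escapedIdentifier, so col returns an
// UnresolvedRegex that expands to both columns.
df.select(df("`(key|value)`")).show()

// Qualified form: ParserUtils.qualifiedEscapedIdentifier splits the alias "g"
// from the pattern, and the regex is resolved against that qualifier.
df.alias("g").select(df("g.`(key|value)`")).show()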

sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala

Lines changed: 33 additions & 2 deletions

@@ -245,7 +245,8 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
   }
 
   test("select 3, regex") {
-    val ds = Seq(("a", 1), ("b", 2), ("c", 3)).toDF()
+    val ds = Seq(("a", 1), ("b", 2), ("c", 3)).toDS()
+
     intercept[AnalysisException] {
       ds.select(expr("`(_1)?+.+`").as[Int])
     }
@@ -254,7 +255,37 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
       ds.select(expr("`(_1|_2)`").as[Int])
     }
 
-    withSQLConf(SQLConf.SUPPORT_QUOTED_IDENTIFIERS.key -> "true") {
+    intercept[AnalysisException] {
+      ds.select(ds("`(_1)?+.+`"))
+    }
+
+    intercept[AnalysisException] {
+      ds.select(ds("`(_1|_2)`"))
+    }
+
+    withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "true") {
+      checkDataset(
+        ds.select(ds("`(_1|_2)`"))
+          .select(expr("named_struct('a', _1, 'b', _2)").as[ClassData]),
+        ClassData("a", 1), ClassData("b", 2), ClassData("c", 3))
+
+      checkDataset(
+        ds.alias("g")
+          .select(ds("g.`(_1|_2)`"))
+          .select(expr("named_struct('a', _1, 'b', _2)").as[ClassData]),
+        ClassData("a", 1), ClassData("b", 2), ClassData("c", 3))
+
+      checkDataset(
+        ds.select(ds("`(_1)?+.+`"))
+          .select(expr("_2").as[Int]),
+        1, 2, 3)
+
+      checkDataset(
+        ds.alias("g")
+          .select(ds("g.`(_1)?+.+`"))
+          .select(expr("_2").as[Int]),
+        1, 2, 3)
+
       checkDataset(
         ds.select(expr("`(_1)?+.+`").as[Int]),
         1, 2, 3)

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 1 addition & 1 deletion

@@ -2664,7 +2664,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     }
 
     // now, turn on hive.support.quoted.identifiers
-    withSQLConf(SQLConf.SUPPORT_QUOTED_IDENTIFIERS.key -> "true") {
+    withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "true") {
       checkAnswer(
         sql(
           """
