diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html index c2afa993b2f20..bfe31aae555ba 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html @@ -20,47 +20,47 @@ - + App ID - + App Name - + Attempt ID - + Started - + Completed - + Duration - + Spark User - + Last Updated - + Event Log diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 7db8c27e8f7c9..5ec1ce15a2127 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -195,7 +195,7 @@ $(document).ready(function() { } $(selector).DataTable(conf); - $('#hisotry-summary [data-toggle="tooltip"]').tooltip(); + $('#history-summary [data-toggle="tooltip"]').tooltip(); }); }); }); diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala index 048c4ad0146e2..6764daa0df529 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala @@ -20,6 +20,8 @@ package org.apache.spark.ui.jobs import scala.collection.mutable import scala.collection.mutable.{HashMap, LinkedHashMap} +import com.google.common.collect.Interners + import org.apache.spark.JobExecutionStatus import org.apache.spark.executor._ import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} @@ -141,6 +143,14 @@ private[spark] object UIData { } object TaskUIData { + + private val stringInterner = Interners.newWeakInterner[String]() + + /** String interning to reduce the memory usage. */ + private def weakIntern(s: String): String = { + stringInterner.intern(s) + } + def apply(taskInfo: TaskInfo): TaskUIData = { new TaskUIData(dropInternalAndSQLAccumulables(taskInfo)) } @@ -155,8 +165,8 @@ private[spark] object UIData { index = taskInfo.index, attemptNumber = taskInfo.attemptNumber, launchTime = taskInfo.launchTime, - executorId = taskInfo.executorId, - host = taskInfo.host, + executorId = weakIntern(taskInfo.executorId), + host = weakIntern(taskInfo.host), taskLocality = taskInfo.taskLocality, speculative = taskInfo.speculative ) diff --git a/dev/lint-python b/dev/lint-python index c6f3fbfab84ed..07e2606d45143 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -20,7 +20,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")" # Exclude auto-geneated configuration file. -PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" -not -path "*python/docs/conf.py" )" +PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" )" PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt" PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt" PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt" @@ -64,7 +64,7 @@ export "PATH=$PYTHONPATH:$PATH" #+ first, but we do so so that the check status can #+ be output before the report, like with the #+ scalastyle and RAT checks. -python "$PEP8_SCRIPT_PATH" --ignore=E402,E731,E241,W503,E226 --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" +python "$PEP8_SCRIPT_PATH" --config=dev/tox.ini $PATHS_TO_CHECK >> "$PEP8_REPORT_PATH" pep8_status="${PIPESTATUS[0]}" if [ "$compile_status" -eq 0 -a "$pep8_status" -eq 0 ]; then diff --git a/dev/tox.ini b/dev/tox.ini index 76e3f42cde62d..eeeb637460cfb 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -14,5 +14,6 @@ # limitations under the License. [pep8] +ignore=E402,E731,E241,W503,E226 max-line-length=100 -exclude=cloudpickle.py,heapq3.py,shared.py +exclude=cloudpickle.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4584aea6196a6..43f7ff5cb4a36 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -371,7 +371,7 @@ querySpecification (RECORDREADER recordReader=STRING)? fromClause? (WHERE where=booleanExpression)?) - | ((kind=SELECT hint? setQuantifier? namedExpressionSeq fromClause? + | ((kind=SELECT (hints+=hint)* setQuantifier? namedExpressionSeq fromClause? | fromClause (kind=SELECT setQuantifier? namedExpressionSeq)?) lateralView* (WHERE where=booleanExpression)? @@ -381,12 +381,12 @@ querySpecification ; hint - : '/*+' hintStatement '*/' + : '/*+' hintStatements+=hintStatement (','? hintStatements+=hintStatement)* '*/' ; hintStatement : hintName=identifier - | hintName=identifier '(' parameters+=identifier (',' parameters+=identifier)* ')' + | hintName=identifier '(' parameters+=primaryExpression (',' parameters+=primaryExpression)* ')' ; fromClause diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index 86c788aaa828a..62a3482d9fac1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import java.util.Locale +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin @@ -91,7 +92,12 @@ object ResolveHints { ResolvedHint(h.child, HintInfo(isBroadcastable = Option(true))) } else { // Otherwise, find within the subtree query plans that should be broadcasted. - applyBroadcastHint(h.child, h.parameters.toSet) + applyBroadcastHint(h.child, h.parameters.map { + case tableName: String => tableName + case tableId: UnresolvedAttribute => tableId.name + case unsupported => throw new AnalysisException("Broadcast hint parameter should be " + + s"an identifier or string but was $unsupported (${unsupported.getClass}") + }.toSet) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index ed423e7e334b6..beee93d906f0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -381,6 +381,9 @@ package object dsl { def analyze: LogicalPlan = EliminateSubqueryAliases(analysis.SimpleAnalyzer.execute(logicalPlan)) + + def hint(name: String, parameters: Any*): LogicalPlan = + UnresolvedHint(name, parameters, logicalPlan) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4eb5560155781..a16611af28a7d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -407,7 +407,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val withWindow = withDistinct.optionalMap(windows)(withWindows) // Hint - withWindow.optionalMap(hint)(withHints) + hints.asScala.foldRight(withWindow)(withHints) } } @@ -533,13 +533,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Add a [[UnresolvedHint]] to a logical plan. + * Add [[UnresolvedHint]]s to a logical plan. */ private def withHints( ctx: HintContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) { - val stmt = ctx.hintStatement - UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(_.getText), query) + var plan = query + ctx.hintStatements.asScala.reverse.foreach { case stmt => + plan = UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(expression), plan) + } + plan } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala index 5fe6d2d8da064..d16fae56b3d4a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala @@ -23,9 +23,11 @@ import org.apache.spark.sql.internal.SQLConf /** * A general hint for the child that is not yet resolved. This node is generated by the parser and * should be removed This node will be eliminated post analysis. - * A pair of (name, parameters). + * @param name the name of the hint + * @param parameters the parameters of the hint + * @param child the [[LogicalPlan]] on which this hint applies */ -case class UnresolvedHint(name: String, parameters: Seq[String], child: LogicalPlan) +case class UnresolvedHint(name: String, parameters: Seq[Any], child: LogicalPlan) extends UnaryNode { override lazy val resolved: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 1739b0cfa2761..54bee02e44e43 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -345,7 +345,8 @@ object SQLConf { .createWithDefault(true) val COLUMN_NAME_OF_CORRUPT_RECORD = buildConf("spark.sql.columnNameOfCorruptRecord") - .doc("The name of internal column for storing raw/un-parsed JSON records that fail to parse.") + .doc("The name of internal column for storing raw/un-parsed JSON and CSV records that fail " + + "to parse.") .stringConf .createWithDefault("_corrupt_record") @@ -535,8 +536,7 @@ object SQLConf { val IGNORE_CORRUPT_FILES = buildConf("spark.sql.files.ignoreCorruptFiles") .doc("Whether to ignore corrupt files. If true, the Spark jobs will continue to run when " + - "encountering corrupted or non-existing and contents that have been read will still be " + - "returned.") + "encountering corrupted files and the contents that have been read will still be returned.") .booleanConf .createWithDefault(false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DSLHintSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DSLHintSuite.scala new file mode 100644 index 0000000000000..48a3ca2ccfb0b --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DSLHintSuite.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.analysis.AnalysisTest +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical._ + +class DSLHintSuite extends AnalysisTest { + lazy val a = 'a.int + lazy val b = 'b.string + lazy val c = 'c.string + lazy val r1 = LocalRelation(a, b, c) + + test("various hint parameters") { + comparePlans( + r1.hint("hint1"), + UnresolvedHint("hint1", Seq(), r1) + ) + + comparePlans( + r1.hint("hint1", 1, "a"), + UnresolvedHint("hint1", Seq(1, "a"), r1) + ) + + comparePlans( + r1.hint("hint1", 1, $"a"), + UnresolvedHint("hint1", Seq(1, $"a"), r1) + ) + + comparePlans( + r1.hint("hint1", Seq(1, 2, 3), Seq($"a", $"b", $"c")), + UnresolvedHint("hint1", Seq(Seq(1, 2, 3), Seq($"a", $"b", $"c")), r1) + ) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 3a26adaef9db0..d004d04569772 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -527,19 +527,13 @@ class PlanParserSuite extends PlanTest { val m = intercept[ParseException] { parsePlan("SELECT /*+ HINT() */ * FROM t") }.getMessage - assert(m.contains("no viable alternative at input")) - - // Hive compatibility: No database. - val m2 = intercept[ParseException] { - parsePlan("SELECT /*+ MAPJOIN(default.t) */ * from default.t") - }.getMessage - assert(m2.contains("mismatched input '.' expecting {')', ','}")) + assert(m.contains("mismatched input")) // Disallow space as the delimiter. val m3 = intercept[ParseException] { parsePlan("SELECT /*+ INDEX(a b c) */ * from default.t") }.getMessage - assert(m3.contains("mismatched input 'b' expecting {')', ','}")) + assert(m3.contains("mismatched input 'b' expecting")) comparePlans( parsePlan("SELECT /*+ HINT */ * FROM t"), @@ -547,27 +541,103 @@ class PlanParserSuite extends PlanTest { comparePlans( parsePlan("SELECT /*+ BROADCASTJOIN(u) */ * FROM t"), - UnresolvedHint("BROADCASTJOIN", Seq("u"), table("t").select(star()))) + UnresolvedHint("BROADCASTJOIN", Seq($"u"), table("t").select(star()))) comparePlans( parsePlan("SELECT /*+ MAPJOIN(u) */ * FROM t"), - UnresolvedHint("MAPJOIN", Seq("u"), table("t").select(star()))) + UnresolvedHint("MAPJOIN", Seq($"u"), table("t").select(star()))) comparePlans( parsePlan("SELECT /*+ STREAMTABLE(a,b,c) */ * FROM t"), - UnresolvedHint("STREAMTABLE", Seq("a", "b", "c"), table("t").select(star()))) + UnresolvedHint("STREAMTABLE", Seq($"a", $"b", $"c"), table("t").select(star()))) comparePlans( parsePlan("SELECT /*+ INDEX(t, emp_job_ix) */ * FROM t"), - UnresolvedHint("INDEX", Seq("t", "emp_job_ix"), table("t").select(star()))) + UnresolvedHint("INDEX", Seq($"t", $"emp_job_ix"), table("t").select(star()))) comparePlans( parsePlan("SELECT /*+ MAPJOIN(`default.t`) */ * from `default.t`"), - UnresolvedHint("MAPJOIN", Seq("default.t"), table("default.t").select(star()))) + UnresolvedHint("MAPJOIN", Seq(UnresolvedAttribute.quoted("default.t")), + table("default.t").select(star()))) comparePlans( parsePlan("SELECT /*+ MAPJOIN(t) */ a from t where true group by a order by a"), - UnresolvedHint("MAPJOIN", Seq("t"), + UnresolvedHint("MAPJOIN", Seq($"t"), table("t").where(Literal(true)).groupBy('a)('a)).orderBy('a.asc)) } + + test("SPARK-20854: select hint syntax with expressions") { + comparePlans( + parsePlan("SELECT /*+ HINT1(a, array(1, 2, 3)) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", + UnresolvedFunction("array", Literal(1) :: Literal(2) :: Literal(3) :: Nil, false)), + table("t").select(star()) + ) + ) + + comparePlans( + parsePlan("SELECT /*+ HINT1(a, array(1, 2, 3)) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", + UnresolvedFunction("array", Literal(1) :: Literal(2) :: Literal(3) :: Nil, false)), + table("t").select(star()) + ) + ) + + comparePlans( + parsePlan("SELECT /*+ HINT1(a, 5, 'a', b) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", Literal(5), Literal("a"), $"b"), + table("t").select(star()) + ) + ) + + comparePlans( + parsePlan("SELECT /*+ HINT1('a', (b, c), (1, 2)) */ * from t"), + UnresolvedHint("HINT1", + Seq(Literal("a"), + CreateStruct($"b" :: $"c" :: Nil), + CreateStruct(Literal(1) :: Literal(2) :: Nil)), + table("t").select(star()) + ) + ) + } + + test("SPARK-20854: multiple hints") { + comparePlans( + parsePlan("SELECT /*+ HINT1(a, 1) hint2(b, 2) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", Literal(1)), + UnresolvedHint("hint2", Seq($"b", Literal(2)), + table("t").select(star()) + ) + ) + ) + + comparePlans( + parsePlan("SELECT /*+ HINT1(a, 1),hint2(b, 2) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", Literal(1)), + UnresolvedHint("hint2", Seq($"b", Literal(2)), + table("t").select(star()) + ) + ) + ) + + comparePlans( + parsePlan("SELECT /*+ HINT1(a, 1) */ /*+ hint2(b, 2) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", Literal(1)), + UnresolvedHint("hint2", Seq($"b", Literal(2)), + table("t").select(star()) + ) + ) + ) + + comparePlans( + parsePlan("SELECT /*+ HINT1(a, 1), hint2(b, 2) */ /*+ hint3(c, 3) */ * from t"), + UnresolvedHint("HINT1", Seq($"a", Literal(1)), + UnresolvedHint("hint2", Seq($"b", Literal(2)), + UnresolvedHint("hint3", Seq($"c", Literal(3)), + table("t").select(star()) + ) + ) + ) + ) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index a9e487f464948..8abec85ee102a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1190,7 +1190,7 @@ class Dataset[T] private[sql]( * @since 2.2.0 */ @scala.annotation.varargs - def hint(name: String, parameters: String*): Dataset[T] = withTypedPlan { + def hint(name: String, parameters: Any*): Dataset[T] = withTypedPlan { UnresolvedHint(name, parameters, logicalPlan) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 1ba9a79446aad..34998cbd61552 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -200,11 +200,7 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { """.stripMargin.trim } - override def toString: String = completeString(appendStats = false) - - def toStringWithStats: String = completeString(appendStats = true) - - private def completeString(appendStats: Boolean): String = { + override def toString: String = { def output = Utils.truncatedString( analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ") val analyzedPlan = Seq( @@ -212,25 +208,29 @@ class QueryExecution(val sparkSession: SparkSession, val logical: LogicalPlan) { stringOrError(analyzed.treeString(verbose = true)) ).filter(_.nonEmpty).mkString("\n") - val optimizedPlanString = if (appendStats) { - // trigger to compute stats for logical plans - optimizedPlan.stats(sparkSession.sessionState.conf) - optimizedPlan.treeString(verbose = true, addSuffix = true) - } else { - optimizedPlan.treeString(verbose = true) - } - s"""== Parsed Logical Plan == |${stringOrError(logical.treeString(verbose = true))} |== Analyzed Logical Plan == |$analyzedPlan |== Optimized Logical Plan == - |${stringOrError(optimizedPlanString)} + |${stringOrError(optimizedPlan.treeString(verbose = true))} |== Physical Plan == |${stringOrError(executedPlan.treeString(verbose = true))} """.stripMargin.trim } + def stringWithStats: String = { + // trigger to compute stats for logical plans + optimizedPlan.stats(sparkSession.sessionState.conf) + + // only show optimized logical plan and physical plan + s"""== Optimized Logical Plan == + |${stringOrError(optimizedPlan.treeString(verbose = true, addSuffix = true))} + |== Physical Plan == + |${stringOrError(executedPlan.treeString(verbose = true))} + """.stripMargin.trim + } + /** A special namespace for commands that can be used to debug query execution. */ // scalastyle:off object debug { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 99d81c49f1e3b..2d82fcf4da6e9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -127,7 +127,7 @@ case class ExplainCommand( } else if (extended) { queryExecution.toString } else if (cost) { - queryExecution.toStringWithStats + queryExecution.stringWithStats } else { queryExecution.simpleString } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index a93b701146077..7202f1222d10f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -90,38 +90,38 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging { /** * A catalog that interacts with external systems. */ - lazy val externalCatalog: ExternalCatalog = - SharedState.reflect[ExternalCatalog, SparkConf, Configuration]( + lazy val externalCatalog: ExternalCatalog = { + val externalCatalog = SharedState.reflect[ExternalCatalog, SparkConf, Configuration]( SharedState.externalCatalogClassName(sparkContext.conf), sparkContext.conf, sparkContext.hadoopConfiguration) - // Create the default database if it doesn't exist. - { val defaultDbDefinition = CatalogDatabase( SessionCatalog.DEFAULT_DATABASE, "default database", CatalogUtils.stringToURI(warehousePath), Map()) - // Initialize default database if it doesn't exist + // Create default database if it doesn't exist if (!externalCatalog.databaseExists(SessionCatalog.DEFAULT_DATABASE)) { // There may be another Spark application creating default database at the same time, here we // set `ignoreIfExists = true` to avoid `DatabaseAlreadyExists` exception. externalCatalog.createDatabase(defaultDbDefinition, ignoreIfExists = true) } - } - // Make sure we propagate external catalog events to the spark listener bus - externalCatalog.addListener(new ExternalCatalogEventListener { - override def onEvent(event: ExternalCatalogEvent): Unit = { - sparkContext.listenerBus.post(event) - } - }) + // Make sure we propagate external catalog events to the spark listener bus + externalCatalog.addListener(new ExternalCatalogEventListener { + override def onEvent(event: ExternalCatalogEvent): Unit = { + sparkContext.listenerBus.post(event) + } + }) + + externalCatalog + } /** * A manager for global temporary views. */ - val globalTempViewManager: GlobalTempViewManager = { + lazy val globalTempViewManager: GlobalTempViewManager = { // System preserved database should not exists in metastore. However it's hard to guarantee it // for every session, because case-sensitivity differs. Here we always lowercase it to make our // life easier. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala new file mode 100644 index 0000000000000..60f6f23860ed9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.test.SharedSQLContext + +class DataFrameHintSuite extends PlanTest with SharedSQLContext { + import testImplicits._ + lazy val df = spark.range(10) + + private def check(df: Dataset[_], expected: LogicalPlan) = { + comparePlans( + df.queryExecution.logical, + expected + ) + } + + test("various hint parameters") { + check( + df.hint("hint1"), + UnresolvedHint("hint1", Seq(), + df.logicalPlan + ) + ) + + check( + df.hint("hint1", 1, "a"), + UnresolvedHint("hint1", Seq(1, "a"), df.logicalPlan) + ) + + check( + df.hint("hint1", 1, $"a"), + UnresolvedHint("hint1", Seq(1, $"a"), + df.logicalPlan + ) + ) + + check( + df.hint("hint1", Seq(1, 2, 3), Seq($"a", $"b", $"c")), + UnresolvedHint("hint1", Seq(Seq(1, 2, 3), Seq($"a", $"b", $"c")), + df.logicalPlan + ) + ) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala index aa1ca2909074f..3066a4f305f00 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala @@ -29,6 +29,12 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto import testImplicits._ test("show cost in explain command") { + // For readability, we only show optimized plan and physical plan in explain cost command + checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), + "Optimized Logical Plan", "Physical Plan") + checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), + "Parsed Logical Plan", "Analyzed Logical Plan") + // Only has sizeInBytes before ANALYZE command checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), "sizeInBytes") checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), "rowCount")