Skip to content

Commit a9ec033

Browse files
concretevitamin authored and marmbrus committed
[SPARK-1704][SQL] Fully support EXPLAIN commands as SchemaRDD.
This PR attempts to resolve [SPARK-1704](https://issues.apache.org/jira/browse/SPARK-1704) by introducing a physical plan for EXPLAIN commands, which just prints out the debug string (containing various SparkSQL's plans) of the corresponding QueryExecution for the actual query. Author: Zongheng Yang <[email protected]> Closes #1003 from concretevitamin/explain-cmd and squashes the following commits: 5b7911f [Zongheng Yang] Add a regression test. 1bfa379 [Zongheng Yang] Modify output(). 719ada9 [Zongheng Yang] Override otherCopyArgs for ExplainCommandPhysical. 4318fd7 [Zongheng Yang] Make all output one Row. 439c6ab [Zongheng Yang] Minor cleanups. 408f574 [Zongheng Yang] SPARK-1704: Add CommandStrategy and ExplainCommandPhysical.
1 parent c6e041d commit a9ec033

File tree

6 files changed

+68
-4
lines changed

6 files changed

+68
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical
2020
import org.apache.spark.sql.catalyst.errors.TreeNodeException
2121
import org.apache.spark.sql.catalyst.expressions._
2222
import org.apache.spark.sql.catalyst.plans.QueryPlan
23-
import org.apache.spark.sql.catalyst.types.StructType
23+
import org.apache.spark.sql.catalyst.types.{StringType, StructType}
2424
import org.apache.spark.sql.catalyst.trees
2525

2626
abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
@@ -102,7 +102,7 @@ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] {
102102
*/
103103
abstract class Command extends LeafNode {
104104
self: Product =>
105-
def output = Seq.empty
105+
def output: Seq[Attribute] = Seq.empty
106106
}
107107

108108
/**
@@ -115,7 +115,9 @@ case class NativeCommand(cmd: String) extends Command
115115
* Returned by a parser when the user only wants to see what query plan would be executed, without
116116
* actually performing the execution.
117117
*/
118-
case class ExplainCommand(plan: LogicalPlan) extends Command
118+
/**
 * Logical command node for EXPLAIN: wraps the query to be explained instead of
 * executing it. The result schema is a single non-nullable string column.
 */
case class ExplainCommand(plan: LogicalPlan) extends Command {
  // One string-valued column named "plan" carries the rendered plan text.
  override def output = AttributeReference("plan", StringType, nullable = false)() :: Nil
}
119121

120122
/**
121123
* A logical plan node with single child.

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
191191
val sparkContext = self.sparkContext
192192

193193
val strategies: Seq[Strategy] =
194+
CommandStrategy(self) ::
194195
TakeOrdered ::
195196
PartialAggregation ::
196197
LeftSemiJoin ::
@@ -256,6 +257,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
256257
Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil
257258
}
258259

260+
// TODO: or should we make QueryExecution protected[sql]?
// Builds a QueryExecution for `plan`, exposing the planner's intermediate
// phases (used by CommandStrategy to materialize an EXPLAIN result).
protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution {
  val logical = plan
}
264+
259265
/**
260266
* The primary workflow for executing relational queries using Spark. Designed to allow easy
261267
* access to the intermediate phases of query execution for developers.

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,4 +233,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
233233
case _ => Nil
234234
}
235235
}
236+
237+
// TODO: this should be merged with SPARK-1508's SetCommandStrategy
/** Plans command nodes — currently only EXPLAIN — into physical operators. */
case class CommandStrategy(context: SQLContext) extends Strategy {
  def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
    case logical.ExplainCommand(explained) =>
      // EXPLAIN never runs the wrapped query; it renders the debug string of
      // the query's full QueryExecution instead.
      val queryExecution = context.mkQueryExecution(explained)
      execution.ExplainCommandPhysical(queryExecution.executedPlan, plan.output)(context) :: Nil
    case _ => Nil
  }
}
246+
236247
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, Row}
import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}

/**
 * Physical operator implementing EXPLAIN: instead of executing `child`, it
 * returns the child plan's string representation as a single-row RDD.
 *
 * @param child   the executed (physical) plan of the query being explained
 * @param output  the one-string-column schema of the result
 * @param context the active SQLContext, used only to create the result RDD
 */
case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
                                 (@transient context: SQLContext) extends UnaryNode {

  def execute(): RDD[Row] = {
    // The whole plan description goes into one row with a single column.
    val explanationRow: Row = new GenericRow(Array[Any](child.toString))
    context.sparkContext.parallelize(explanationRow :: Nil)
  }

  // `context` lives in a second (curried) parameter list, so it must be
  // supplied explicitly whenever tree transformations copy this node.
  override def otherCopyArgs = context :: Nil
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
218218
val hiveContext = self
219219

220220
override val strategies: Seq[Strategy] = Seq(
221+
CommandStrategy(self),
221222
TakeOrdered,
222223
ParquetOperations,
223224
HiveTableScans,
@@ -304,7 +305,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
304305
*/
305306
def stringResult(): Seq[String] = analyzed match {
306307
case NativeCommand(cmd) => runSqlHive(cmd)
307-
case ExplainCommand(plan) => new QueryExecution { val logical = plan }.toString.split("\n")
308+
case ExplainCommand(plan) => mkQueryExecution(plan).toString.split("\n")
308309
case query =>
309310
val result: Seq[Seq[Any]] = toRdd.collect().toSeq
310311
// We need the types so we can output struct field names

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.sql.hive.execution
1919

2020
import org.apache.spark.sql.hive.test.TestHive._
21+
import org.apache.spark.sql.hive.test.TestHive
2122

2223
/**
2324
* A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -159,4 +160,15 @@ class HiveQuerySuite extends HiveComparisonTest {
159160
hql("SHOW TABLES").toString
160161
hql("SELECT * FROM src").toString
161162
}
163+
164+
test("SPARK-1704: Explain commands as a SchemaRDD") {
  hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")

  val explanation = hql("explain select key, count(value) from src group by key")

  // EXPLAIN collapses to exactly one row holding the plan description.
  assert(explanation.collect().length == 1)

  // The SchemaRDD's own logical plan is the ExplainCommand wrapper...
  assert(explanation.toString.contains("ExplainCommand"))

  // ...but the returned data must describe the explained query, not the wrapper.
  assert(explanation.filter(row => row.toString.contains("ExplainCommand")).collect().length == 0,
    "actual contents of the result should be the plans of the query to be explained")

  TestHive.reset()
}
173+
162174
}

0 commit comments

Comments
 (0)