
Commit aa77f8a

marmbrus authored and pwendell committed
SPARK-1562 Fix visibility / annotation of Spark SQL APIs
Author: Michael Armbrust <[email protected]>

Closes apache#489 from marmbrus/sqlDocFixes and squashes the following commits:

acee4f3 [Michael Armbrust] Fix visibility / annotation of Spark SQL APIs
1 parent 662c860 · commit aa77f8a

32 files changed: +143 -59 lines
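
The diffs below apply two recurring patterns. Classes that stay user-visible but are not yet stable gain the `@DeveloperApi` annotation from `org.apache.spark.annotation`, paired with a leading `:: DeveloperApi ::` ScalaDoc tag so the generated docs flag them; purely internal helpers are narrowed to `private[sql]`, hiding them from user code and from the published API docs. A minimal, self-contained sketch of the two patterns (class names here are hypothetical, not taken from the commit):

    package org.apache.spark.sql.execution

    import org.apache.spark.annotation.DeveloperApi

    /**
     * :: DeveloperApi ::
     * Public but unstable: users may call it, and the docs flag it as a developer API.
     */
    @DeveloperApi
    class ExamplePublicOperatorDoc {            // hypothetical name, for illustration only
      def describe(): String = "developer-facing"
    }

    // Package-qualified private: visible anywhere under org.apache.spark.sql,
    // invisible to user code and omitted from the published API documentation.
    private[sql] class ExampleInternalHelper {  // hypothetical name, for illustration only
      def internalDetail(): Int = 42
    }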

project/SparkBuild.scala

Lines changed: 2 additions & 2 deletions
@@ -367,7 +367,7 @@ object SparkBuild extends Build {
     publish := {},

     unidocProjectFilter in (ScalaUnidoc, unidoc) :=
-      inAnyProject -- inProjects(repl, examples, tools, yarn, yarnAlpha),
+      inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha),
     unidocProjectFilter in (JavaUnidoc, unidoc) :=
       inAnyProject -- inProjects(repl, examples, bagel, graphx, catalyst, tools, yarn, yarnAlpha),

@@ -457,7 +457,7 @@ object SparkBuild extends Build {
   def catalystSettings = sharedSettings ++ Seq(
     name := "catalyst",
     // The mechanics of rewriting expression ids to compare trees in some test cases makes
-    // assumptions about the the expression ids being contiguious. Running tests in parallel breaks
+    // assumptions about the the expression ids being contiguous. Running tests in parallel breaks
     // this non-deterministically. TODO: FIX THIS.
     parallelExecution in Test := false,
     libraryDependencies ++= Seq(

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 /**
  * Contains functions that are shared between all SchemaRDD types (i.e., Scala, Java)
  */
-trait SchemaRDDLike {
+private[sql] trait SchemaRDDLike {
   @transient val sqlContext: SQLContext
   @transient protected[spark] val logicalPlan: LogicalPlan

sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala

Lines changed: 3 additions & 0 deletions
@@ -19,12 +19,14 @@ package org.apache.spark.sql.execution

 import java.util.HashMap

+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
 import org.apache.spark.sql.catalyst.errors._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical._

 /**
+ * :: DeveloperApi ::
  * Groups input data by `groupingExpressions` and computes the `aggregateExpressions` for each
  * group.
  *
@@ -34,6 +36,7 @@ import org.apache.spark.sql.catalyst.plans.physical._
  * @param aggregateExpressions expressions that are computed for each group.
  * @param child the input data source.
  */
+@DeveloperApi
 case class Aggregate(
     partial: Boolean,
     groupingExpressions: Seq[Expression],

sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala

Lines changed: 6 additions & 1 deletion
@@ -17,6 +17,7 @@

 package org.apache.spark.sql.execution

+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.{HashPartitioner, RangePartitioner, SparkConf}
 import org.apache.spark.rdd.ShuffledRDD
 import org.apache.spark.sql.Row
@@ -26,6 +27,10 @@ import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.util.MutablePair

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends UnaryNode {

   override def outputPartitioning = newPartitioning
@@ -81,7 +86,7 @@ case class Exchange(newPartitioning: Partitioning, child: SparkPlan) extends Una
  * [[catalyst.plans.physical.Distribution Distribution]] requirements for each operator by inserting
  * [[Exchange]] Operators where required.
  */
-object AddExchange extends Rule[SparkPlan] {
+private[sql] object AddExchange extends Rule[SparkPlan] {
   // TODO: Determine the number of partitions.
   val numPartitions = 150

sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala

Lines changed: 3 additions & 0 deletions
@@ -17,9 +17,11 @@

 package org.apache.spark.sql.execution

+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.sql.catalyst.expressions.{Generator, JoinedRow, Literal, Projection}

 /**
+ * :: DeveloperApi ::
  * Applies a [[catalyst.expressions.Generator Generator]] to a stream of input rows, combining the
  * output of each into a new stream of rows. This operation is similar to a `flatMap` in functional
  * programming with one important additional feature, which allows the input rows to be joined with
@@ -29,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.{Generator, JoinedRow, Literal,
  * @param outer when true, each input row will be output at least once, even if the output of the
  *        given `generator` is empty. `outer` has no effect when `join` is false.
  */
+@DeveloperApi
 case class Generate(
     generator: Generator,
     join: Boolean,

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala

Lines changed: 10 additions & 3 deletions
@@ -17,6 +17,7 @@

 package org.apache.spark.sql.execution

+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Logging, Row}
 import org.apache.spark.sql.catalyst.trees
@@ -26,6 +27,10 @@ import org.apache.spark.sql.catalyst.plans.{QueryPlan, logical}
 import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.columnar.InMemoryColumnarTableScan

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
   self: Product =>

@@ -51,6 +56,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
 }

 /**
+ * :: DeveloperApi ::
  * Allows already planned SparkQueries to be linked into logical query plans.
  *
  * Note that in general it is not valid to use this class to link multiple copies of the same
@@ -59,6 +65,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
  * replace the output attributes with new copies of themselves without breaking any attribute
  * linking.
  */
+@DeveloperApi
 case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
   extends logical.LogicalPlan with MultiInstanceRelation {

@@ -77,15 +84,15 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
   }
 }

-trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] {
+private[sql] trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] {
   self: Product =>
 }

-trait UnaryNode extends SparkPlan with trees.UnaryNode[SparkPlan] {
+private[sql] trait UnaryNode extends SparkPlan with trees.UnaryNode[SparkPlan] {
   self: Product =>
   override def outputPartitioning: Partitioning = child.outputPartitioning
 }

-trait BinaryNode extends SparkPlan with trees.BinaryNode[SparkPlan] {
+private[sql] trait BinaryNode extends SparkPlan with trees.BinaryNode[SparkPlan] {
   self: Product =>
 }
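
This file shows the split most clearly: the abstract SparkPlan base stays public and annotated, since external developers may implement physical operators against it, while the arity helper traits (LeafNode, UnaryNode, BinaryNode) become private[sql]. A self-contained sketch of that shape, using placeholder names rather than the real Catalyst types:

    package org.apache.spark.sql.execution

    import org.apache.spark.annotation.DeveloperApi

    /** :: DeveloperApi ::  Public extension point for physical operators (sketch only). */
    @DeveloperApi
    abstract class PlanNodeSketch {                      // stands in for SparkPlan
      def children: Seq[PlanNodeSketch]
      def execute(): Iterator[Any]
    }

    // Internal convenience trait, analogous to the now private[sql] UnaryNode above.
    private[sql] trait UnaryNodeSketch extends PlanNodeSketch {
      def child: PlanNodeSketch
      override def children: Seq[PlanNodeSketch] = child :: Nil
    }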

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala

Lines changed: 4 additions & 4 deletions
@@ -27,7 +27,7 @@ import org.apache.spark.serializer.KryoSerializer
 import org.apache.spark.util.MutablePair
 import org.apache.spark.util.Utils

-class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
+private[sql] class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
   override def newKryo(): Kryo = {
     val kryo = new Kryo()
     kryo.setRegistrationRequired(false)
@@ -50,7 +50,7 @@ class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) {
   }
 }

-object SparkSqlSerializer {
+private[sql] object SparkSqlSerializer {
   // TODO (lian) Using KryoSerializer here is workaround, needs further investigation
   // Using SparkSqlSerializer here makes BasicQuerySuite to fail because of Kryo serialization
   // related error.
@@ -68,7 +68,7 @@ object SparkSqlSerializer {
   }
 }

-class BigDecimalSerializer extends Serializer[BigDecimal] {
+private[sql] class BigDecimalSerializer extends Serializer[BigDecimal] {
   def write(kryo: Kryo, output: Output, bd: math.BigDecimal) {
     // TODO: There are probably more efficient representations than strings...
     output.writeString(bd.toString())
@@ -83,7 +83,7 @@ class BigDecimalSerializer extends Serializer[BigDecimal] {
  * Maps do not have a no arg constructor and so cannot be serialized by default. So, we serialize
  * them as `Array[(k,v)]`.
  */
-class MapSerializer extends Serializer[Map[_,_]] {
+private[sql] class MapSerializer extends Serializer[Map[_,_]] {
   def write(kryo: Kryo, output: Output, map: Map[_,_]) {
     kryo.writeObject(output, map.flatMap(e => Seq(e._1, e._2)).toArray)
   }
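
For context, the serializers made private here implement Kryo's custom Serializer contract. Only the write half of BigDecimalSerializer is visible in the hunk above; the sketch below shows the usual shape of such a serializer, with a read method that is an assumption based on Kryo's standard Serializer[T] API rather than code taken from this file:

    import com.esotericsoftware.kryo.{Kryo, Serializer}
    import com.esotericsoftware.kryo.io.{Input, Output}

    // Sketch of a string-backed BigDecimal serializer; only write appears in the diff,
    // the read method here is an assumed counterpart, not copied from the commit.
    class BigDecimalSerializerSketch extends Serializer[BigDecimal] {
      override def write(kryo: Kryo, output: Output, bd: BigDecimal): Unit = {
        output.writeString(bd.toString())   // simple, if not the most compact, encoding
      }

      override def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = {
        BigDecimal(input.readString())      // rebuild the value from its string form
      }
    }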

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.parquet._

-abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
+private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
   self: SQLContext#SparkPlanner =>

   object HashJoin extends Strategy {

sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala

Lines changed: 33 additions & 1 deletion
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution

 import scala.reflect.runtime.universe.TypeTag

+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
 import org.apache.spark.rdd.{RDD, ShuffledRDD}
 import org.apache.spark.sql.catalyst.ScalaReflection
@@ -27,6 +28,10 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical.{OrderedDistribution, UnspecifiedDistribution}
 import org.apache.spark.util.MutablePair

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends UnaryNode {
   override def output = projectList.map(_.toAttribute)

@@ -36,6 +41,10 @@ case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends
   }
 }

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode {
   override def output = child.output

@@ -44,6 +53,10 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode {
   }
 }

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class Sample(fraction: Double, withReplacement: Boolean, seed: Int, child: SparkPlan)
   extends UnaryNode {

@@ -53,6 +66,10 @@ case class Sample(fraction: Double, withReplacement: Boolean, seed: Int, child:
   override def execute() = child.execute().sample(withReplacement, fraction, seed)
 }

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends SparkPlan {
   // TODO: attributes output by union should be distinct for nullability purposes
   override def output = children.head.output
@@ -62,12 +79,14 @@ case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends
 }

 /**
+ * :: DeveloperApi ::
  * Take the first limit elements. Note that the implementation is different depending on whether
  * this is a terminal operator or not. If it is terminal and is invoked using executeCollect,
  * this operator uses Spark's take method on the Spark driver. If it is not terminal or is
  * invoked using execute, we first take the limit on each partition, and then repartition all the
  * data to a single partition to compute the global limit.
  */
+@DeveloperApi
 case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode {
   // TODO: Implement a partition local limit, and use a strategy to generate the proper limit plan:
   // partition local limit -> exchange into one partition -> partition local limit again
@@ -91,10 +110,12 @@ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) exte
 }

 /**
+ * :: DeveloperApi ::
  * Take the first limit elements as defined by the sortOrder. This is logically equivalent to
  * having a [[Limit]] operator after a [[Sort]] operator. This could have been named TopK, but
  * Spark's top operator does the opposite in ordering so we name it TakeOrdered to avoid confusion.
  */
+@DeveloperApi
 case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan)
                       (@transient sc: SparkContext) extends UnaryNode {
   override def otherCopyArgs = sc :: Nil
@@ -111,7 +132,10 @@ case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan)
   override def execute() = sc.makeRDD(executeCollect(), 1)
 }

-
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class Sort(
     sortOrder: Seq[SortOrder],
     global: Boolean,
@@ -134,6 +158,10 @@ case class Sort(
   override def output = child.output
 }

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 object ExistingRdd {
   def convertToCatalyst(a: Any): Any = a match {
     case s: Seq[Any] => s.map(convertToCatalyst)
@@ -167,6 +195,10 @@ object ExistingRdd {
   }
 }

+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
 case class ExistingRdd(output: Seq[Attribute], rdd: RDD[Row]) extends LeafNode {
   override def execute() = rdd
 }

sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.execution

-object DebugQuery {
+private[sql] object DebugQuery {
   def apply(plan: SparkPlan): SparkPlan = {
     val visited = new collection.mutable.HashSet[Long]()
     plan transform {
@@ -28,7 +28,7 @@ object DebugQuery {
   }
 }

-case class DebugNode(child: SparkPlan) extends UnaryNode {
+private[sql] case class DebugNode(child: SparkPlan) extends UnaryNode {
   def references = Set.empty
   def output = child.output
   def execute() = {
