apache · jaceklaskowski · May 23, 2017 · May 25, 2017 · May 25, 2017 · May 25, 2017
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -88,8 +88,10 @@ object ScalaReflection extends ScalaReflection {
   }
 
   /**
-   * Given a type `T` this function constructs and ObjectType that holds a class of type
-   * Array[T].  Special handling is performed for primitive types to map them back to their raw
+   * Given a type `T` this function constructs `ObjectType` that holds a class of type
+   * `Array[T]`.
+   *
+   * Special handling is performed for primitive types to map them back to their raw
    * JVM form instead of the Scala Array that handles auto boxing.
    */
   private def arrayClassFor(tpe: `Type`): ObjectType = ScalaReflectionLock.synchronized {

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -85,8 +85,7 @@ object AnalysisContext {
 
 /**
  * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
- * [[UnresolvedRelation]]s into fully typed objects using information in a
- * [[SessionCatalog]] and a [[FunctionRegistry]].
+ * [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]].
  */
 class Analyzer(
     catalog: SessionCatalog,
@@ -1882,7 +1881,7 @@ class Analyzer(
      * `[Sum(_w0) OVER (PARTITION BY _w1 ORDER BY _w2)]` and the second returned value will be
      * [col1, col2 + col3 as _w0, col4 as _w1, col5 as _w2].
      *
-     * @return (seq of expressions containing at lease one window expressions,
+     * @return (seq of expressions containing at least one window expression,
      *          seq of non-window expressions)
      */
     private def extract(

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -235,7 +235,7 @@ case class ExpressionEncoder[T](
   assert(serializer.flatMap { ser =>
     val boundRefs = ser.collect { case b: BoundReference => b }
     assert(boundRefs.nonEmpty,
-      "each serializer expression should contains at least one `BoundReference`")
+      "each serializer expression should contain at least one `BoundReference`")
     boundRefs
   }.distinct.length <= 1, "all serializer expressions must use the same BoundReference.")
 

diff --git a/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/...lyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -800,7 +800,7 @@ class CodegenContext {
 
   /**
    * Generates code for expressions. If doSubexpressionElimination is true, subexpression
-   * elimination will be performed. Subexpression elimination assumes that the code will for each
+   * elimination will be performed. Subexpression elimination assumes that the code for each
    * expression will be combined in the `expressions` order.
    */
   def generateExpressions(expressions: Seq[Expression],

diff --git a/...in/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/...in/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -23,11 +23,11 @@ import org.apache.spark.sql.types._
 /**
  * Generates a [[Projection]] that returns an [[UnsafeRow]].
  *
- * It generates the code for all the expressions, compute the total length for all the columns
- * (can be accessed via variables), and then copy the data into a scratch buffer space in the
+ * It generates the code for all the expressions, computes the total length for all the columns
+ * (can be accessed via variables), and then copies the data into a scratch buffer space in the
  * form of UnsafeRow (the scratch buffer will grow as needed).
  *
- * Note: The returned UnsafeRow will be pointed to a scratch buffer inside the projection.
+ * @note The returned UnsafeRow will be pointed to a scratch buffer inside the projection.
  */
 object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafeProjection] {
 

diff --git a/...catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/...catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -113,7 +113,7 @@ sealed trait FrameType
  * or a [[ValueFollowing]] is used as its [[FrameBoundary]], the value is considered
  * as a physical offset.
  * For example, `ROW BETWEEN 1 PRECEDING AND 1 FOLLOWING` represents a 3-row frame,
- * from the row precedes the current row to the row follows the current row.
+ * from the row that precedes the current row to the row that follows the current row.
  */
 case object RowFrame extends FrameType
 
@@ -126,7 +126,7 @@ case object RowFrame extends FrameType
  * `RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING` represents a frame containing rows whose values
  * `expr` are in the range of [v-1, v+1].
  *
- * If `ORDER BY` clause is not defined, all rows in the partition is considered as peers
+ * If `ORDER BY` clause is not defined, all rows in the partition are considered as peers
  * of the current row.
  */
 case object RangeFrame extends FrameType
@@ -217,11 +217,11 @@ case object UnboundedFollowing extends FrameBoundary {
 }
 
 /**
- * The trait used to represent the a Window Frame.
+ * Represents a window frame.
  */
 sealed trait WindowFrame
 
-/** Used as a place holder when a frame specification is not defined.  */
+/** Used as a placeholder when a frame specification is not defined. */
 case object UnspecifiedFrame extends WindowFrame
 
 /** A specified Window Frame. */

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.trees.TreeNode
 
 /**
  * Given a [[LogicalPlan]], returns a list of `PhysicalPlan`s that can
- * be used for execution. If this strategy does not apply to the give logical operation then an
+ * be used for execution. If this strategy does not apply to the given logical operation then an
  * empty list should be returned.
  */
 abstract class GenericStrategy[PhysicalPlan <: TreeNode[PhysicalPlan]] extends Logging {
@@ -42,9 +42,10 @@ abstract class GenericStrategy[PhysicalPlan <: TreeNode[PhysicalPlan]] extends L
  * Abstract class for transforming [[LogicalPlan]]s into physical plans.
  * Child classes are responsible for specifying a list of [[GenericStrategy]] objects that
  * each of which can return a list of possible physical plan options.
- * If a given strategy is unable to plan all
- * of the remaining operators in the tree, it can call [[planLater]], which returns a placeholder
- * object that will be filled in using other available strategies.
+ * If a given strategy is unable to plan all of the remaining operators in the tree,
+ * it can call [[GenericStrategy#planLater planLater]], which returns a placeholder
+ * object that will be [[collectPlaceholders collected]] and filled in
+ * using other available strategies.
  *
  * TODO: RIGHT NOW ONLY ONE PLAN IS RETURNED EVER...
  *       PLAN SPACE EXPLORATION WILL BE IMPLEMENTED LATER.
@@ -93,7 +94,10 @@ abstract class QueryPlanner[PhysicalPlan <: TreeNode[PhysicalPlan]] {
     pruned
   }
 
-  /** Collects placeholders marked as [[planLater]] by strategy and its [[LogicalPlan]]s */
+  /**
+   * Collects placeholders marked using [[GenericStrategy#planLater planLater]]
+   * by [[strategies]].
+   */
   protected def collectPlaceholders(plan: PhysicalPlan): Seq[(PhysicalPlan, LogicalPlan)]
 
   /** Prunes bad plans to prevent combinatorial explosion. */

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -1152,7 +1152,7 @@ class Column(val expr: Expression) extends Logging {
   def bitwiseXOR(other: Any): Column = withExpr { BitwiseXor(expr, lit(other).expr) }
 
   /**
-   * Define a windowing column.
+   * Defines a windowing column.
    *
    * {{{
    *   val w = Window.partitionBy("name").orderBy("id")
@@ -1168,7 +1168,7 @@ class Column(val expr: Expression) extends Logging {
   def over(window: expressions.WindowSpec): Column = window.withAggregate(this)
 
   /**
-   * Define a empty analytic clause. In this case the analytic function is applied
+   * Defines an empty analytic clause. In this case the analytic function is applied
    * and presented for all rows in the result set.
    *
    * {{{

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala
@@ -35,12 +35,13 @@ import org.apache.spark.sql.types.NumericType
 import org.apache.spark.sql.types.StructType
 
 /**
- * A set of methods for aggregations on a `DataFrame`, created by `Dataset.groupBy`.
+ * A set of methods for aggregations on a `DataFrame`, created by [[Dataset#groupBy groupBy]],
+ * [[Dataset#cube cube]] or [[Dataset#rollup rollup]] (and also [[pivot]]).
  *
- * The main method is the agg function, which has multiple variants. This class also contains
- * convenience some first order statistics such as mean, sum for convenience.
+ * The main method is the [[agg]] function, which has multiple variants. This class also contains
+ * some first-order statistics such as [[mean]], [[sum]] for convenience.
  *
- * This class was named `GroupedData` in Spark 1.x.
+ * @note This class was named `GroupedData` in Spark 1.x.
  *
  * @since 2.0.0
  */
@@ -297,8 +298,9 @@ class RelationalGroupedDataset protected[sql](
   }
 
   /**
-   * Pivots a column of the current `DataFrame` and perform the specified aggregation.
-   * There are two versions of pivot function: one that requires the caller to specify the list
+   * Pivots a column of the current `DataFrame` and performs the specified aggregation.
+   *
+   * There are two versions of `pivot` function: one that requires the caller to specify the list
    * of distinct values to pivot on, and one that does not. The latter is more concise but less
    * efficient, because Spark needs to first compute the list of distinct values internally.
    *
@@ -337,7 +339,7 @@ class RelationalGroupedDataset protected[sql](
   }
 
   /**
-   * Pivots a column of the current `DataFrame` and perform the specified aggregation.
+   * Pivots a column of the current `DataFrame` and performs the specified aggregation.
    * There are two versions of pivot function: one that requires the caller to specify the list
    * of distinct values to pivot on, and one that does not. The latter is more concise but less
    * efficient, because Spark needs to first compute the list of distinct values internally.
@@ -369,7 +371,9 @@ class RelationalGroupedDataset protected[sql](
   }
 
   /**
-   * Pivots a column of the current `DataFrame` and perform the specified aggregation.
+   * (Java-specific) Pivots a column of the current `DataFrame` and performs the specified
+   * aggregation.
+   *
    * There are two versions of pivot function: one that requires the caller to specify the list
    * of distinct values to pivot on, and one that does not. The latter is more concise but less
    * efficient, because Spark needs to first compute the list of distinct values internally.
@@ -433,10 +437,6 @@ class RelationalGroupedDataset protected[sql](
   }
 }
 
-
-/**
- * Companion object for GroupedData.
- */
 private[sql] object RelationalGroupedDataset {
 
   def apply(

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -72,24 +72,24 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
   }
 
   /**
-   * Return all metadata that describes more details of this SparkPlan.
+   * Returns all metadata that describes more details of this SparkPlan.
    */
   def metadata: Map[String, String] = Map.empty
 
   /**
-   * Return all metrics containing metrics of this SparkPlan.
+   * Returns all metrics containing metrics of this SparkPlan.
    */
   def metrics: Map[String, SQLMetric] = Map.empty
 
   /**
-   * Reset all the metrics.
+   * Resets all the metrics.
    */
   def resetMetrics(): Unit = {
     metrics.valuesIterator.foreach(_.reset())
   }
 
   /**
-   * Return a LongSQLMetric according to the name.
+   * Returns a [[SQLMetric]] according to the name.
    */
   def longMetric(name: String): SQLMetric = metrics(name)
 

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -70,7 +70,7 @@ trait CodegenSupport extends SparkPlan {
   /**
    * Returns all the RDDs of InternalRow which generates the input rows.
    *
-   * Note: right now we support up to two RDDs.
+   * @note Right now we support up to two RDDs
    */
   def inputRDDs(): Seq[RDD[InternalRow]]
 
@@ -227,7 +227,7 @@ trait CodegenSupport extends SparkPlan {
 
 
 /**
- * InputAdapter is used to hide a SparkPlan from a subtree that support codegen.
+ * InputAdapter is used to hide a SparkPlan from a subtree that supports codegen.
  *
  * This is the leaf node of a tree with WholeStageCodegen that is used to generate code
  * that consumes an RDD iterator of InternalRow.
@@ -282,10 +282,10 @@ object WholeStageCodegenExec {
 }
 
 /**
- * WholeStageCodegen compile a subtree of plans that support codegen together into single Java
+ * WholeStageCodegen compiles a subtree of plans that support codegen together into single Java
  * function.
  *
- * Here is the call graph of to generate Java source (plan A support codegen, but plan B does not):
+ * Here is the call graph of to generate Java source (plan A supports codegen, but plan B does not):
  *
  *   WholeStageCodegen       Plan A               FakeInput        Plan B
  * =========================================================================
@@ -304,10 +304,10 @@ object WholeStageCodegenExec {
  *                             |
  *  doConsume()  <--------  consume()
  *
- * SparkPlan A should override doProduce() and doConsume().
+ * SparkPlan A should override `doProduce()` and `doConsume()`.
  *
- * doCodeGen() will create a CodeGenContext, which will hold a list of variables for input,
- * used to generated code for BoundReference.
+ * `doCodeGen()` will create a `CodeGenContext`, which will hold a list of variables for input,
+ * used to generated code for [[BoundReference]].
  */
 case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with CodegenSupport {
 

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -170,7 +170,7 @@ object Window {
    * and `Window.currentRow` to specify special boundary values, rather than using integral
    * values directly.
    *
-   * A range based boundary is based on the actual value of the ORDER BY
+   * A range-based boundary is based on the actual value of the ORDER BY
    * expression(s). An offset is used to alter the value of the ORDER BY expression, for
    * instance if the current order by expression has a value of 10 and the lower bound offset
    * is -3, the resulting lower bound for the current row will be 10 - 3 = 7. This however puts a
@@ -184,7 +184,7 @@ object Window {
    *   val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
    *     .toDF("id", "category")
    *   val byCategoryOrderedById =
-   *     Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+   *     Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)
    *   df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
    *
    *   +---+--------+---+

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1265,7 +1265,7 @@ object functions {
 
   /**
    * Parses the expression string into the column that it represents, similar to
-   * DataFrame.selectExpr
+   * [[Dataset#selectExpr]].
    * {{{
    *   // get the number of words of each length
    *   df.groupBy(expr("length(word)")).count()
@@ -2385,7 +2385,7 @@ object functions {
   def rtrim(e: Column): Column = withExpr { StringTrimRight(e.expr) }
 
   /**
-   * * Return the soundex code for the specified expression.
+   * Returns the soundex code for the specified expression.
    *
    * @group string_funcs
    * @since 1.5.0

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala
@@ -57,7 +57,7 @@ abstract class BaseSessionStateBuilder(
   type NewBuilder = (SparkSession, Option[SessionState]) => BaseSessionStateBuilder
 
   /**
-   * Function that produces a new instance of the SessionStateBuilder. This is used by the
+   * Function that produces a new instance of the `BaseSessionStateBuilder`. This is used by the
    * [[SessionState]]'s clone functionality. Make sure to override this when implementing your own
    * [[SessionStateBuilder]].
    */

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -109,7 +109,7 @@ private[sql] object SessionState {
 }
 
 /**
- * Concrete implementation of a [[SessionStateBuilder]].
+ * Concrete implementation of a [[BaseSessionStateBuilder]].
  */
 @Experimental
 @InterfaceStability.Unstable

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -91,7 +91,7 @@ trait RelationProvider {
  *
  * The difference between a [[RelationProvider]] and a [[SchemaRelationProvider]] is that
  * users need to provide a schema when using a [[SchemaRelationProvider]].
- * A relation provider can inherits both [[RelationProvider]] and [[SchemaRelationProvider]]
+ * A relation provider can inherit both [[RelationProvider]] and [[SchemaRelationProvider]]
  * if it can support both schema inference and user-specified schemas.
  *
  * @since 1.3.0