Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,10 @@ object ScalaReflection extends ScalaReflection {
}

/**
* Given a type `T` this function constructs and ObjectType that holds a class of type
* Array[T]. Special handling is performed for primitive types to map them back to their raw
* Given a type `T` this function constructs an `ObjectType` that holds a class of type
* `Array[T]`.
*
* Special handling is performed for primitive types to map them back to their raw
* JVM form instead of the Scala Array that handles auto boxing.
*/
private def arrayClassFor(tpe: `Type`): ObjectType = ScalaReflectionLock.synchronized {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ object AnalysisContext {

/**
* Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and
* [[UnresolvedRelation]]s into fully typed objects using information in a
* [[SessionCatalog]] and a [[FunctionRegistry]].
* [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]].
*/
class Analyzer(
catalog: SessionCatalog,
Expand Down Expand Up @@ -1882,7 +1881,7 @@ class Analyzer(
* `[Sum(_w0) OVER (PARTITION BY _w1 ORDER BY _w2)]` and the second returned value will be
* [col1, col2 + col3 as _w0, col4 as _w1, col5 as _w2].
*
* @return (seq of expressions containing at lease one window expressions,
* @return (seq of expressions containing at least one window expression,
* seq of non-window expressions)
*/
private def extract(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ case class ExpressionEncoder[T](
assert(serializer.flatMap { ser =>
val boundRefs = ser.collect { case b: BoundReference => b }
assert(boundRefs.nonEmpty,
"each serializer expression should contains at least one `BoundReference`")
"each serializer expression should contain at least one `BoundReference`")
boundRefs
}.distinct.length <= 1, "all serializer expressions must use the same BoundReference.")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,7 @@ class CodegenContext {

/**
* Generates code for expressions. If doSubexpressionElimination is true, subexpression
* elimination will be performed. Subexpression elimination assumes that the code will for each
* elimination will be performed. Subexpression elimination assumes that the code for each
* expression will be combined in the `expressions` order.
*/
def generateExpressions(expressions: Seq[Expression],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ import org.apache.spark.sql.types._
/**
* Generates a [[Projection]] that returns an [[UnsafeRow]].
*
* It generates the code for all the expressions, compute the total length for all the columns
* (can be accessed via variables), and then copy the data into a scratch buffer space in the
* It generates the code for all the expressions, computes the total length for all the columns
* (can be accessed via variables), and then copies the data into a scratch buffer space in the
* form of UnsafeRow (the scratch buffer will grow as needed).
*
* Note: The returned UnsafeRow will be pointed to a scratch buffer inside the projection.
* @note The returned UnsafeRow will point to a scratch buffer inside the projection.
*/
object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafeProjection] {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ sealed trait FrameType
* or a [[ValueFollowing]] is used as its [[FrameBoundary]], the value is considered
* as a physical offset.
* For example, `ROW BETWEEN 1 PRECEDING AND 1 FOLLOWING` represents a 3-row frame,
* from the row precedes the current row to the row follows the current row.
* from the row that precedes the current row to the row that follows the current row.
*/
case object RowFrame extends FrameType

Expand All @@ -126,7 +126,7 @@ case object RowFrame extends FrameType
* `RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING` represents a frame containing rows whose values
* `expr` are in the range of [v-1, v+1].
*
* If `ORDER BY` clause is not defined, all rows in the partition is considered as peers
* If `ORDER BY` clause is not defined, all rows in the partition are considered as peers
* of the current row.
*/
case object RangeFrame extends FrameType
Expand Down Expand Up @@ -217,11 +217,11 @@ case object UnboundedFollowing extends FrameBoundary {
}

/**
* The trait used to represent the a Window Frame.
* Represents a window frame.
*/
sealed trait WindowFrame

/** Used as a place holder when a frame specification is not defined. */
/** Used as a placeholder when a frame specification is not defined. */
case object UnspecifiedFrame extends WindowFrame

/** A specified Window Frame. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.trees.TreeNode

/**
* Given a [[LogicalPlan]], returns a list of `PhysicalPlan`s that can
* be used for execution. If this strategy does not apply to the give logical operation then an
* be used for execution. If this strategy does not apply to the given logical operation then an
* empty list should be returned.
*/
abstract class GenericStrategy[PhysicalPlan <: TreeNode[PhysicalPlan]] extends Logging {
Expand All @@ -42,9 +42,10 @@ abstract class GenericStrategy[PhysicalPlan <: TreeNode[PhysicalPlan]] extends L
* Abstract class for transforming [[LogicalPlan]]s into physical plans.
* Child classes are responsible for specifying a list of [[GenericStrategy]] objects,
* each of which can return a list of possible physical plan options.
* If a given strategy is unable to plan all
* of the remaining operators in the tree, it can call [[planLater]], which returns a placeholder
* object that will be filled in using other available strategies.
* If a given strategy is unable to plan all of the remaining operators in the tree,
* it can call [[GenericStrategy#planLater planLater]], which returns a placeholder
* object that will be [[collectPlaceholders collected]] and filled in
* using other available strategies.
*
* TODO: RIGHT NOW ONLY ONE PLAN IS RETURNED EVER...
* PLAN SPACE EXPLORATION WILL BE IMPLEMENTED LATER.
Expand Down Expand Up @@ -93,7 +94,10 @@ abstract class QueryPlanner[PhysicalPlan <: TreeNode[PhysicalPlan]] {
pruned
}

/** Collects placeholders marked as [[planLater]] by strategy and its [[LogicalPlan]]s */
/**
* Collects placeholders marked using [[GenericStrategy#planLater planLater]]
* by [[strategies]].
*/
protected def collectPlaceholders(plan: PhysicalPlan): Seq[(PhysicalPlan, LogicalPlan)]

/** Prunes bad plans to prevent combinatorial explosion. */
Expand Down
4 changes: 2 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ class Column(val expr: Expression) extends Logging {
def bitwiseXOR(other: Any): Column = withExpr { BitwiseXor(expr, lit(other).expr) }

/**
* Define a windowing column.
* Defines a windowing column.
*
* {{{
* val w = Window.partitionBy("name").orderBy("id")
Expand All @@ -1168,7 +1168,7 @@ class Column(val expr: Expression) extends Logging {
def over(window: expressions.WindowSpec): Column = window.withAggregate(this)

/**
* Define a empty analytic clause. In this case the analytic function is applied
* Defines an empty analytic clause. In this case the analytic function is applied
* and presented for all rows in the result set.
*
* {{{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ import org.apache.spark.sql.types.NumericType
import org.apache.spark.sql.types.StructType

/**
* A set of methods for aggregations on a `DataFrame`, created by `Dataset.groupBy`.
* A set of methods for aggregations on a `DataFrame`, created by [[Dataset#groupBy groupBy]],
* [[Dataset#cube cube]] or [[Dataset#rollup rollup]] (and also [[pivot]]).
*
* The main method is the agg function, which has multiple variants. This class also contains
* convenience some first order statistics such as mean, sum for convenience.
* The main method is the [[agg]] function, which has multiple variants. This class also contains
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These look fine but have you had a chance to run the doc generation, to make sure scaladoc is OK with it?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No scaladoc. I was focused on javadoc and seems I missed it. Fixed.

* some first-order statistics such as [[mean]], [[sum]] for convenience.
*
* This class was named `GroupedData` in Spark 1.x.
* @note This class was named `GroupedData` in Spark 1.x.
*
* @since 2.0.0
*/
Expand Down Expand Up @@ -297,8 +298,9 @@ class RelationalGroupedDataset protected[sql](
}

/**
* Pivots a column of the current `DataFrame` and perform the specified aggregation.
* There are two versions of pivot function: one that requires the caller to specify the list
* Pivots a column of the current `DataFrame` and performs the specified aggregation.
*
* There are two versions of `pivot` function: one that requires the caller to specify the list
* of distinct values to pivot on, and one that does not. The latter is more concise but less
* efficient, because Spark needs to first compute the list of distinct values internally.
*
Expand Down Expand Up @@ -337,7 +339,7 @@ class RelationalGroupedDataset protected[sql](
}

/**
* Pivots a column of the current `DataFrame` and perform the specified aggregation.
* Pivots a column of the current `DataFrame` and performs the specified aggregation.
* There are two versions of pivot function: one that requires the caller to specify the list
* of distinct values to pivot on, and one that does not. The latter is more concise but less
* efficient, because Spark needs to first compute the list of distinct values internally.
Expand Down Expand Up @@ -369,7 +371,9 @@ class RelationalGroupedDataset protected[sql](
}

/**
* Pivots a column of the current `DataFrame` and perform the specified aggregation.
* (Java-specific) Pivots a column of the current `DataFrame` and performs the specified
* aggregation.
*
* There are two versions of pivot function: one that requires the caller to specify the list
* of distinct values to pivot on, and one that does not. The latter is more concise but less
* efficient, because Spark needs to first compute the list of distinct values internally.
Expand Down Expand Up @@ -433,10 +437,6 @@ class RelationalGroupedDataset protected[sql](
}
}


/**
* Companion object for GroupedData.
*/
private[sql] object RelationalGroupedDataset {

def apply(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,24 +72,24 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
}

/**
* Return all metadata that describes more details of this SparkPlan.
* Returns all metadata that describes more details of this SparkPlan.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can even turn this into @return but no big deal.

*/
def metadata: Map[String, String] = Map.empty

/**
* Return all metrics containing metrics of this SparkPlan.
* Returns all metrics containing metrics of this SparkPlan.
*/
def metrics: Map[String, SQLMetric] = Map.empty

/**
* Reset all the metrics.
* Resets all the metrics.
*/
def resetMetrics(): Unit = {
metrics.valuesIterator.foreach(_.reset())
}

/**
* Return a LongSQLMetric according to the name.
* Returns a [[SQLMetric]] according to the name.
*/
def longMetric(name: String): SQLMetric = metrics(name)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ trait CodegenSupport extends SparkPlan {
/**
* Returns all the RDDs of InternalRow which generate the input rows.
*
* Note: right now we support up to two RDDs.
* @note Right now we support up to two RDDs.
*/
def inputRDDs(): Seq[RDD[InternalRow]]

Expand Down Expand Up @@ -227,7 +227,7 @@ trait CodegenSupport extends SparkPlan {


/**
* InputAdapter is used to hide a SparkPlan from a subtree that support codegen.
* InputAdapter is used to hide a SparkPlan from a subtree that supports codegen.
*
* This is the leaf node of a tree with WholeStageCodegen that is used to generate code
* that consumes an RDD iterator of InternalRow.
Expand Down Expand Up @@ -282,10 +282,10 @@ object WholeStageCodegenExec {
}

/**
* WholeStageCodegen compile a subtree of plans that support codegen together into single Java
* WholeStageCodegen compiles a subtree of plans that support codegen together into a single Java
* function.
*
* Here is the call graph of to generate Java source (plan A support codegen, but plan B does not):
* Here is the call graph to generate Java source (plan A supports codegen, but plan B does not):
*
* WholeStageCodegen Plan A FakeInput Plan B
* =========================================================================
Expand All @@ -304,10 +304,10 @@ object WholeStageCodegenExec {
* |
* doConsume() <-------- consume()
*
* SparkPlan A should override doProduce() and doConsume().
* SparkPlan A should override `doProduce()` and `doConsume()`.
*
* doCodeGen() will create a CodeGenContext, which will hold a list of variables for input,
* used to generated code for BoundReference.
* `doCodeGen()` will create a `CodeGenContext`, which will hold a list of variables for input,
* used to generate code for [[BoundReference]].
*/
case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with CodegenSupport {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ object Window {
* and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
* A range-based boundary is based on the actual value of the ORDER BY
* expression(s). An offset is used to alter the value of the ORDER BY expression, for
* instance if the current order by expression has a value of 10 and the lower bound offset
* is -3, the resulting lower bound for the current row will be 10 - 3 = 7. This however puts a
Expand All @@ -184,7 +184,7 @@ object Window {
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
* val byCategoryOrderedById =
* Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
* Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)
* df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
Expand Down
4 changes: 2 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1265,7 +1265,7 @@ object functions {

/**
* Parses the expression string into the column that it represents, similar to
* DataFrame.selectExpr
* [[Dataset#selectExpr]].
* {{{
* // get the number of words of each length
* df.groupBy(expr("length(word)")).count()
Expand Down Expand Up @@ -2385,7 +2385,7 @@ object functions {
def rtrim(e: Column): Column = withExpr { StringTrimRight(e.expr) }

/**
* * Return the soundex code for the specified expression.
* Returns the soundex code for the specified expression.
*
* @group string_funcs
* @since 1.5.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ abstract class BaseSessionStateBuilder(
type NewBuilder = (SparkSession, Option[SessionState]) => BaseSessionStateBuilder

/**
* Function that produces a new instance of the SessionStateBuilder. This is used by the
* Function that produces a new instance of the `BaseSessionStateBuilder`. This is used by the
* [[SessionState]]'s clone functionality. Make sure to override this when implementing your own
* [[SessionStateBuilder]].
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ private[sql] object SessionState {
}

/**
* Concrete implementation of a [[SessionStateBuilder]].
* Concrete implementation of a [[BaseSessionStateBuilder]].
*/
@Experimental
@InterfaceStability.Unstable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ trait RelationProvider {
*
* The difference between a [[RelationProvider]] and a [[SchemaRelationProvider]] is that
* users need to provide a schema when using a [[SchemaRelationProvider]].
* A relation provider can inherits both [[RelationProvider]] and [[SchemaRelationProvider]]
* A relation provider can inherit both [[RelationProvider]] and [[SchemaRelationProvider]]
* if it can support both schema inference and user-specified schemas.
*
* @since 1.3.0
Expand Down