Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._

@ExpressionDescription(
usage = "_FUNC_(x) - Returns the mean calculated from values of a group.")
case class Average(child: Expression) extends DeclarativeAggregate {

override def prettyName: String = "avg"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ abstract class CentralMomentAgg(child: Expression) extends DeclarativeAggregate
}

// Compute the population standard deviation of a column
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the population standard deviation calculated from values of a group.")
// scalastyle:on line.size.limit
case class StddevPop(child: Expression) extends CentralMomentAgg(child) {

override protected def momentOrder = 2
Expand All @@ -143,6 +147,8 @@ case class StddevPop(child: Expression) extends CentralMomentAgg(child) {
}

// Compute the sample standard deviation of a column
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sample standard deviation calculated from values of a group.")
case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {

override protected def momentOrder = 2
Expand All @@ -157,6 +163,8 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
}

// Compute the population variance of a column
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the population variance calculated from values of a group.")
case class VariancePop(child: Expression) extends CentralMomentAgg(child) {

override protected def momentOrder = 2
Expand All @@ -170,6 +178,8 @@ case class VariancePop(child: Expression) extends CentralMomentAgg(child) {
}

// Compute the sample variance of a column
@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sample variance calculated from values of a group.")
case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {

override protected def momentOrder = 2
Expand All @@ -183,6 +193,8 @@ case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) {
override def prettyName: String = "var_samp"
}

@ExpressionDescription(
usage = "_FUNC_(x) - Returns the Skewness value calculated from values of a group.")
case class Skewness(child: Expression) extends CentralMomentAgg(child) {

override def prettyName: String = "skewness"
Expand All @@ -196,6 +208,8 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) {
}
}

@ExpressionDescription(
usage = "_FUNC_(x) - Returns the Kurtosis value calculated from values of a group.")
case class Kurtosis(child: Expression) extends CentralMomentAgg(child) {

override protected def momentOrder = 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
* Definition of Pearson correlation can be found at
* http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
*/
@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns Pearson coefficient of correlation between a set of number pairs.")
case class Corr(x: Expression, y: Expression) extends DeclarativeAggregate {

override def children: Seq[Expression] = Seq(x, y)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """_FUNC_(*) - Returns the total number of retrieved rows, including rows containing NULL values.
_FUNC_(expr) - Returns the number of rows for which the supplied expression is non-NULL.
_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL.""")
// scalastyle:on line.size.limit
case class Count(children: Seq[Expression]) extends DeclarativeAggregate {

override def nullable: Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ abstract class Covariance(x: Expression, y: Expression) extends DeclarativeAggre
}
}

@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs.")
case class CovPopulation(left: Expression, right: Expression) extends Covariance(left, right) {
override val evaluateExpression: Expression = {
If(n === Literal(0.0), Literal.create(null, DoubleType),
Expand All @@ -85,6 +87,8 @@ case class CovPopulation(left: Expression, right: Expression) extends Covariance
}


@ExpressionDescription(
usage = "_FUNC_(x,y) - Returns the sample covariance of a set of number pairs.")
case class CovSample(left: Expression, right: Expression) extends Covariance(left, right) {
override val evaluateExpression: Expression = {
If(n === Literal(0.0), Literal.create(null, DoubleType),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ import org.apache.spark.sql.types._
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
*/
@ExpressionDescription(
usage = """_FUNC_(expr) - Returns the first value of `child` for a group of rows.
_FUNC_(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows.
If isIgnoreNull is true, returns only non-null values.
""")
case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {

def this(child: Expression) = this(child, Literal.create(false, BooleanType))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ package org.apache.spark.sql.catalyst.expressions.aggregate
import java.lang.{Long => JLong}
import java.util

import com.clearspring.analytics.hash.MurmurHash

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
Expand All @@ -48,6 +46,11 @@ import org.apache.spark.sql.types._
* @param relativeSD the maximum estimation error allowed.
*/
// scalastyle:on
@ExpressionDescription(
usage = """_FUNC_(expr) - Returns the estimated cardinality by HyperLogLog++.
_FUNC_(expr, relativeSD=0.05) - Returns the estimated cardinality by HyperLogLog++
with relativeSD, the maximum estimation error allowed.
""")
case class HyperLogLogPlusPlus(
child: Expression,
relativeSD: Double = 0.05,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import org.apache.spark.sql.types._
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
*/
@ExpressionDescription(
usage = "_FUNC_(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows.")
case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


def this(child: Expression) = this(child, Literal.create(false, BooleanType))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._

@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the maximum value of expr.")
case class Max(child: Expression) extends DeclarativeAggregate {

override def children: Seq[Expression] = child :: Nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._


@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the minimum value of expr.")
case class Min(child: Expression) extends DeclarativeAggregate {

override def children: Seq[Expression] = child :: Nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._

@ExpressionDescription(
usage = "_FUNC_(x) - Returns the sum calculated from values of a group.")
case class Sum(child: Expression) extends DeclarativeAggregate {

override def children: Seq[Expression] = child :: Nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval


@ExpressionDescription(
usage = "_FUNC_(a) - Returns -a.")
case class UnaryMinus(child: Expression) extends UnaryExpression
with ExpectsInputTypes with NullIntolerant {

Expand Down Expand Up @@ -59,6 +60,8 @@ case class UnaryMinus(child: Expression) extends UnaryExpression
override def sql: String = s"(-${child.sql})"
}

@ExpressionDescription(
usage = "_FUNC_(a) - Returns a.")
case class UnaryPositive(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
override def prettyName: String = "positive"
Expand All @@ -79,8 +82,8 @@ case class UnaryPositive(child: Expression)
* A function that get the absolute value of the numeric value.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value",
extended = "> SELECT _FUNC_('-1');\n1")
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value.",
extended = "> SELECT _FUNC_('-1');\n 1")
case class Abs(child: Expression)
extends UnaryExpression with ExpectsInputTypes with NullIntolerant {

Expand Down Expand Up @@ -126,6 +129,8 @@ private[sql] object BinaryArithmetic {
def unapply(e: BinaryArithmetic): Option[(Expression, Expression)] = Some((e.left, e.right))
}

@ExpressionDescription(
usage = "a _FUNC_ b - Returns a+b.")
case class Add(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant {

override def inputType: AbstractDataType = TypeCollection.NumericAndInterval
Expand Down Expand Up @@ -155,6 +160,8 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic wit
}
}

@ExpressionDescription(
usage = "a _FUNC_ b - Returns a-b.")
case class Subtract(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {

Expand Down Expand Up @@ -185,6 +192,8 @@ case class Subtract(left: Expression, right: Expression)
}
}

@ExpressionDescription(
usage = "a _FUNC_ b - Multiplies a by b.")
case class Multiply(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {

Expand All @@ -198,6 +207,9 @@ case class Multiply(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2)
}

@ExpressionDescription(
usage = "a _FUNC_ b - Divides a by b.",
extended = "> SELECT 3 _FUNC_ 2;\n 1.5")
case class Divide(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {

Expand Down Expand Up @@ -275,6 +287,8 @@ case class Divide(left: Expression, right: Expression)
}
}

@ExpressionDescription(
usage = "a _FUNC_ b - Returns the remainder when dividing a by b.")
case class Remainder(left: Expression, right: Expression)
extends BinaryArithmetic with NullIntolerant {

Expand Down Expand Up @@ -464,6 +478,9 @@ case class MinOf(left: Expression, right: Expression)
override def symbol: String = "min"
}

@ExpressionDescription(
usage = "_FUNC_(a, b) - Returns the positive modulo",
extended = "> SELECT _FUNC_(10,3);\n 1")
case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic with NullIntolerant {

override def toString: String = s"pmod($left, $right)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ import org.apache.spark.sql.types._
*
* Code generation inherited from BinaryArithmetic.
*/
@ExpressionDescription(
usage = "a _FUNC_ b - Bitwise AND.",
extended = "> SELECT 3 _FUNC_ 5; 1")
case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic {

override def inputType: AbstractDataType = IntegralType
Expand All @@ -51,6 +54,9 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme
*
* Code generation inherited from BinaryArithmetic.
*/
@ExpressionDescription(
usage = "a _FUNC_ b - Bitwise OR.",
extended = "> SELECT 3 _FUNC_ 5; 7")
case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic {

override def inputType: AbstractDataType = IntegralType
Expand All @@ -76,6 +82,9 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet
*
* Code generation inherited from BinaryArithmetic.
*/
@ExpressionDescription(
usage = "a _FUNC_ b - Bitwise exclusive OR.",
extended = "> SELECT 3 _FUNC_ 5; 2")
case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic {

override def inputType: AbstractDataType = IntegralType
Expand All @@ -99,6 +108,9 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme
/**
* A function that calculates bitwise not(~) of a number.
*/
@ExpressionDescription(
usage = "_FUNC_ b - Bitwise NOT.",
extended = "> SELECT _FUNC_ 0; -1")
case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import org.apache.spark.sql.types._
/**
* Given an array or map, returns its size.
*/
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the size of an array or a map.")
case class Size(child: Expression) extends UnaryExpression with ExpectsInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(ArrayType, MapType))
Expand All @@ -44,6 +46,11 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
* Sorts the input array in ascending / descending order according to the natural ordering of
* the array elements and returns it.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(array(obj1, obj2,...)) - Sorts the input array in ascending order according to the natural ordering of the array elements.",
extended = " > SELECT _FUNC_(array('b', 'd', 'c', 'a'));\n 'a', 'b', 'c', 'd'")
// scalastyle:on line.size.limit
case class SortArray(base: Expression, ascendingOrder: Expression)
extends BinaryExpression with ExpectsInputTypes with CodegenFallback {

Expand Down Expand Up @@ -125,6 +132,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
/**
* Checks if the array (left) has the element (right)
*/
@ExpressionDescription(
usage = "_FUNC_(array, value) - Returns TRUE if the array contains value.",
extended = " > SELECT _FUNC_(array(1, 2, 3), 2);\n true")
case class ArrayContains(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import org.apache.spark.unsafe.types.UTF8String
/**
* Returns an Array containing the evaluation of all children expressions.
*/
@ExpressionDescription(
usage = "_FUNC_(n0, ...) - Returns an array with the given elements.")
case class CreateArray(children: Seq[Expression]) extends Expression {

override def foldable: Boolean = children.forall(_.foldable)
Expand Down Expand Up @@ -73,6 +75,8 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
* Returns a catalyst Map containing the evaluation of all children expressions as keys and values.
* The children are a flatted sequence of kv pairs, e.g. (key1, value1, key2, value2, ...)
*/
@ExpressionDescription(
usage = "_FUNC_(key0, value0, key1, value1...) - Creates a map with the given key/value pairs.")
case class CreateMap(children: Seq[Expression]) extends Expression {
private[sql] lazy val keys = children.indices.filter(_ % 2 == 0).map(children)
private[sql] lazy val values = children.indices.filter(_ % 2 != 0).map(children)
Expand Down Expand Up @@ -153,6 +157,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
/**
* Returns a Row containing the evaluation of all children expressions.
*/
@ExpressionDescription(
usage = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.")
case class CreateStruct(children: Seq[Expression]) extends Expression {

override def foldable: Boolean = children.forall(_.foldable)
Expand Down Expand Up @@ -204,6 +210,10 @@ case class CreateStruct(children: Seq[Expression]) extends Expression {
*
* @param children Seq(name1, val1, name2, val2, ...)
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values.")
// scalastyle:on line.size.limit
case class CreateNamedStruct(children: Seq[Expression]) extends Expression {

/**
Expand Down
Loading