diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index f62c8bb0c293..3fae34cbf00c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -662,7 +662,7 @@ object FunctionRegistry { val clazz = scala.reflect.classTag[Cast].runtimeClass val usage = "_FUNC_(expr) - Casts the value `expr` to the target data type `_FUNC_`." val expressionInfo = - new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "", "", "") + new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "", "2.0.1", "") (name, (expressionInfo, builder)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index e6a4c8f1d374..4bd6418789aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -51,7 +51,8 @@ import org.apache.spark.util.Utils c33fb387-8500-4bfa-81d2-6e0e3e930df2 > SELECT _FUNC_('java.util.UUID', 'fromString', 'a5cf6c42-0c85-418f-af6c-3e4e5b1328f2'); a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 - """) + """, + since = "2.0.0") case class CallMethodViaReflection(children: Seq[Expression]) extends Expression with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 96154917e163..bf759db59f3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1742,7 +1742,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit Examples: > SELECT _FUNC_('10' as int); 10 - """) + """, + since = "1.0.0") case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String] = None) extends CastBase { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index f1da592a7684..8b04c1aa513f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -40,7 +40,13 @@ import org.apache.spark.sql.types.{DataType, LongType} within each partition. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records. The function is non-deterministic because its result depends on partition IDs. 
- """) + """, + examples = """ + Examples: + > SELECT _FUNC_(); + 0 + """, + since = "1.4.0") case class MonotonicallyIncreasingID() extends LeafExpression with Stateful { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala index 9856b37e53fb..242735b4aebd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala @@ -26,7 +26,13 @@ import org.apache.spark.sql.types.{DataType, IntegerType} * Expression that returns the current partition id. */ @ExpressionDescription( - usage = "_FUNC_() - Returns the current partition id.") + usage = "_FUNC_() - Returns the current partition id.", + examples = """ + Examples: + > SELECT _FUNC_(); + 0 + """, + since = "1.4.0") case class SparkPartitionID() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala index 787b21859c6d..8b51e0a908f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala @@ -37,6 +37,7 @@ import org.apache.spark.util.sketch.CountMinSketch * @param confidenceExpression confidence, must be positive and less than 1.0 * @param seedExpression random seed */ +// scalastyle:off line.size.limit @ExpressionDescription( usage = """ _FUNC_(col, eps, confidence, seed) - Returns a count-min sketch of a column with the given esp, @@ -44,8 +45,14 @@ import org.apache.spark.util.sketch.CountMinSketch `CountMinSketch` before usage. Count-min sketch is a probabilistic data structure used for cardinality estimation using sub-linear space. 
""", + examples = """ + Examples: + > SELECT hex(_FUNC_(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col); + 0000000100000000000000030000000100000004000000005D8D6AB90000000000000000000000000000000200000000000000010000000000000000 + """, group = "agg_funcs", since = "2.2.0") +// scalastyle:on line.size.limit case class CountMinSketchAgg( child: Expression, epsExpression: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala index b4c1b2c708fb..573dbd6c3f8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/bitwiseAggregates.scala @@ -60,6 +60,7 @@ abstract class BitAggregate extends DeclarativeAggregate with ExpectsInputTypes > SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col); 1 """, + group = "agg_funcs", since = "3.0.0") case class BitAndAgg(child: Expression) extends BitAggregate { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 7c521838447d..f25fd9b672e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -32,7 +32,8 @@ import org.apache.spark.unsafe.types.CalendarInterval Examples: > SELECT _FUNC_(1); -1 - """) + """, + since = "1.0.0") case class UnaryMinus(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { private val checkOverflow = SQLConf.get.ansiEnabled @@ -95,7 +96,13 @@ case class UnaryMinus(child: Expression) extends UnaryExpression } @ExpressionDescription( - usage = "_FUNC_(expr) - Returns the value of `expr`.") + usage = "_FUNC_(expr) - Returns the value of `expr`.", + examples = """ + Examples: + > SELECT _FUNC_(1); + 1 + """, + since = "1.5.0") case class UnaryPositive(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { override def prettyName: String = "positive" @@ -121,7 +128,8 @@ case class UnaryPositive(child: Expression) Examples: > SELECT _FUNC_(-1); 1 - """) + """, + since = "1.2.0") case class Abs(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -223,7 +231,8 @@ object BinaryArithmetic { Examples: > SELECT 1 _FUNC_ 2; 3 - """) + """, + since = "1.0.0") case class Add(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -255,7 +264,8 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic { Examples: > SELECT 2 _FUNC_ 1; 1 - """) + """, + since = "1.0.0") case class Subtract(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -287,7 +297,8 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti Examples: > SELECT 2 _FUNC_ 3; 6 - """) + """, + since = "1.0.0") case class Multiply(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = NumericType @@ -382,7 +393,8 @@ trait DivModLike extends BinaryArithmetic { 1.5 > SELECT 2L _FUNC_ 2L; 1.0 - """) + 
""", + since = "1.0.0") // scalastyle:on line.size.limit case class Divide(left: Expression, right: Expression) extends DivModLike { @@ -455,7 +467,8 @@ object IntegralDivide { 0.2 > SELECT MOD(2, 1.8); 0.2 - """) + """, + since = "1.0.0") case class Remainder(left: Expression, right: Expression) extends DivModLike { override def inputType: AbstractDataType = NumericType @@ -502,7 +515,8 @@ case class Remainder(left: Expression, right: Expression) extends DivModLike { 1 > SELECT _FUNC_(-10, 3); 2 - """) + """, + since = "1.5.0") case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { override def toString: String = s"pmod($left, $right)" @@ -658,7 +672,8 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { Examples: > SELECT _FUNC_(10, 9, 2, 4, 3); 2 - """) + """, + since = "1.5.0") case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression { override def nullable: Boolean = children.forall(_.nullable) @@ -731,7 +746,8 @@ case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression Examples: > SELECT _FUNC_(10, 9, 2, 4, 3); 10 - """) + """, + since = "1.5.0") case class Greatest(children: Seq[Expression]) extends ComplexTypeMergingExpression { override def nullable: Boolean = children.forall(_.nullable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 342b14eaa339..aa3993dccd1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -32,7 +32,8 @@ import org.apache.spark.sql.types._ Examples: > SELECT 3 _FUNC_ 5; 1 - """) + """, + since = "1.4.0") case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = IntegralType @@ -64,7 +65,8 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme Examples: > SELECT 3 _FUNC_ 5; 7 - """) + """, + since = "1.4.0") case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = IntegralType @@ -96,7 +98,8 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet Examples: > SELECT 3 _FUNC_ 5; 6 - """) + """, + since = "1.4.0") case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { override def inputType: AbstractDataType = IntegralType @@ -126,7 +129,8 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme Examples: > SELECT _FUNC_ 0; -1 - """) + """, + since = "1.4.0") case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index d9de72e1b217..8555f63df986 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -89,7 +89,8 @@ trait BinaryArrayExpressionWithImplicitCast extends BinaryExpression 2 > SELECT _FUNC_(NULL); -1 - """) + """, + since = "1.5.0") case class Size(child: Expression, 
legacySizeOfNull: Boolean) extends UnaryExpression with ExpectsInputTypes { @@ -139,7 +140,8 @@ object Size { > SELECT _FUNC_(map(1, 'a', 2, 'b')); [1,2] """, - group = "map_funcs") + group = "map_funcs", + since = "2.0.0") case class MapKeys(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -330,7 +332,8 @@ case class ArraysZip(children: Seq[Expression]) extends Expression with ExpectsI > SELECT _FUNC_(map(1, 'a', 2, 'b')); ["a","b"] """, - group = "map_funcs") + group = "map_funcs", + since = "2.0.0") case class MapValues(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -871,7 +874,8 @@ object ArraySortLike { > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'), true); [null,"a","b","c","d"] """, - group = "array_funcs") + group = "array_funcs", + since = "1.5.0") // scalastyle:on line.size.limit case class SortArray(base: Expression, ascendingOrder: Expression) extends BinaryExpression with ArraySortLike with NullIntolerant { @@ -1086,7 +1090,8 @@ case class Reverse(child: Expression) > SELECT _FUNC_(array(1, 2, 3), 2); true """, - group = "array_funcs") + group = "array_funcs", + since = "1.5.0") case class ArrayContains(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2048,7 +2053,8 @@ case class ElementAt(left: Expression, right: Expression) note = """ Concat logic for arrays is available since 2.4.0. """, - group = "array_funcs") + group = "array_funcs", + since = "1.5.0") case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpression { private def allowedTypes: Seq[AbstractDataType] = Seq(StringType, BinaryType, ArrayType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 563ce7133a3d..42e4d3ec6df5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -36,7 +36,8 @@ import org.apache.spark.unsafe.types.UTF8String Examples: > SELECT _FUNC_(1, 2, 3); [1,2,3] - """) + """, + since = "1.1.0") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) extends Expression { @@ -153,7 +154,8 @@ private [sql] object GenArrayData { Examples: > SELECT _FUNC_(1.0, '2', 3.0, '4'); {1.0:"2",3.0:"4"} - """) + """, + since = "2.0.0") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) extends Expression { @@ -253,7 +255,8 @@ object CreateMap { Examples: > SELECT _FUNC_(array(1.0, 3.0), array('2', '4')); {1.0:"2",3.0:"4"} - """, since = "2.4.0") + """, + since = "2.4.0") case class MapFromArrays(left: Expression, right: Expression) extends BinaryExpression with ExpectsInputTypes with NullIntolerant { @@ -346,10 +349,14 @@ object CreateStruct { "struct", "_FUNC_(col1, col2, col3, ...) 
- Creates a struct with the given field values.", "", + """ + | Examples: + | > SELECT _FUNC_(1, 2, 3); + | {"col1":1,"col2":2,"col3":3} + | """.stripMargin, "", "", - "", - "", + "1.4.0", "") ("struct", (info, this.create)) } @@ -367,7 +374,8 @@ object CreateStruct { Examples: > SELECT _FUNC_("a", 1, "b", 2, "c", 3); {"a":1,"b":2,"c":3} - """) + """, + since = "1.5.0") // scalastyle:on line.size.limit case class CreateNamedStruct(children: Seq[Expression]) extends Expression { lazy val (nameExprs, valExprs) = children.grouped(2).map { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 6c6210994954..84065d07e2b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -30,7 +30,8 @@ import org.apache.spark.sql.types._ Examples: > SELECT _FUNC_(1 < 2, 'a', 'b'); a - """) + """, + since = "1.0.0") // scalastyle:on line.size.limit case class If(predicate: Expression, trueValue: Expression, falseValue: Expression) extends ComplexTypeMergingExpression { @@ -116,7 +117,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi 2.0 > SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 < 0 THEN 2.0 END; NULL - """) + """, + since = "1.0.1") // scalastyle:on line.size.limit case class CaseWhen( branches: Seq[(Expression, Expression)], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 3d9612018aaf..e889cfbec990 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1565,13 +1565,13 @@ trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`. """, arguments = """ - Arguments: - * date - date value or valid date string - * fmt - the format representing the unit to be truncated to - - "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `date` falls in - - "QUARTER" - truncate to the first date of the quarter that the `date` falls in - - "MONTH", "MM", "MON" - truncate to the first date of the month that the `date` falls in - - "WEEK" - truncate to the Monday of the week that the `date` falls in + Arguments: + * date - date value or valid date string + * fmt - the format representing the unit to be truncated to + - "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `date` falls in + - "QUARTER" - truncate to the first date of the quarter that the `date` falls in + - "MONTH", "MM", "MON" - truncate to the first date of the month that the `date` falls in + - "WEEK" - truncate to the Monday of the week that the `date` falls in """, examples = """ Examples: @@ -1619,19 +1619,19 @@ case class TruncDate(date: Expression, format: Expression) _FUNC_(fmt, ts) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`. 
""", arguments = """ - Arguments: - * fmt - the format representing the unit to be truncated to - - "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `ts` falls in, the time part will be zero out - - "QUARTER" - truncate to the first date of the quarter that the `ts` falls in, the time part will be zero out - - "MONTH", "MM", "MON" - truncate to the first date of the month that the `ts` falls in, the time part will be zero out - - "WEEK" - truncate to the Monday of the week that the `ts` falls in, the time part will be zero out - - "DAY", "DD" - zero out the time part - - "HOUR" - zero out the minute and second with fraction part - - "MINUTE"- zero out the second with fraction part - - "SECOND" - zero out the second fraction part - - "MILLISECOND" - zero out the microseconds - - "MICROSECOND" - everything remains - * ts - datetime value or valid timestamp string + Arguments: + * fmt - the format representing the unit to be truncated to + - "YEAR", "YYYY", "YY" - truncate to the first date of the year that the `ts` falls in, the time part will be zero out + - "QUARTER" - truncate to the first date of the quarter that the `ts` falls in, the time part will be zero out + - "MONTH", "MM", "MON" - truncate to the first date of the month that the `ts` falls in, the time part will be zero out + - "WEEK" - truncate to the Monday of the week that the `ts` falls in, the time part will be zero out + - "DAY", "DD" - zero out the time part + - "HOUR" - zero out the minute and second with fraction part + - "MINUTE"- zero out the second with fraction part + - "SECOND" - zero out the second fraction part + - "MILLISECOND" - zero out the microseconds + - "MICROSECOND" - everything remains + * ts - datetime value or valid timestamp string """, examples = """ Examples: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index b0a23c62284d..ad6e365f76fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -135,7 +135,8 @@ case class UserDefinedGenerator( > SELECT _FUNC_(2, 1, 2, 3); 1 2 3 NULL - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit line.contains.tab case class Stack(children: Seq[Expression]) extends Generator { @@ -360,7 +361,8 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with > SELECT _FUNC_(array(10, 20)); 10 20 - """) + """, + since = "1.0.0") // scalastyle:on line.size.limit case class Explode(child: Expression) extends ExplodeBase { override val position: Boolean = false @@ -383,7 +385,8 @@ case class Explode(child: Expression) extends ExplodeBase { > SELECT _FUNC_(array(10,20)); 0 10 1 20 - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit line.contains.tab case class PosExplode(child: Expression) extends ExplodeBase { override val position = true @@ -400,7 +403,8 @@ case class PosExplode(child: Expression) extends ExplodeBase { > SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b'))); 1 a 2 b - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit line.contains.tab case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator { override val inline: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index 5e21b58f070b..64360827fb79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -52,7 +52,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} Examples: > SELECT _FUNC_('Spark'); 8cde774d6f7333752ed72cacddb05126 - """) + """, + since = "1.5.0") case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -87,7 +88,8 @@ case class Md5(child: Expression) Examples: > SELECT _FUNC_('Spark', 256); 529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b - """) + """, + since = "1.5.0") // scalastyle:on line.size.limit case class Sha2(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { @@ -160,7 +162,8 @@ case class Sha2(left: Expression, right: Expression) Examples: > SELECT _FUNC_('Spark'); 85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c - """) + """, + since = "1.5.0") case class Sha1(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -188,7 +191,8 @@ case class Sha1(child: Expression) Examples: > SELECT _FUNC_('Spark'); 1557323817 - """) + """, + since = "1.5.0") case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -575,7 +579,8 @@ abstract class InterpretedHashFunction { Examples: > SELECT _FUNC_('Spark', array(123), 2); -1321691492 - """) + """, + since = "2.0.0") case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] { def this(arguments: Seq[Expression]) = this(arguments, 42) @@ -647,7 +652,8 @@ object XxHash64Function extends InterpretedHashFunction { * we can guarantee shuffle and bucketing have same data distribution */ @ExpressionDescription( - usage = "_FUNC_(expr1, expr2, ...) - Returns a hash value of the arguments.") + usage = "_FUNC_(expr1, expr2, ...) 
- Returns a hash value of the arguments.", + since = "2.2.0") case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] { override val seed = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala index 3b0141ad52cc..e9426223092d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala @@ -24,9 +24,16 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.types.{DataType, LongType, StringType} import org.apache.spark.unsafe.types.UTF8String - +// scalastyle:off whitespace.end.of.line @ExpressionDescription( - usage = "_FUNC_() - Returns the name of the file being read, or empty string if not available.") + usage = "_FUNC_() - Returns the name of the file being read, or empty string if not available.", + examples = """ + Examples: + > SELECT _FUNC_(); + + """, + since = "1.5.0") +// scalastyle:on whitespace.end.of.line case class InputFileName() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false @@ -51,7 +58,13 @@ case class InputFileName() extends LeafExpression with Nondeterministic { @ExpressionDescription( - usage = "_FUNC_() - Returns the start offset of the block being read, or -1 if not available.") + usage = "_FUNC_() - Returns the start offset of the block being read, or -1 if not available.", + examples = """ + Examples: + > SELECT _FUNC_(); + -1 + """, + since = "2.2.0") case class InputFileBlockStart() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false @@ -74,7 +87,13 @@ case class InputFileBlockStart() extends LeafExpression with Nondeterministic { @ExpressionDescription( - usage = "_FUNC_() - Returns the length of the block being read, or -1 if not available.") + usage = "_FUNC_() - Returns the length of the block being read, or -1 if not available.", + examples = """ + Examples: + > SELECT _FUNC_(); + -1 + """, + since = "2.2.0") case class InputFileBlockLength() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index f4568f860ac0..ef02d2db97a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -119,7 +119,8 @@ private[this] object SharedFactory { > SELECT _FUNC_('{"a":"b"}', '$.a'); b """, - group = "json_funcs") + group = "json_funcs", + since = "1.5.0") case class GetJsonObject(json: Expression, path: Expression) extends BinaryExpression with ExpectsInputTypes with CodegenFallback { @@ -343,7 +344,8 @@ case class GetJsonObject(json: Expression, path: Expression) > SELECT _FUNC_('{"a":1, "b":2}', 'a', 'b'); 1 2 """, - group = "json_funcs") + group = "json_funcs", + since = "1.6.0") // scalastyle:on line.size.limit line.contains.tab case class JsonTuple(children: Seq[Expression]) extends Generator with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 617ddcb69eab..2458a4aaba65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -61,7 +61,8 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { Examples: > SELECT _FUNC_(0 < 1); NULL - """) + """, + since = "2.0.0") case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def nullable: Boolean = true @@ -108,7 +109,8 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa Examples: > SELECT _FUNC_(); default - """) + """, + since = "1.6.0") case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType override def foldable: Boolean = true @@ -144,7 +146,8 @@ case class CurrentCatalog() extends LeafExpression with Unevaluable { """, note = """ The function is non-deterministic. - """) + """, + since = "2.3.0") // scalastyle:on line.size.limit case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Stateful with ExpressionWithRandomSeed { @@ -185,6 +188,11 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Sta // scalastyle:off line.size.limit @ExpressionDescription( usage = """_FUNC_() - Returns the Spark version. The string contains 2 fields, the first being a release version and the second being a git revision.""", + examples = """ + Examples: + > SELECT _FUNC_(); + 3.1.0 a6d6ea3efedbad14d99c24143834cd4e2e52fb40 + """, since = "3.0.0") // scalastyle:on line.size.limit case class SparkVersion() extends LeafExpression with CodegenFallback { @@ -200,7 +208,7 @@ case class SparkVersion() extends LeafExpression with CodegenFallback { @ExpressionDescription( usage = """_FUNC_(expr) - Return DDL-formatted type string for the data type of the input.""", examples = """ - Examples: + Examples: > SELECT _FUNC_(1); int > SELECT _FUNC_(array(1)); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 03066fb34cf2..ddc4d8c0d39b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -250,7 +250,17 @@ trait PredicateHelper extends Logging { } @ExpressionDescription( - usage = "_FUNC_ expr - Logical not.") + usage = "_FUNC_ expr - Logical not.", + examples = """ + Examples: + > SELECT _FUNC_ true; + false + > SELECT _FUNC_ false; + true + > SELECT _FUNC_ NULL; + NULL + """, + since = "1.0.0") case class Not(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes with NullIntolerant { @@ -353,7 +363,8 @@ case class InSubquery(values: Seq[Expression], query: ListQuery) false > SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); true - """) + """, + since = "1.0.0") // scalastyle:on line.size.limit case class In(value: Expression, list: Seq[Expression]) extends Predicate { @@ -577,7 +588,19 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Logical AND.") + usage = "expr1 _FUNC_ expr2 - Logical AND.", + examples = """ + Examples: + > SELECT true 
_FUNC_ true; + true + > SELECT true _FUNC_ false; + false + > SELECT true _FUNC_ NULL; + NULL + > SELECT false _FUNC_ NULL; + false + """, + since = "1.0.0") case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -647,7 +670,19 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Logical OR.") + usage = "expr1 _FUNC_ expr2 - Logical OR.", + examples = """ + Examples: + > SELECT true _FUNC_ false; + true + > SELECT false _FUNC_ false; + false + > SELECT true _FUNC_ NULL; + true + > SELECT false _FUNC_ NULL; + NULL + """, + since = "1.0.0") case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -780,7 +815,8 @@ object Equality { NULL > SELECT NULL _FUNC_ NULL; NULL - """) + """, + since = "1.0.0") case class EqualTo(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -823,7 +859,8 @@ case class EqualTo(left: Expression, right: Expression) false > SELECT NULL _FUNC_ NULL; true - """) + """, + since = "1.1.0") case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison { override def symbol: String = "<=>" @@ -880,7 +917,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp true > SELECT 1 _FUNC_ NULL; NULL - """) + """, + since = "1.0.0") case class LessThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -910,7 +948,8 @@ case class LessThan(left: Expression, right: Expression) true > SELECT 1 _FUNC_ NULL; NULL - """) + """, + since = "1.0.0") case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -940,7 +979,8 @@ case class LessThanOrEqual(left: Expression, right: Expression) false > SELECT 1 _FUNC_ NULL; NULL - """) + """, + since = "1.0.0") case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -970,7 +1010,8 @@ case class GreaterThan(left: Expression, right: Expression) false > SELECT 1 _FUNC_ NULL; NULL - """) + """, + since = "1.0.0") case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 07a2b6fa96c1..1a35a52098f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -411,6 +411,7 @@ abstract class OffsetWindowFunction * integer value. The default offset is 1. When the value of `input` is null at the `offset`th row, * null is returned. If there is no such offset row, the `default` expression is evaluated. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_(input[, offset[, default]]) - Returns the value of `input` at the `offset`th row @@ -426,8 +427,17 @@ abstract class OffsetWindowFunction * default - a string expression which is to use when the offset is larger than the window. The default value is null. 
""", + examples = """ + Examples: + > SELECT a, b, _FUNC_(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 1 + A1 1 2 + A1 2 NULL + A2 3 NULL + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class Lead(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -446,6 +456,7 @@ case class Lead(input: Expression, offset: Expression, default: Expression) * integer value. The default offset is 1. When the value of `input` is null at the `offset`th row, * null is returned. If there is no such offset row, the `default` expression is evaluated. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_(input[, offset[, default]]) - Returns the value of `input` at the `offset`th row @@ -460,8 +471,17 @@ case class Lead(input: Expression, offset: Expression, default: Expression) * offset - an int expression which is rows to jump back in the partition. * default - a string expression which is to use when the offset row does not exist. """, + examples = """ + Examples: + > SELECT a, b, _FUNC_(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 NULL + A1 1 1 + A1 2 1 + A2 3 NULL + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class Lag(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -514,13 +534,23 @@ object SizeBasedWindowFunction { * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_() - Assigns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition. """, + examples = """ + Examples: + > SELECT a, b, _FUNC_() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 1 + A1 1 2 + A1 2 3 + A2 3 1 + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class RowNumber() extends RowNumberLike { override val evaluateExpression = rowNumber override def prettyName: String = "row_number" @@ -534,12 +564,22 @@ case class RowNumber() extends RowNumberLike { * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_() - Computes the position of a value relative to all values in the partition. 
""", + examples = """ + Examples: + > SELECT a, b, _FUNC_() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 0.6666666666666666 + A1 1 0.6666666666666666 + A1 2 1.0 + A2 3 1.0 + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def dataType: DataType = DoubleType // The frame for CUME_DIST is Range based instead of Row based, because CUME_DIST must @@ -549,6 +589,8 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def prettyName: String = "cume_dist" } +// scalastyle:off line.size.limit line.contains.tab + @ExpressionDescription( usage = """ _FUNC_(input[, offset]) - Returns the value of `input` at the row that is the `offset`th row @@ -557,6 +599,14 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { there is no such an `offset`th row (e.g., when the offset is 10, size of the window frame is less than 10), null is returned. """, + examples = """ + Examples: + > SELECT a, b, _FUNC_(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 1 + A1 1 1 + A1 2 1 + A2 3 NULL + """, arguments = """ Arguments: * input - the target column or expression that the function operates on. @@ -567,6 +617,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { """, since = "3.1.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Boolean) extends AggregateWindowFunction with ImplicitCastInputTypes { @@ -642,6 +693,7 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_(n) - Divides the rows for each window partition into `n` buckets ranging @@ -652,8 +704,17 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool * buckets - an int expression which is number of buckets to divide the rows in. Default value is 1. """, + examples = """ + Examples: + > SELECT a, b, _FUNC_(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 1 + A1 1 1 + A1 2 2 + A2 3 1 + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction { def this() = this(Literal(1)) @@ -767,6 +828,7 @@ abstract class RankLike extends AggregateWindowFunction { * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_() - Computes the rank of a value in a group of values. The result is one plus the number @@ -779,8 +841,17 @@ abstract class RankLike extends AggregateWindowFunction { trigger a change in rank. This is an internal parameter and will be assigned by the Analyser. 
""", + examples = """ + Examples: + > SELECT a, b, _FUNC_(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 1 + A1 1 1 + A1 2 3 + A2 3 1 + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class Rank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): Rank = Rank(order) @@ -793,6 +864,7 @@ case class Rank(children: Seq[Expression]) extends RankLike { * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_() - Computes the rank of a value in a group of values. The result is one plus the @@ -805,8 +877,17 @@ case class Rank(children: Seq[Expression]) extends RankLike { trigger a change in rank. This is an internal parameter and will be assigned by the Analyser. """, + examples = """ + Examples: + > SELECT a, b, _FUNC_(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 1 + A1 1 1 + A1 2 2 + A2 3 1 + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class DenseRank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): DenseRank = DenseRank(order) @@ -827,6 +908,7 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ +// scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = """ _FUNC_() - Computes the percentage ranking of a value in a group of values. @@ -837,8 +919,17 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { trigger a change in rank. This is an internal parameter and will be assigned by the Analyser. 
""", + examples = """ + Examples: + > SELECT a, b, _FUNC_(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b); + A1 1 0.0 + A1 1 0.0 + A1 2 1.0 + A2 3 0.0 + """, since = "2.0.0", group = "window_funcs") +// scalastyle:on line.size.limit line.contains.tab case class PercentRank(children: Seq[Expression]) extends RankLike with SizeBasedWindowFunction { def this() = this(Nil) override def withOrder(order: Seq[Expression]): PercentRank = PercentRank(order) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala index e08a10ecac71..5f10667c55d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala @@ -63,7 +63,8 @@ abstract class XPathExtract Examples: > SELECT _FUNC_('1','a/b'); true - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract { @@ -82,7 +83,8 @@ case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract Examples: > SELECT _FUNC_('12', 'sum(a/b)'); 3 - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_short" @@ -101,7 +103,8 @@ case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { Examples: > SELECT _FUNC_('12', 'sum(a/b)'); 3 - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_int" @@ -120,7 +123,8 @@ case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { Examples: > SELECT _FUNC_('12', 'sum(a/b)'); 3 - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_long" @@ -139,7 +143,8 @@ case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { Examples: > SELECT _FUNC_('12', 'sum(a/b)'); 3.0 - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_float" @@ -158,7 +163,8 @@ case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { Examples: > SELECT _FUNC_('12', 'sum(a/b)'); 3.0 - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = @@ -178,7 +184,8 @@ case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { Examples: > SELECT _FUNC_('bcc','a/c'); cc - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathString(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_string" @@ -197,7 +204,8 @@ case class XPathString(xml: Expression, path: Expression) extends XPathExtract { Examples: > SELECT _FUNC_('b1b2b3c1c2','a/b/text()'); ["b1","b2","b3"] - """) + """, + since = "2.0.0") // scalastyle:on line.size.limit case class XPathList(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath" diff --git 
a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 45f561a61df7..3ea009de1bac 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,8 +1,8 @@ ## Summary - Number of queries: 340 - - Number of expressions that missing example: 35 - - Expressions missing examples: and,bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,nth_value,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch + - Number of expressions that missing example: 13 + - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | | ---------- | ---------------------- | ------------- | ------------- | | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | | org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> | | org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct | -| org.apache.spark.sql.catalyst.expressions.And | and | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.And | and | SELECT true and true | struct<(true AND true):boolean> | | org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct | | org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct | | org.apache.spark.sql.catalyst.expressions.ArrayDistinct | array_distinct | SELECT array_distinct(array(1, 2, 3, null, 3)) | struct> | @@ -79,10 +79,10 @@ | org.apache.spark.sql.catalyst.expressions.CreateArray | array | SELECT array(1, 2, 3) | struct> | | org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct> | | org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct> | -| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | struct | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | struct | SELECT struct(1, 2, 3) | struct<struct(1, 2, 3):struct<col1:int,col2:int,col3:int>> | | org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.Cube | cube | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) | struct | -| org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct | @@ -97,7 +97,7 @@ |
org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DayOfYear | dayofyear | SELECT dayofyear('2016-04-09') | struct | | org.apache.spark.sql.catalyst.expressions.Decode | decode | SELECT decode(encode('abc', 'utf-8'), 'utf-8') | struct | -| org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Divide | / | SELECT 3 / 2 | struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> | | org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct | | org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct | @@ -135,9 +135,9 @@ | org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | | org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | | org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | -| org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | N/A | N/A | -| org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | N/A | N/A | -| org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | SELECT input_file_block_length() | struct<input_file_block_length():bigint> | +| org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | SELECT input_file_block_start() | struct<input_file_block_start():bigint> | +| org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | SELECT input_file_name() | struct<input_file_name():string> | | org.apache.spark.sql.catalyst.expressions.IntegralDivide | div | SELECT 3 div 2 | struct<(CAST(3 AS BIGINT) div CAST(2 AS BIGINT)):bigint> | | org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct | | org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> | @@ -145,9 +145,9 @@ | org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | -| org.apache.spark.sql.catalyst.expressions.Lag | lag | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Lag | lag | SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct | -| org.apache.spark.sql.catalyst.expressions.Lead | lead | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Lead | lead | SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Least | least | SELECT least(10, 9, 2, 4, 3) | struct | |
org.apache.spark.sql.catalyst.expressions.Left | left | SELECT left('Spark SQL', 3) | struct | | org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct | @@ -180,28 +180,28 @@ | org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp | timestamp_micros | SELECT timestamp_micros(1230219000123123) | struct | | org.apache.spark.sql.catalyst.expressions.MillisToTimestamp | timestamp_millis | SELECT timestamp_millis(1230219000123) | struct | | org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct | -| org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | SELECT monotonically_increasing_id() | struct<monotonically_increasing_id():bigint> | | org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct | | org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> | | org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct | -| org.apache.spark.sql.catalyst.expressions.NTile | ntile | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.NTile | ntile | SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct | | org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct | -| org.apache.spark.sql.catalyst.expressions.Not | ! | N/A | N/A | -| org.apache.spark.sql.catalyst.expressions.Not | not | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT !
true | struct<(NOT true):boolean> | +| org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> | | org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | -| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct | -| org.apache.spark.sql.catalyst.expressions.Or | or | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Or | or | SELECT true or false | struct<(true OR false):boolean> | | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | | org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | -| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | | org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct | | org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct | @@ -213,7 +213,7 @@ | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct | -| org.apache.spark.sql.catalyst.expressions.Rank | rank | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | | org.apache.spark.sql.catalyst.expressions.RegExpExtractAll | regexp_extract_all | SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1) | struct> | | org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | @@ -224,7 +224,7 @@ | org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct | | org.apache.spark.sql.catalyst.expressions.Rollup | rollup | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) | struct | | org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct | -| 
-| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | N/A | N/A |
+| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct |
 | org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct |
 | org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct |
 | org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct |
@@ -247,8 +247,8 @@
 | org.apache.spark.sql.catalyst.expressions.Slice | slice | SELECT slice(array(1, 2, 3, 4), 2, 2) | struct> |
 | org.apache.spark.sql.catalyst.expressions.SortArray | sort_array | SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) | struct> |
 | org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct |
-| org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | N/A | N/A |
-| org.apache.spark.sql.catalyst.expressions.SparkVersion | version | N/A | N/A |
+| org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | SELECT spark_partition_id() | struct |
+| org.apache.spark.sql.catalyst.expressions.SparkVersion | version | SELECT version() | struct |
 | org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct |
 | org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct |
 | org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct |
@@ -285,7 +285,7 @@
 | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct |
 | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct |
 | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct |
-| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A |
+| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> |
 | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct |
 | org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct |
 | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct |
@@ -314,7 +314,7 @@
 | org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct |
 | org.apache.spark.sql.catalyst.expressions.aggregate.Count | count | SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) | struct |
 | org.apache.spark.sql.catalyst.expressions.aggregate.CountIf | count_if | SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) | struct |
-| org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | N/A | N/A |
+| org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col) | struct |
 | org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation | covar_pop | SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct |
 | org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct |
 | org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct |
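The example/schema pairs restored in the table above are machine-generated: ExpressionsSchemaSuite runs each expression's example query and records the schema string of the result. A minimal spark-shell sketch (editor's illustration, not part of the patch; the local-session setup and the printed output are assumptions) reproducing one of the new rows:

    import org.apache.spark.sql.SparkSession

    // Editor's sketch: run one of the newly added example queries and print
    // the result schema in the "struct<...>" form the markdown table records.
    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    val df = spark.sql("SELECT spark_partition_id()")
    println(df.schema.catalogString) // expected to look like struct<SPARK_PARTITION_ID():int>
    spark.stop()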
diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out
index d4872ca03199..42d12b80be98 100644
--- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out
@@ -269,6 +269,8 @@ Class: org.apache.spark.sql.catalyst.expressions.Cast
 Extended Usage:
     No example/argument for boolean.
 
+    Since: 2.0.1
+
 Function: boolean
 Usage: boolean(expr) - Casts the value `expr` to the target data type `boolean`.
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
index d18aa9c549eb..37ef04d9cb02 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
@@ -178,7 +178,15 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
       s"$numberOfQueries record in result file. Try regenerating the result files.")
 
     val numberOfMissingExamples = lines(3).split(":")(1).trim.toInt
-    val expectedMissingExamples = lines(4).split(":")(1).trim.split(",")
+    val expectedMissingExamples = {
+      val missingExamples = lines(4).split(":")(1).trim
+      // Splitting an empty string would return [""]
+      if (missingExamples.nonEmpty) {
+        missingExamples.split(",")
+      } else {
+        Array.empty[String]
+      }
+    }
 
     assert(numberOfMissingExamples == expectedMissingExamples.size,
       s"expected missing examples size: ${expectedMissingExamples.size} not same as " +
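The `nonEmpty` guard added above works around a JVM quirk: splitting an empty string does not yield an empty array but a one-element array containing "", so an empty missing-examples line would otherwise be miscounted as one missing example. A standalone sketch (editor's illustration, not part of the patch) of the behavior:

    // Editor's sketch: why the nonEmpty guard is needed.
    val missing = ""
    assert(missing.split(",").toSeq == Seq("")) // surprising: length 1, not 0
    val parsed =
      if (missing.nonEmpty) missing.split(",") else Array.empty[String]
    assert(parsed.isEmpty)                      // guarded path: truly empty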
since filled") { + val ignoreSet = Set( + // Explicitly inherits NonSQLExpression, and has no ExpressionDescription + "org.apache.spark.sql.catalyst.expressions.TimeWindow", + // Cast aliases do not need examples + "org.apache.spark.sql.catalyst.expressions.Cast") + + spark.sessionState.functionRegistry.listFunction().foreach { funcId => + val info = spark.sessionState.catalog.lookupFunctionInfo(funcId) + if (!ignoreSet.contains(info.getClassName)) { + withClue(s"Function '${info.getName}', Expression class '${info.getClassName}'") { + assert(info.getUsage.nonEmpty) + assert(info.getExamples.startsWith("\n Examples:\n")) + assert(info.getExamples.endsWith("\n ")) + assert(info.getSince.matches("[0-9]+\\.[0-9]+\\.[0-9]+")) + + if (info.getArguments.nonEmpty) { + assert(info.getArguments.startsWith("\n Arguments:\n")) + assert(info.getArguments.endsWith("\n ")) + } + } + } + } + } + test("check outputs of expression examples") { def unindentAndTrim(s: String): String = { s.replaceAll("\n\\s+", "\n").trim } - val beginSqlStmtRe = " > ".r + val beginSqlStmtRe = "\n > ".r val endSqlStmtRe = ";\n".r def checkExampleSyntax(example: String): Unit = { val beginStmtNum = beginSqlStmtRe.findAllIn(example).length @@ -129,8 +155,15 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "org.apache.spark.sql.catalyst.expressions.Randn", "org.apache.spark.sql.catalyst.expressions.Shuffle", "org.apache.spark.sql.catalyst.expressions.Uuid", + // Other nondeterministic expressions + "org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID", + "org.apache.spark.sql.catalyst.expressions.SparkPartitionID", + "org.apache.spark.sql.catalyst.expressions.InputFileName", + "org.apache.spark.sql.catalyst.expressions.InputFileBlockStart", + "org.apache.spark.sql.catalyst.expressions.InputFileBlockLength", // The example calls methods that return unstable results. - "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") + "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection", + "org.apache.spark.sql.catalyst.expressions.SparkVersion") val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) parFuncs.foreach { funcId =>