diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index f773da81621bb..92d4497918fa7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -312,27 +312,7 @@ pub enum Expr { Negative(Box), /// Whether an expression is between a given range. Between(Between), - /// The CASE expression is similar to a series of nested if/else and there are two forms that - /// can be used. The first form consists of a series of boolean "when" expressions with - /// corresponding "then" expressions, and an optional "else" expression. - /// - /// ```text - /// CASE WHEN condition THEN result - /// [WHEN ...] - /// [ELSE result] - /// END - /// ``` - /// - /// The second form uses a base expression and then a series of "when" clauses that match on a - /// literal value. - /// - /// ```text - /// CASE expression - /// WHEN value THEN result - /// [WHEN ...] - /// [ELSE result] - /// END - /// ``` + /// A CASE expression (see docs on [`Case`]) Case(Case), /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast. /// This expression is guaranteed to have a fixed type. @@ -340,7 +320,7 @@ pub enum Expr { /// Casts the expression to a given type and will return a null value if the expression cannot be cast. /// This expression is guaranteed to have a fixed type. TryCast(TryCast), - /// Represents the call of a scalar function with a set of arguments. + /// Call a scalar function with a set of arguments. ScalarFunction(ScalarFunction), /// Calls an aggregate function with arguments, and optional /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`. @@ -349,7 +329,7 @@ pub enum Expr { /// /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt AggregateFunction(AggregateFunction), - /// Represents the call of a window function with arguments. + /// Call a window function with a set of arguments. WindowFunction(WindowFunction), /// Returns whether the list contains the expr value. 
InList(InList), @@ -378,7 +358,7 @@ pub enum Expr { /// A place holder for parameters in a prepared statement /// (e.g. `$foo` or `$1`) Placeholder(Placeholder), - /// A place holder which hold a reference to a qualified field + /// A placeholder which holds a reference to a qualified field /// in the outer query, used for correlated sub queries. OuterReferenceColumn(DataType, Column), /// Unnest expression @@ -551,6 +531,28 @@ impl Display for BinaryExpr { } /// CASE expression +/// +/// The CASE expression is similar to a series of nested if/else and there are two forms that +/// can be used. The first form consists of a series of boolean "when" expressions with +/// corresponding "then" expressions, and an optional "else" expression. +/// +/// ```text +/// CASE WHEN condition THEN result +/// [WHEN ...] +/// [ELSE result] +/// END +/// ``` +/// +/// The second form uses a base expression and then a series of "when" clauses that match on a +/// literal value. +/// +/// ```text +/// CASE expression +/// WHEN value THEN result +/// [WHEN ...] 
+/// [ELSE result] +/// END +/// ``` #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)] pub struct Case { /// Optional base expression that can be compared to literal values in the "when" expressions @@ -631,7 +633,9 @@ impl Between { } } -/// ScalarFunction expression invokes a built-in scalar function +/// Invoke a [`ScalarUDF`] with a set of arguments +/// +/// [`ScalarUDF`]: crate::ScalarUDF #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct ScalarFunction { /// The function @@ -648,7 +652,9 @@ impl ScalarFunction { } impl ScalarFunction { - /// Create a new ScalarFunction expression with a user-defined function (UDF) + /// Create a new `ScalarFunction` from a [`ScalarUDF`] + /// + /// [`ScalarUDF`]: crate::ScalarUDF pub fn new_udf(udf: Arc, args: Vec) -> Self { Self { func: udf, args } } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index bf8339d88a559..208f4e77f7d23 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -34,7 +34,7 @@ use std::sync::Arc; /// /// A scalar function produces a single row output for each row of input. This /// struct contains the information DataFusion needs to plan and invoke -/// functions you supply such name, type signature, return type, and actual +/// functions you supply such as name, type signature, return type, and actual /// implementation. /// /// 1. For simple use cases, use [`create_udf`] (examples in [`simple_udf.rs`]). @@ -42,11 +42,11 @@ use std::sync::Arc; /// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API /// access (examples in [`advanced_udf.rs`]). /// -/// See [`Self::call`] to invoke a `ScalarUDF` with arguments. +/// See [`Self::call`] to create an `Expr` which invokes a `ScalarUDF` with arguments. /// /// # API Note /// -/// This is a separate struct from `ScalarUDFImpl` to maintain backwards +/// This is a separate struct from [`ScalarUDFImpl`] to maintain backwards /// compatibility with the older API. 
/// /// [`create_udf`]: crate::expr_fn::create_udf @@ -568,13 +568,15 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { } /// Returns true if some of this `exprs` subexpressions may not be evaluated - /// and thus any side effects (like divide by zero) may not be encountered - /// Setting this to true prevents certain optimizations such as common subexpression elimination + /// and thus any side effects (like divide by zero) may not be encountered. + /// + /// Setting this to true prevents certain optimizations such as common + /// subexpression elimination fn short_circuits(&self) -> bool { false } - /// Computes the output interval for a [`ScalarUDFImpl`], given the input + /// Computes the output [`Interval`] for a [`ScalarUDFImpl`], given the input /// intervals. /// /// # Parameters @@ -590,9 +592,11 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { Interval::make_unbounded(&DataType::Null) } - /// Updates bounds for child expressions, given a known interval for this - /// function. This is used to propagate constraints down through an expression - /// tree. + /// Updates bounds for child expressions, given a known [`Interval`] for this + /// function. + /// + /// This function is used to propagate constraints down through an + /// expression tree. /// /// # Parameters /// @@ -641,20 +645,25 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { } } - /// Whether the function preserves lexicographical ordering based on the input ordering + /// Returns true if the function preserves lexicographical ordering based on + /// the input ordering. + /// + /// For example, `concat(a || b)` preserves lexicographical ordering, but `abs(a)` does not. fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result { Ok(false) } /// Coerce arguments of a function call to types that the function can evaluate. /// - /// This function is only called if [`ScalarUDFImpl::signature`] returns [`crate::TypeSignature::UserDefined`]. 
Most - /// UDFs should return one of the other variants of `TypeSignature` which handle common - /// cases + /// This function is only called if [`ScalarUDFImpl::signature`] returns + /// [`crate::TypeSignature::UserDefined`]. Most UDFs should return one of + /// the other variants of [`TypeSignature`] which handle common cases. /// /// See the [type coercion module](crate::type_coercion) /// documentation for more details on type coercion /// + /// [`TypeSignature`]: crate::TypeSignature + /// /// For example, if your function requires a floating point arguments, but the user calls /// it like `my_func(1::int)` (i.e. with `1` as an integer), coerce_types can return `[DataType::Float64]` /// to ensure the argument is converted to `1::double` @@ -698,8 +707,8 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// Returns the documentation for this Scalar UDF. /// - /// Documentation can be accessed programmatically as well as - /// generating publicly facing documentation. + /// Documentation can be accessed programmatically as well as generating + /// publicly facing documentation. fn documentation(&self) -> Option<&Documentation> { None }