apache · alamb · Feb 3, 2025 · Jan 25, 2025 · Jan 25, 2025 · Jan 25, 2025
diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs
@@ -19,6 +19,7 @@
 //! and return types of functions in DataFusion.
 
 use std::fmt::Display;
+use std::num::NonZeroUsize;
 
 use crate::type_coercion::aggregates::NUMERICS;
 use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
@@ -236,9 +237,9 @@ pub enum ArrayFunctionSignature {
     /// The first argument should be non-list or list, and the second argument should be List/LargeList.
     /// The first argument's list dimension should be one dimension less than the second argument's list dimension.
     ElementAndArray,
-    /// Specialized Signature for Array functions of the form (List/LargeList, Index)
-    /// The first argument should be List/LargeList/FixedSizedList, and the second argument should be Int64.
-    ArrayAndIndex,
+    /// Specialized Signature for Array functions of the form (List/LargeList, Index+)
+    /// The first argument should be List/LargeList/FixedSizeList, and the next `n` arguments (`n` being the variant's count) should be Int64.
+    ArrayAndIndexes(NonZeroUsize),
     /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
     ArrayAndElementAndOptionalIndex,
     /// Specialized Signature for ArrayEmpty and similar functions
@@ -265,8 +266,12 @@ impl Display for ArrayFunctionSignature {
             ArrayFunctionSignature::ElementAndArray => {
                 write!(f, "element, array")
             }
-            ArrayFunctionSignature::ArrayAndIndex => {
-                write!(f, "array, index")
+            ArrayFunctionSignature::ArrayAndIndexes(count) => {
+                write!(f, "array")?;
+                for _ in 0..count.get() {
+                    write!(f, ", index")?;
+                }
+                Ok(())
             }
             ArrayFunctionSignature::Array => {
                 write!(f, "array")
@@ -455,6 +460,15 @@ fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
     }
 }
 
+/// How a function behaves when one or more of its inputs is Null.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
+pub enum NullHandling {
+    /// Null inputs are passed into the function implementation. The `Signature` constructors in this file set this variant.
+    PassThrough,
+    /// Any Null input causes the function to return Null (a Null scalar argument short-circuits `ScalarFunctionExpr` evaluation).
+    Propagate,
+}
+
 /// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
 ///
 /// DataFusion will automatically coerce (cast) argument types to one of the supported
@@ -465,6 +479,8 @@ pub struct Signature {
     pub type_signature: TypeSignature,
     /// The volatility of the function. See [Volatility] for more information.
     pub volatility: Volatility,
+    /// The Null handling of the function. See [NullHandling] for more information.
+    pub null_handling: NullHandling,
 }
 
 impl Signature {
@@ -473,20 +489,23 @@ impl Signature {
         Signature {
             type_signature,
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
     pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
         Self {
             type_signature: TypeSignature::Variadic(common_types),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// User-defined coercion rules for the function.
     pub fn user_defined(volatility: Volatility) -> Self {
         Self {
             type_signature: TypeSignature::UserDefined,
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
 
@@ -495,6 +514,7 @@ impl Signature {
         Self {
             type_signature: TypeSignature::Numeric(arg_count),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
 
@@ -503,6 +523,7 @@ impl Signature {
         Self {
             type_signature: TypeSignature::String(arg_count),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
 
@@ -511,6 +532,7 @@ impl Signature {
         Self {
             type_signature: TypeSignature::VariadicAny,
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// A fixed number of arguments of the same type, from those listed in `valid_types`.
@@ -522,13 +544,15 @@ impl Signature {
         Self {
             type_signature: TypeSignature::Uniform(arg_count, valid_types),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Exactly matches the types in `exact_types`, in order.
     pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::Exact(exact_types),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Target coerce types in order
@@ -539,6 +563,7 @@ impl Signature {
         Self {
             type_signature: TypeSignature::Coercible(target_types),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
 
@@ -547,13 +572,15 @@ impl Signature {
         Self {
             type_signature: TypeSignature::Comparable(arg_count),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
 
     pub fn nullary(volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::Nullary,
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
 
@@ -562,13 +589,15 @@ impl Signature {
         Signature {
             type_signature: TypeSignature::Any(arg_count),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Any one of a list of [TypeSignature]s.
     pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::OneOf(type_signatures),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Specialized Signature for ArrayAppend and similar functions
@@ -578,6 +607,7 @@ impl Signature {
                 ArrayFunctionSignature::ArrayAndElement,
             ),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Specialized Signature for Array functions with an optional index
@@ -587,6 +617,7 @@ impl Signature {
                 ArrayFunctionSignature::ArrayAndElementAndOptionalIndex,
             ),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Specialized Signature for ArrayPrepend and similar functions
@@ -596,24 +627,37 @@ impl Signature {
                 ArrayFunctionSignature::ElementAndArray,
             ),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Specialized Signature for ArrayElement and similar functions
     pub fn array_and_index(volatility: Volatility) -> Self {
+        Self::array_and_indexes(volatility, NonZeroUsize::new(1).expect("1 is non-zero"))
+    }
+    /// Specialized Signature for ArraySlice and similar functions: an array argument followed by `count` Int64 index arguments.
+    pub fn array_and_indexes(volatility: Volatility, count: NonZeroUsize) -> Self {
         Signature {
             type_signature: TypeSignature::ArraySignature(
-                ArrayFunctionSignature::ArrayAndIndex,
+                ArrayFunctionSignature::ArrayAndIndexes(count),
             ),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
     /// Specialized Signature for ArrayEmpty and similar functions
     pub fn array(volatility: Volatility) -> Self {
         Signature {
             type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
             volatility,
+            null_handling: NullHandling::PassThrough,
         }
     }
+
+    /// Consumes `self` and returns it with `null_handling` replaced by the given value; all other fields are unchanged.
+    pub fn with_null_handling(mut self, null_handling: NullHandling) -> Self {
+        self.null_handling = null_handling;
+        self
+    }
 }
 
 #[cfg(test)]

diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs
@@ -71,8 +71,8 @@ pub use datafusion_expr_common::columnar_value::ColumnarValue;
 pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator};
 pub use datafusion_expr_common::operator::Operator;
 pub use datafusion_expr_common::signature::{
-    ArrayFunctionSignature, Signature, TypeSignature, TypeSignatureClass, Volatility,
-    TIMEZONE_WILDCARD,
+    ArrayFunctionSignature, NullHandling, Signature, TypeSignature, TypeSignatureClass,
+    Volatility, TIMEZONE_WILDCARD,
 };
 pub use datafusion_expr_common::type_coercion::binary;
 pub use expr::{

diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
@@ -671,13 +671,20 @@ fn get_valid_types(
             ArrayFunctionSignature::ElementAndArray => {
                 array_append_or_prepend_valid_types(current_types, false)?
             }
-            ArrayFunctionSignature::ArrayAndIndex => {
-                if current_types.len() != 2 {
+            ArrayFunctionSignature::ArrayAndIndexes(count) => {
+                if current_types.len() != count.get() + 1 {
                     return Ok(vec![vec![]]);
                 }
                 array(&current_types[0]).map_or_else(
                     || vec![vec![]],
-                    |array_type| vec![vec![array_type, DataType::Int64]],
+                    |array_type| {
+                        let mut inner = Vec::with_capacity(count.get() + 1);
+                        inner.push(array_type);
+                        for _ in 0..count.get() {
+                            inner.push(DataType::Int64);
+                        }
+                        vec![inner]
+                    },
                 )
             }
             ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {

diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs
@@ -35,12 +35,13 @@ use datafusion_common::cast::as_list_array;
 use datafusion_common::{
     exec_err, internal_datafusion_err, plan_err, DataFusionError, Result,
 };
-use datafusion_expr::Expr;
+use datafusion_expr::{ArrayFunctionSignature, Expr, TypeSignature};
 use datafusion_expr::{
-    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+    ColumnarValue, Documentation, NullHandling, ScalarUDFImpl, Signature, Volatility,
 };
 use datafusion_macros::user_doc;
 use std::any::Any;
+use std::num::NonZeroUsize;
 use std::sync::Arc;
 
 use crate::utils::make_scalar_function;
@@ -330,7 +331,27 @@ pub(super) struct ArraySlice {
 impl ArraySlice {
     pub fn new() -> Self {
         Self {
-            signature: Signature::variadic_any(Volatility::Immutable),
+            signature: Signature::one_of(
+                vec![
+                    TypeSignature::ArraySignature(
+                        ArrayFunctionSignature::ArrayAndIndexes(
+                            NonZeroUsize::new(1).expect("1 is non-zero"),
+                        ),
+                    ),
+                    TypeSignature::ArraySignature(
+                        ArrayFunctionSignature::ArrayAndIndexes(
+                            NonZeroUsize::new(2).expect("2 is non-zero"),
+                        ),
+                    ),
+                    TypeSignature::ArraySignature(
+                        ArrayFunctionSignature::ArrayAndIndexes(
+                            NonZeroUsize::new(3).expect("3 is non-zero"),
+                        ),
+                    ),
+                ],
+                Volatility::Immutable,
+            )
+            .with_null_handling(NullHandling::Propagate),
             aliases: vec![String::from("list_slice")],
         }
     }
@@ -430,8 +451,6 @@ fn array_slice_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
         }
         LargeList(_) => {
             let array = as_large_list_array(&args[0])?;
-            let from_array = as_int64_array(&args[1])?;
-            let to_array = as_int64_array(&args[2])?;
             general_array_slice::<i64>(array, from_array, to_array, stride)
         }
         _ => exec_err!("array_slice does not support type: {:?}", array_data_type),

diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs
@@ -27,8 +27,8 @@ use datafusion_common::cast::{
 };
 use datafusion_common::{exec_err, Result};
 use datafusion_expr::{
-    ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
-    TypeSignature, Volatility,
+    ArrayFunctionSignature, ColumnarValue, Documentation, NullHandling, ScalarUDFImpl,
+    Signature, TypeSignature, Volatility,
 };
 use datafusion_macros::user_doc;
 use std::any::Any;
@@ -80,6 +80,7 @@ impl Flatten {
                     ArrayFunctionSignature::RecursiveArray,
                 ),
                 volatility: Volatility::Immutable,
+                null_handling: NullHandling::PassThrough,
             },
             aliases: vec![],
         }

diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs
@@ -47,6 +47,7 @@ use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf;
 use datafusion_expr::{
     expr_vec_fmt, ColumnarValue, Expr, ReturnTypeArgs, ScalarFunctionArgs, ScalarUDF,
 };
+use datafusion_expr_common::signature::NullHandling;
 
 /// Physical expression of a scalar function
 #[derive(Eq, PartialEq, Hash)]
@@ -186,6 +187,15 @@ impl PhysicalExpr for ScalarFunctionExpr {
             .map(|e| e.evaluate(batch))
             .collect::<Result<Vec<_>>>()?;
 
+        if self.fun.signature().null_handling == NullHandling::Propagate
+            && args.iter().any(
+                |arg| matches!(arg, ColumnarValue::Scalar(scalar) if scalar.is_null()),
+            )
+        {
+            let null_value = ScalarValue::try_from(&self.return_type)?;
+            return Ok(ColumnarValue::Scalar(null_value));
+        }
+
         let input_empty = args.is_empty();
         let input_all_scalar = args
             .iter()