diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index 2f8ac93a195e..3a7ef517fb95 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -578,7 +578,7 @@ macro_rules! min_max_batch { } /// dynamically-typed min(array) -> ScalarValue -fn min_batch(values: &ArrayRef) -> Result { +pub fn min_batch(values: &ArrayRef) -> Result { Ok(match values.data_type() { DataType::Utf8 => { typed_min_max_batch_string!(values, StringArray, Utf8, min_string) diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index f1626e82cbbd..1d3f11b50c61 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -54,7 +54,7 @@ pub mod map_entries; pub mod map_extract; pub mod map_keys; pub mod map_values; -pub mod max; +pub mod min_max; pub mod planner; pub mod position; pub mod range; @@ -100,6 +100,8 @@ pub mod expr_fn { pub use super::map_extract::map_extract; pub use super::map_keys::map_keys; pub use super::map_values::map_values; + pub use super::min_max::array_max; + pub use super::min_max::array_min; pub use super::position::array_position; pub use super::position::array_positions; pub use super::range::gen_series; @@ -148,7 +150,8 @@ pub fn all_default_nested_functions() -> Vec> { length::array_length_udf(), distance::array_distance_udf(), flatten::flatten_udf(), - max::array_max_udf(), + min_max::array_max_udf(), + min_max::array_min_udf(), sort::array_sort_udf(), repeat::array_repeat_udf(), resize::array_resize_udf(), diff --git a/datafusion/functions-nested/src/max.rs b/datafusion/functions-nested/src/min_max.rs similarity index 61% rename from datafusion/functions-nested/src/max.rs rename to datafusion/functions-nested/src/min_max.rs index b667a7b42650..6fc561d73623 100644 --- a/datafusion/functions-nested/src/max.rs +++ b/datafusion/functions-nested/src/min_max.rs @@ -123,22 +123,106 @@ impl ScalarUDFImpl for ArrayMax { pub fn array_max_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("array_max", args)?; match array.data_type() { - List(_) => general_array_max(as_list_array(array)?), - LargeList(_) => general_array_max(as_large_list_array(array)?), + List(_) => array_min_max_helper(as_list_array(array)?, min_max::max_batch), + LargeList(_) => { + array_min_max_helper(as_large_list_array(array)?, min_max::max_batch) + } arg_type => exec_err!("array_max does not support type: {arg_type}"), } } -fn general_array_max( +make_udf_expr_and_func!( + ArrayMin, + array_min, + array, + "returns the minimum value in the array", + array_min_udf +); +#[user_doc( + doc_section(label = "Array Functions"), + description = "Returns the minimum value in the array.", + syntax_example = "array_min(array)", + sql_example = r#"```sql +> select array_min([3,1,4,2]); ++-----------------------------------------+ +| array_min(List([3,1,4,2])) | ++-----------------------------------------+ +| 1 | ++-----------------------------------------+ +```"#, + argument( + name = "array", + description = "Array expression. Can be a constant, column, or function, and any combination of array operators." + ) +)] +#[derive(Debug)] +struct ArrayMin { + signature: Signature, +} + +impl Default for ArrayMin { + fn default() -> Self { + Self::new() + } +} + +impl ArrayMin { + fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for ArrayMin { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "array_min" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let [array] = take_function_args(self.name(), arg_types)?; + match array { + List(field) | LargeList(field) => Ok(field.data_type().clone()), + arg_type => plan_err!("{} does not support type {}", self.name(), arg_type), + } + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + make_scalar_function(array_min_inner)(&args.args) + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +pub fn array_min_inner(args: &[ArrayRef]) -> Result { + let [array] = take_function_args("array_min", args)?; + match array.data_type() { + List(_) => array_min_max_helper(as_list_array(array)?, min_max::min_batch), + LargeList(_) => { + array_min_max_helper(as_large_list_array(array)?, min_max::min_batch) + } + arg_type => exec_err!("array_min does not support type: {arg_type}"), + } +} + +fn array_min_max_helper( array: &GenericListArray, + agg_fn: fn(&ArrayRef) -> Result, ) -> Result { let null_value = ScalarValue::try_from(array.value_type())?; let result_vec: Vec = array .iter() - .map(|arr| { - arr.as_ref() - .map_or_else(|| Ok(null_value.clone()), min_max::max_batch) - }) + .map(|arr| arr.as_ref().map_or_else(|| Ok(null_value.clone()), agg_fn)) .try_collect()?; ScalarValue::iter_to_array(result_vec) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index f64535e66071..a3d9c3e1d9c1 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1535,6 +1535,91 @@ NULL query error DataFusion error: Error during planning: 'array_max' does not support zero arguments select array_max(); +## array_min + +query I +select array_min(make_array(5, 3, 6, 4)); +---- +3 + +query I +select array_min(make_array(5, 3, 4, NULL, 6, NULL)); +---- +3 + +query ? +select array_min(make_array(NULL, NULL)); +---- +NULL + +query T +select array_min(make_array('h', 'e', 'o', 'l', 'l')); +---- +e + +query T +select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL)); +---- +e + +query B +select array_min(make_array(false, true, false, true)); +---- +false + +query B +select array_min(make_array(false, true, NULL, false, true)); +---- +false + +query D +select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01')); +---- +1985-11-01 + +query D +select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL)); +---- +1993-03-01 + +query P +select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01')); +---- +1984-10-01T00:00:00 + +query P +select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01')); +---- +1995-06-01T00:00:00 + +query R +select array_min(make_array(5.1, -3.2, 6.3, 4.9)); +---- +-3.2 + +query ?I +select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d)) +---- +[-1, 0, 1] -1 +[9, 10, 11] 9 +[19, 20, 21] 19 +[29, 30, 31] 29 +[NULL, NULL, NULL] NULL + +query II +select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +1 1 + +query ? +select array_min(make_array()); +---- +NULL + +# Testing with empty arguments should result in an error +query error DataFusion error: Error during planning: 'array_min' does not support zero arguments +select array_min(); + ## array_pop_back (aliases: `list_pop_back`) @@ -6009,7 +6094,7 @@ false false NULL false false false false NULL # Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m'] -# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m'] +# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m'] # Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r'] # Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b'] # Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x'] diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index bbd9c0c4d1f5..eb4b86e4b486 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2552,6 +2552,7 @@ _Alias of [current_date](#current_date)._ - [array_join](#array_join) - [array_length](#array_length) - [array_max](#array_max) +- [array_min](#array_min) - [array_ndims](#array_ndims) - [array_pop_back](#array_pop_back) - [array_pop_front](#array_pop_front) @@ -3058,6 +3059,29 @@ array_max(array) - list_max +### `array_min` + +Returns the minimum value in the array. + +```sql +array_min(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_min([3,1,4,2]); ++-----------------------------------------+ +| array_min(List([3,1,4,2])) | ++-----------------------------------------+ +| 1 | ++-----------------------------------------+ +``` + ### `array_ndims` Returns the number of dimensions of the array.