Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion/functions-aggregate/src/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ macro_rules! min_max_batch {
}

/// dynamically-typed min(array) -> ScalarValue
fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
pub fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
Ok(match values.data_type() {
DataType::Utf8 => {
typed_min_max_batch_string!(values, StringArray, Utf8, min_string)
Expand Down
7 changes: 5 additions & 2 deletions datafusion/functions-nested/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub mod map_entries;
pub mod map_extract;
pub mod map_keys;
pub mod map_values;
pub mod max;
pub mod min_max;
pub mod planner;
pub mod position;
pub mod range;
Expand Down Expand Up @@ -100,6 +100,8 @@ pub mod expr_fn {
pub use super::map_extract::map_extract;
pub use super::map_keys::map_keys;
pub use super::map_values::map_values;
pub use super::min_max::array_max;
pub use super::min_max::array_min;
pub use super::position::array_position;
pub use super::position::array_positions;
pub use super::range::gen_series;
Expand Down Expand Up @@ -148,7 +150,8 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
length::array_length_udf(),
distance::array_distance_udf(),
flatten::flatten_udf(),
max::array_max_udf(),
min_max::array_max_udf(),
min_max::array_min_udf(),
sort::array_sort_udf(),
repeat::array_repeat_udf(),
resize::array_resize_udf(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,22 +123,106 @@ impl ScalarUDFImpl for ArrayMax {
pub fn array_max_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
    // Kernel behind `array_max`: takes exactly one list-typed array and
    // returns one value per row, produced by `min_max::max_batch` over that
    // row's elements.
    let [array] = take_function_args("array_max", args)?;
    match array.data_type() {
        // Both offset widths share one generic helper; only the downcast
        // differs. The earlier `general_array_max` arms were removed because
        // they shadowed these arms and referenced a helper that no longer
        // exists.
        List(_) => array_min_max_helper(as_list_array(array)?, min_max::max_batch),
        LargeList(_) => {
            array_min_max_helper(as_large_list_array(array)?, min_max::max_batch)
        }
        // Any other input type is reported as an execution error.
        arg_type => exec_err!("array_max does not support type: {arg_type}"),
    }
}

fn general_array_max<O: OffsetSizeTrait>(
// Declares the `array_min` entry points for the `ArrayMin` UDF.
// NOTE(review): presumably this generates both the `array_min` expression
// builder (re-exported from `expr_fn` in lib.rs) and the `array_min_udf()`
// accessor (registered in `all_default_nested_functions`) — confirm against
// the macro definition.
make_udf_expr_and_func!(
ArrayMin,
array_min,
array,
"returns the minimum value in the array",
array_min_udf
);
// User-facing documentation for `array_min`; `documentation()` on the
// `ScalarUDFImpl` impl returns it via `self.doc()`.
// NOTE(review): the `array_min` section in
// docs/source/user-guide/sql/scalar_functions.md carries the same text, so
// keep the two in sync when editing these strings.
#[user_doc(
doc_section(label = "Array Functions"),
description = "Returns the minimum value in the array.",
syntax_example = "array_min(array)",
sql_example = r#"```sql
> select array_min([3,1,4,2]);
+-----------------------------------------+
| array_min(List([3,1,4,2])) |
+-----------------------------------------+
| 1 |
+-----------------------------------------+
```"#,
argument(
name = "array",
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
)
)]
#[derive(Debug)]
// Scalar UDF implementing `array_min(array)`: the minimum element of each
// list value.
struct ArrayMin {
// Accepted argument signature; set once in `ArrayMin::new`.
signature: Signature,
}

impl Default for ArrayMin {
fn default() -> Self {
Self::new()
}
}

impl ArrayMin {
    /// Builds the UDF with an immutable-volatility signature created by
    /// `Signature::array`.
    fn new() -> Self {
        let signature = Signature::array(Volatility::Immutable);
        Self { signature }
    }
}

/// Wires `ArrayMin` into the scalar UDF machinery under the name `array_min`.
impl ScalarUDFImpl for ArrayMin {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn name(&self) -> &str {
        "array_min"
    }

    fn signature(&self) -> &Signature {
        &self.signature
    }

    /// The result type is the element type of the single `List`/`LargeList`
    /// argument; any other argument type is a planning error.
    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
        let [list_type] = take_function_args(self.name(), arg_types)?;
        let (List(element) | LargeList(element)) = list_type else {
            return plan_err!("{} does not support type {}", self.name(), list_type);
        };
        Ok(element.data_type().clone())
    }

    /// Evaluates the function by adapting `array_min_inner` with
    /// `make_scalar_function` and applying it to the call's arguments.
    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
        let evaluate = make_scalar_function(array_min_inner);
        evaluate(&args.args)
    }

    fn documentation(&self) -> Option<&Documentation> {
        self.doc()
    }
}

/// Kernel behind `array_min`.
///
/// Takes exactly one argument, which must be a `List` or `LargeList` array,
/// and returns an array holding one value per row, produced by
/// `min_max::min_batch` over that row's elements. Any other input type yields
/// an execution error.
pub fn array_min_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
    let [array] = take_function_args("array_min", args)?;
    match array.data_type() {
        List(_) => {
            let lists = as_list_array(array)?;
            array_min_max_helper(lists, min_max::min_batch)
        }
        LargeList(_) => {
            let lists = as_large_list_array(array)?;
            array_min_max_helper(lists, min_max::min_batch)
        }
        arg_type => exec_err!("array_min does not support type: {arg_type}"),
    }
}

/// Shared implementation of `array_min` / `array_max`.
///
/// Applies `agg_fn` to every row of `array` and collects the per-row results
/// into a single output array.
///
/// * `array` - list array whose rows are reduced one at a time.
/// * `agg_fn` - reduction applied to a row's elements (the callers in this
///   file pass `min_max::min_batch` or `min_max::max_batch`).
///
/// Rows that are themselves null produce the scalar built from the list's
/// element type (`null_value`). Any error from `agg_fn`, or from building the
/// output array, is propagated.
fn array_min_max_helper<O: OffsetSizeTrait>(
    array: &GenericListArray<O>,
    agg_fn: fn(&ArrayRef) -> Result<ScalarValue>,
) -> Result<ArrayRef> {
    // Placeholder emitted for null rows.
    // NOTE(review): `ScalarValue::try_from(DataType)` is assumed to yield a
    // typed NULL of the element type — confirm.
    let null_value = ScalarValue::try_from(array.value_type())?;
    // Exactly one mapping step, driven by the caller-supplied `agg_fn`. The
    // earlier step that hard-coded `min_max::max_batch` was removed: it
    // ignored `agg_fn` and left a second `map` over `Result` values that
    // cannot type-check.
    let result_vec: Vec<ScalarValue> = array
        .iter()
        .map(|arr| arr.as_ref().map_or_else(|| Ok(null_value.clone()), agg_fn))
        .try_collect()?;
    ScalarValue::iter_to_array(result_vec)
}
87 changes: 86 additions & 1 deletion datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,91 @@ NULL
query error DataFusion error: Error during planning: 'array_max' does not support zero arguments
select array_max();

## array_min

query I
select array_min(make_array(5, 3, 6, 4));
----
3

query I
select array_min(make_array(5, 3, 4, NULL, 6, NULL));
----
3

query ?
select array_min(make_array(NULL, NULL));
----
NULL

query T
select array_min(make_array('h', 'e', 'o', 'l', 'l'));
----
e

query T
select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
----
e

query B
select array_min(make_array(false, true, false, true));
----
false

query B
select array_min(make_array(false, true, NULL, false, true));
----
false

query D
select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01'));
----
1985-11-01

query D
select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL));
----
1993-03-01

query P
select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01'));
----
1984-10-01T00:00:00

query P
select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01'));
----
1995-06-01T00:00:00

query R
select array_min(make_array(5.1, -3.2, 6.3, 4.9));
----
-3.2

query ?I
select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d))
----
[-1, 0, 1] -1
[9, 10, 11] 9
[19, 20, 21] 19
[29, 30, 31] 29
[NULL, NULL, NULL] NULL

query II
select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
----
1 1

query ?
select array_min(make_array());
----
NULL

# Testing with empty arguments should result in an error
query error DataFusion error: Error during planning: 'array_min' does not support zero arguments
select array_min();


## array_pop_back (aliases: `list_pop_back`)

Expand Down Expand Up @@ -6009,7 +6094,7 @@ false false NULL false
false false false NULL

# Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m']
# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m']
# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m']
# Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r']
# Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b']
# Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x']
Expand Down
24 changes: 24 additions & 0 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2552,6 +2552,7 @@ _Alias of [current_date](#current_date)._
- [array_join](#array_join)
- [array_length](#array_length)
- [array_max](#array_max)
- [array_min](#array_min)
- [array_ndims](#array_ndims)
- [array_pop_back](#array_pop_back)
- [array_pop_front](#array_pop_front)
Expand Down Expand Up @@ -3058,6 +3059,29 @@ array_max(array)

- list_max

### `array_min`

Returns the minimum value in the array.

```sql
array_min(array)
```

#### Arguments

- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators.

#### Example

```sql
> select array_min([3,1,4,2]);
+-----------------------------------------+
| array_min(List([3,1,4,2])) |
+-----------------------------------------+
| 1 |
+-----------------------------------------+
```

### `array_ndims`

Returns the number of dimensions of the array.
Expand Down