From 5907c21f20448fe75b101ebd6a5d9746383386e1 Mon Sep 17 00:00:00 2001 From: Igor Izvekov Date: Thu, 20 Jul 2023 22:16:49 +0300 Subject: [PATCH] fix: `array_concat` with arrays with different dimensions, add `_list*` aliases for `_array*` functions (#7008) * fix: array_concat with arrays with different dimensions does not work properly * fix: cargo doc * fix: array_concat does not work with nested empty arrays * fix: some comments --- .../tests/sqllogictests/test_files/array.slt | 193 ++++++++++-- datafusion/expr/src/built_in_function.rs | 69 ++++- .../physical-expr/src/array_expressions.rs | 34 ++- docs/source/user-guide/expressions.md | 36 +-- .../source/user-guide/sql/scalar_functions.md | 289 +++++++++++++++++- 5 files changed, 550 insertions(+), 71 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 1e9b32414bba..6069ee0e9360 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -163,7 +163,7 @@ select column1, column2, column3 from arrays_values_without_nulls; ### Array function tests -## make_array +## make_array (aliases: `make_list`) # make_array scalar function #1 query ??? @@ -207,6 +207,12 @@ select make_array(make_array()), make_array(make_array(make_array())) ---- [[]] [[[]]] +# make_list scalar function #8 (function alias: `make_array`) +query ??? +select make_list(1, 2, 3), make_list(1.0, 2.0, 3.0), make_list('h', 'e', 'l', 'l', 'o'); +---- +[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o] + # make_array scalar function with nulls query ??? select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o'); @@ -286,9 +292,9 @@ from values; -## array_append +## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`) -# array_append scalar function #2 +# array_append scalar function #1 query ? select array_append(make_array(), 4); ---- @@ -312,6 +318,24 @@ select array_append(make_array([1], [2], [3]), make_array(4)), array_append(make ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +# list_append scalar function #5 (function alias `array_append`) +query ??? +select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_push_back scalar function #6 (function alias `array_append`) +query ??? +select array_push_back(make_array(1, 2, 3), 4), array_push_back(make_array(1.0, 2.0, 3.0), 4.0), array_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# list_push_back scalar function #7 (function alias `array_append`) +query ??? +select list_push_back(make_array(1, 2, 3), 4), list_push_back(make_array(1.0, 2.0, 3.0), 4.0), list_push_back(make_array('h', 'e', 'l', 'l'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + # array_append with columns #1 query ? select array_append(column1, column2) from arrays_values; @@ -351,7 +375,7 @@ select array_append(column1, make_array(1, 11, 111)), array_append(make_array(ma [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] -## array_prepend +## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) # array_prepend scalar function #1 query ? @@ -377,6 +401,24 @@ select array_prepend(make_array(1), make_array(make_array(2), make_array(3), mak ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +# list_prepend scalar function #5 (function alias `array_prepend`) +query ??? +select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# array_push_front scalar function #6 (function alias `array_prepend`) +query ??? +select array_push_front(1, make_array(2, 3, 4)), array_push_front(1.0, make_array(2.0, 3.0, 4.0)), array_push_front('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +# list_push_front scalar function #7 (function alias `array_prepend`) +query ??? +select list_push_front(1, make_array(2, 3, 4)), list_push_front(1.0, make_array(2.0, 3.0, 4.0)), list_push_front('h', make_array('e', 'l', 'l', 'o')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + # array_prepend with columns #1 query ? select array_prepend(column2, column1) from arrays_values; @@ -436,7 +478,7 @@ select array_fill(1, make_array()) ---- [] -## array_concat +## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`) # array_concat scalar function #1 query ?? @@ -474,30 +516,78 @@ select array_concat(make_array(), make_array(2, 3)); ---- [2, 3] +# array_concat scalar function #7 (with empty arrays) +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array())); +---- +[[1, 2], [3, 4]] + +# array_concat scalar function #8 (with empty arrays) +query ? +select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()), make_array(make_array(), make_array()), make_array(make_array(5, 6), make_array(7, 8))); +---- +[[1, 2], [3, 4], [5, 6], [7, 8]] + +# array_concat scalar function #9 (with empty arrays) +query ? +select array_concat(make_array(make_array()), make_array(make_array(1, 2), make_array(3, 4))); +---- +[[1, 2], [3, 4]] + +# array_cat scalar function #10 (function alias `array_concat`) +query ?? +select array_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_cat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# list_concat scalar function #11 (function alias `array_concat`) +query ?? +select list_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_concat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + +# list_cat scalar function #12 (function alias `array_concat`) +query ?? +select list_cat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), list_cat(make_array([1], [2]), make_array([3], [4])); +---- +[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]] + # array_concat with different dimensions #1 (2D + 1D) query ? -select array_concat(make_array([1,2], [3,4]), make_array(5, 6)) +select array_concat(make_array([1,2], [3,4]), make_array(5, 6)); ---- [[1, 2], [3, 4], [5, 6]] # array_concat with different dimensions #2 (1D + 2D) query ? -select array_concat(make_array(5, 6), make_array([1,2], [3,4])) +select array_concat(make_array(5, 6), make_array([1,2], [3,4])); ---- [[5, 6], [1, 2], [3, 4]] # array_concat with different dimensions #3 (2D + 1D + 1D) query ? -select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)) +select array_concat(make_array([1,2], [3,4]), make_array(5, 6), make_array(7,8)); ---- [[1, 2], [3, 4], [5, 6], [7, 8]] # array_concat with different dimensions #4 (1D + 2D + 3D) query ? -select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])) +select array_concat(make_array(10, 20), make_array([30, 40]), make_array([[50, 60]])); ---- [[[10, 20]], [[30, 40]], [[50, 60]]] +# array_concat with different dimensions #5 (2D + 1D + 3D) +query ? +select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]])); +---- +[[[30, 40]], [[10, 20]], [[50, 60]]] + +# array_concat with different dimensions #6 (2D + 1D + 3D + 4D + 3D) +query ? +select array_concat(make_array([30, 40]), make_array(10, 20), make_array([[50, 60]]), make_array([[[70, 80]]]), make_array([[80, 40]])); +---- +[[[[30, 40]]], [[[10, 20]]], [[[50, 60]]], [[[70, 80]]], [[[80, 40]]]] + # array_concat column-wise #1 query ? select array_concat(column1, make_array(0)) from arrays_values_without_nulls; @@ -635,7 +725,7 @@ select array_concat(make_array(column3), column1, column2) from arrays_values_v2 [, 11, 12] [] -## array_position +## array_position (aliases: `list_position`, `array_indexof`, `list_indexof`) # array_position scalar function #1 query III @@ -661,6 +751,24 @@ select array_position(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, ---- 4 3 +# list_position scalar function #5 (function alias `array_position`) +query III +select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); +---- +3 5 1 + +# array_indexof scalar function #6 (function alias `array_position`) +query III +select array_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), array_indexof([1, 2, 3, 4, 5], 5), array_indexof([1, 1, 1], 1); +---- +3 5 1 + +# list_indexof scalar function #7 (function alias `array_position`) +query III +select list_indexof(['h', 'e', 'l', 'l', 'o'], 'l'), list_indexof([1, 2, 3, 4, 5], 5), list_indexof([1, 1, 1], 1); +---- +3 5 1 + # array_position with columns #1 query II select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls; @@ -693,7 +801,7 @@ select array_position(make_array([1, 2, 3], [4, 5, 6], [11, 12, 13]), column2), NULL 6 4 NULL 1 NULL -## array_positions +## array_positions (aliases: `list_positions`) # array_positions scalar function #1 query ??? @@ -701,12 +809,18 @@ select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3 ---- [3, 4] [5] [1, 2, 3] -# array_positions scalar function #2 +# array_positions scalar function #2 (element is list) query ? select array_positions(make_array([1, 2, 3], [2, 1, 3], [1, 5, 6], [2, 1, 3], [4, 5, 6]), [2, 1, 3]); ---- [2, 4] +# list_positions scalar function #3 (function alias `array_positions`) +query ??? +select list_positions(['h', 'e', 'l', 'l', 'o'], 'l'), list_positions([1, 2, 3, 4, 5], 5), list_positions([1, 1, 1], 1); +---- +[3, 4] [5] [1, 2, 3] + # array_positions with columns #1 query ? select array_positions(column1, column2) from arrays_values_without_nulls; @@ -747,9 +861,9 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, ---- [1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] -## array_to_string +## array_to_string (aliases: `list_to_string`, `array_join`, `list_join`) -# array_to_string scalar function +# array_to_string scalar function #1 query TTT select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); ---- @@ -767,6 +881,24 @@ select array_to_string(make_array(), ',') ---- (empty) +# list_to_string scalar function #4 (function alias `array_to_string`) +query TTT +select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3, 4, 5], '-'), list_to_string([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# array_join scalar function #5 (function alias `array_to_string`) +query TTT +select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + +# list_join scalar function #6 (function alias `list_join`) +query TTT +select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|'); +---- +h,e,l,l,o 1-2-3-4-5 1|2|3 + # array_to_string scalar function with nulls #1 query TTT select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|'); @@ -887,9 +1019,9 @@ select trim_array(make_array(), 0), trim_array(make_array(), 1) ---- [] [] -## array_length +## array_length (aliases: `list_length`) -# array_length scalar function +# array_length scalar function #1 query III select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6])); ---- @@ -919,6 +1051,12 @@ select array_length(make_array()), array_length(make_array(), 1), array_length(m ---- 0 0 NULL +# list_length scalar function #6 (function alias `array_length`) +query III +select list_length(make_array(1, 2, 3, 4, 5)), list_length(make_array(1, 2, 3)), list_length(make_array([1, 2], [3, 4], [5, 6])); +---- +5 3 3 + # array_length with columns query I select array_length(column1, column3) from arrays_values; @@ -945,7 +1083,7 @@ NULL 10 NULL 10 NULL 10 -## array_dims +## array_dims (aliases: `list_dims`) # array_dims scalar function query ??? @@ -965,6 +1103,12 @@ select array_dims(make_array()), array_dims(make_array(make_array())) ---- NULL [1, 0] +# list_dims scalar function #4 (function alias `array_dims`) +query ??? +select list_dims(make_array(1, 2, 3)), list_dims(make_array([1, 2], [3, 4])), list_dims(make_array([[[[1], [2]]]])); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims with columns query ??? select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; @@ -977,9 +1121,9 @@ NULL [3] [4] [2, 2] NULL [1] [2, 2] [3] NULL -## array_ndims +## array_ndims (aliases: `list_ndims`) -# array_ndims scalar function +# array_ndims scalar function #1 query III select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]])); ---- @@ -997,6 +1141,17 @@ select array_ndims(make_array()), array_ndims(make_array(make_array())) ---- NULL 2 +# list_ndims scalar function #4 (function alias `array_ndims`) +query III +select list_ndims(make_array(1, 2, 3)), list_ndims(make_array([1, 2], [3, 4])), list_ndims(make_array([[[[1], [2]]]])); +---- +1 2 5 + +query II +select array_ndims(make_array()), array_ndims(make_array(make_array())) +---- +NULL 2 + # array_ndims with columns query III select array_ndims(column1), array_ndims(column2), array_ndims(column3) from arrays; diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 66c20d362e3f..dded73c088b4 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -430,7 +430,22 @@ impl BuiltinScalarFunction { ) } - /// Returns the output [`DataType` of this function + /// Returns the dimension [`DataType`] of [`DataType::List`]. + fn return_dimension(self, input_expr_type: DataType) -> u64 { + let mut res: u64 = 1; + let mut current_data_type = input_expr_type; + loop { + match current_data_type { + DataType::List(field) => { + current_data_type = field.data_type().clone(); + res += 1; + } + _ => return res, + } + } + } + + /// Returns the output [`DataType`] of this function pub fn return_type(self, input_expr_types: &[DataType]) -> Result { use DataType::*; use TimeUnit::*; @@ -464,12 +479,16 @@ impl BuiltinScalarFunction { }, BuiltinScalarFunction::ArrayConcat => { let mut expr_type = Null; + let mut max_dims = 0; for input_expr_type in input_expr_types { match input_expr_type { List(field) => { if !field.data_type().equals_datatype(&Null) { - expr_type = field.data_type().clone(); - break; + let dims = self.return_dimension(input_expr_type.clone()); + if max_dims < dims { + max_dims = dims; + expr_type = input_expr_type.clone(); + } } } _ => { @@ -480,7 +499,7 @@ impl BuiltinScalarFunction { } } - Ok(List(Arc::new(Field::new("item", expr_type, true)))) + Ok(expr_type) } BuiltinScalarFunction::ArrayContains => Ok(Boolean), BuiltinScalarFunction::ArrayDims => { @@ -1250,21 +1269,43 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] { BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"], // array functions - BuiltinScalarFunction::ArrayAppend => &["array_append"], - BuiltinScalarFunction::ArrayConcat => &["array_concat"], + BuiltinScalarFunction::ArrayAppend => &[ + "array_append", + "list_append", + "array_push_back", + "list_push_back", + ], + BuiltinScalarFunction::ArrayConcat => { + &["array_concat", "array_cat", "list_concat", "list_cat"] + } BuiltinScalarFunction::ArrayContains => &["array_contains"], - BuiltinScalarFunction::ArrayDims => &["array_dims"], + BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"], BuiltinScalarFunction::ArrayFill => &["array_fill"], - BuiltinScalarFunction::ArrayLength => &["array_length"], - BuiltinScalarFunction::ArrayNdims => &["array_ndims"], - BuiltinScalarFunction::ArrayPosition => &["array_position"], - BuiltinScalarFunction::ArrayPositions => &["array_positions"], - BuiltinScalarFunction::ArrayPrepend => &["array_prepend"], + BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"], + BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"], + BuiltinScalarFunction::ArrayPosition => &[ + "array_position", + "list_position", + "array_indexof", + "list_indexof", + ], + BuiltinScalarFunction::ArrayPositions => &["array_positions", "list_positions"], + BuiltinScalarFunction::ArrayPrepend => &[ + "array_prepend", + "list_prepend", + "array_push_front", + "list_push_front", + ], BuiltinScalarFunction::ArrayRemove => &["array_remove"], BuiltinScalarFunction::ArrayReplace => &["array_replace"], - BuiltinScalarFunction::ArrayToString => &["array_to_string"], + BuiltinScalarFunction::ArrayToString => &[ + "array_to_string", + "list_to_string", + "array_join", + "list_join", + ], BuiltinScalarFunction::Cardinality => &["cardinality"], - BuiltinScalarFunction::MakeArray => &["make_array"], + BuiltinScalarFunction::MakeArray => &["make_array", "make_list"], BuiltinScalarFunction::TrimArray => &["trim_array"], } } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index b16432b50531..47558ecc262c 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -163,13 +163,20 @@ fn compute_array_length( /// Returns the dimension of the array fn compute_array_ndims(arr: Option) -> Result> { + Ok(compute_array_ndims_with_datatype(arr)?.0) +} + +/// Returns the dimension and the datatype of elements of the array +fn compute_array_ndims_with_datatype( + arr: Option, +) -> Result<(Option, DataType)> { let mut res: u64 = 1; let mut value = match arr { Some(arr) => arr, - None => return Ok(None), + None => return Ok((None, DataType::Null)), }; if value.is_empty() { - return Ok(None); + return Ok((None, DataType::Null)); } loop { @@ -178,7 +185,7 @@ fn compute_array_ndims(arr: Option) -> Result> { value = downcast_arg!(value, ListArray).value(0); res += 1; } - _ => return Ok(Some(res)), + data_type => return Ok((Some(res), data_type.clone())), } } } @@ -623,15 +630,20 @@ fn concat_internal(args: &[ArrayRef]) -> Result { /// Array_concat/Array_cat SQL function pub fn array_concat(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - DataType::List(field) => match field.data_type() { - DataType::Null => array_concat(&args[1..]), - _ => concat_internal(args), - }, - data_type => Err(DataFusionError::NotImplemented(format!( - "Array is not type '{data_type:?}'." - ))), + let mut new_args = vec![]; + for arg in args { + let (ndim, lower_data_type) = + compute_array_ndims_with_datatype(Some(arg.clone()))?; + if ndim.is_none() || ndim == Some(1) { + return Err(DataFusionError::NotImplemented(format!( + "Array is not type '{lower_data_type:?}'." + ))); + } else if !lower_data_type.equals_datatype(&DataType::Null) { + new_args.push(arg.clone()); + } } + + concat_internal(new_args.as_slice()) } macro_rules! fill { diff --git a/docs/source/user-guide/expressions.md b/docs/source/user-guide/expressions.md index b5d4cfa0ac9e..cf1e2f58af07 100644 --- a/docs/source/user-guide/expressions.md +++ b/docs/source/user-guide/expressions.md @@ -179,24 +179,24 @@ Unlike to some databases the math functions in Datafusion works the same way as ## Array Expressions -| Function | Notes | -| ----------------------------------------- | ------------------------------------------------------------------------------------------------ | -| array_append(array, element) | Appends an element to the end of an array. | -| array_concat(array[, ..., array_n]) | Concatenates arrays. | -| array_contains(first_array, second_array) | Returns true, if each element of the second array appearing in the first array, otherwise false. | -| array_dims(array) | Returns an array of the array's dimensions. | -| array_fill(element, array) | Returns an array filled with copies of the given value. | -| array_length(array, dimension) | Returns the length of the array dimension. | -| array_ndims(array) | Returns the number of dimensions of the array. | -| array_position(array, element) | Searches for an element in the array, returns first occurrence. | -| array_positions(array, element) | Searches for an element in the array, returns all occurrences. | -| array_prepend(array, element) | Prepends an element to the beginning of an array. | -| array_remove(array, element) | Removes all elements equal to the given value from the array. | -| array_replace(array, from, to) | Replaces a specified element with another specified element. | -| array_to_string(array, delimeter) | Converts each element to its text representation. | -| cardinality(array) | Returns the total number of elements in the array. | -| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. | -| trim_array(array, n) | Removes the last n elements from the array. | +| Function | Notes | +| ----------------------------------------- | -------------------------------------------------------------------------------------------------------------- | +| array_append(array, element) | Appends an element to the end of an array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]` | +| array_concat(array[, ..., array_n]) | Concatenates arrays. `array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]` | +| array_contains(first_array, second_array) | Returns true, if each element of the second array appearing in the first array, otherwise false. | +| array_dims(array) | Returns an array of the array's dimensions. `array_dims([[1, 2, 3], [4, 5, 6]]) -> [2, 3]` | +| array_fill(element, array) | Returns an array filled with copies of the given value. | +| array_length(array, dimension) | Returns the length of the array dimension. `array_length([1, 2, 3, 4, 5]) -> 5` | +| array_ndims(array) | Returns the number of dimensions of the array. `array_ndims([[1, 2, 3], [4, 5, 6]]) -> 2` | +| array_position(array, element) | Searches for an element in the array, returns first occurrence. `array_position([1, 2, 2, 3, 4], 2) -> 2` | +| array_positions(array, element) | Searches for an element in the array, returns all occurrences. `array_positions([1, 2, 2, 3, 4], 2) -> [2, 3]` | +| array_prepend(array, element) | Prepends an element to the beginning of an array. `array_prepend(1, [2, 3, 4]) -> [1, 2, 3, 4]` | +| array_remove(array, element) | Removes all elements equal to the given value from the array. | +| array_replace(array, from, to) | Replaces a specified element with another specified element. | +| array_to_string(array, delimeter) | Converts each element to its text representation. `array_to_string([1, 2, 3, 4], ',') -> 1,2,3,4` | +| cardinality(array) | Returns the total number of elements in the array. `cardinality([[1, 2, 3], [4, 5, 6]]) -> 6` | +| make_array(value1, [value2 [, ...]]) | Returns an Arrow array using the specified input expressions. `make_array(1, 2, 3) -> [1, 2, 3]` | +| trim_array(array, n) | Removes the last n elements from the array. | ## Regular Expressions diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 58d0dd253e64..31b075821b10 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1420,20 +1420,40 @@ from_unixtime(expression) ## Array Functions - [array_append](#array_append) +- [array_cat](#array_cat) - [array_concat](#array_concat) - [array_contains](#array_contains) - [array_dims](#array_dims) - [array_fill](#array_fill) +- [array_indexof](#array_indexof) +- [array_join](#array_join) - [array_length](#array_length) - [array_ndims](#array_ndims) +- [array_prepend](#array_prepend) - [array_position](#array_position) - [array_positions](#array_positions) -- [array_prepend](#array_prepend) +- [array_push_back](#array_push_back) +- [array_push_front](#array_push_front) - [array_remove](#array_remove) - [array_replace](#array_replace) - [array_to_string](#array_to_string) - [cardinality](#cardinality) +- [list_append](#list_append) +- [list_cat](#list_cat) +- [list_concat](#list_concat) +- [list_dims](#list_dims) +- [list_indexof](#list_indexof) +- [list_join](#list_join) +- [list_length](#list_length) +- [list_ndims](#list_ndims) +- [list_prepend](#list_prepend) +- [list_position](#list_position) +- [list_positions](#list_positions) +- [list_push_back](#list_push_back) +- [list_push_front](#list_push_front) +- [list_to_string](#list_to_string) - [make_array](#make_array) +- [make_list](#make_list) - [trim_array](#trim_array) ### `array_append` @@ -1450,6 +1470,27 @@ array_append(array, element) Can be a constant, column, or function, and any combination of array operators. - **element**: Element to append to the array. +#### Example + +``` +❯ select array_append([1, 2, 3], 4); ++--------------------------------------+ +| array_append(List([1,2,3]),Int64(4)) | ++--------------------------------------+ +| [1, 2, 3, 4] | ++--------------------------------------+ +``` + +#### Aliases + +- array_push_back +- list_append +- list_push_back + +### `array_cat` + +_Alias of [array_concat](#array_concat)._ + ### `array_concat` Concatenates arrays. @@ -1464,6 +1505,23 @@ array_concat(array[, ..., array_n]) Can be a constant, column, or function, and any combination of array operators. - **array_n**: Subsequent array column or literal array to concatenate. +#### Example + +``` +❯ select array_concat([1, 2], [3, 4], [5, 6]); ++---------------------------------------------------+ +| array_concat(List([1,2]),List([3,4]),List([5,6])) | ++---------------------------------------------------+ +| [1, 2, 3, 4, 5, 6] | ++---------------------------------------------------+ +``` + +#### Aliases + +- array_cat +- list_cat +- list_concat + ### `array_contains` Returns true, if each element of the second array appears in the first array, otherwise false. @@ -1492,6 +1550,21 @@ array_dims(array) - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +#### Example + +``` +❯ select array_dims([[1, 2, 3], [4, 5, 6]]); ++---------------------------------+ +| array_dims(List([1,2,3,4,5,6])) | ++---------------------------------+ +| [2, 3] | ++---------------------------------+ +``` + +#### Aliases + +- list_dims + ### `array_fill` Returns an array filled with copies of the given value. @@ -1506,6 +1579,14 @@ array_fill(element, array) Can be a constant, column, or function, and any combination of array operators. - **element**: Element to copy to the array. +### `array_indexof` + +_Alias of [array_position](#array_position)._ + +### `array_join` + +_Alias of [array_to_string](#array_to_string)._ + ### `array_length` Returns the length of the array dimension. @@ -1520,6 +1601,21 @@ array_length(array, dimension) Can be a constant, column, or function, and any combination of array operators. - **dimension**: Array dimension. +#### Example + +``` +❯ select array_length([1, 2, 3, 4, 5]); ++---------------------------------+ +| array_length(List([1,2,3,4,5])) | ++---------------------------------+ +| 5 | ++---------------------------------+ +``` + +#### Aliases + +- list_length + ### `array_ndims` Returns the number of dimensions of the array. @@ -1533,6 +1629,52 @@ array_ndims(array, element) - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +#### Example + +``` +❯ select array_ndims([[1, 2, 3], [4, 5, 6]]); ++----------------------------------+ +| array_ndims(List([1,2,3,4,5,6])) | ++----------------------------------+ +| 2 | ++----------------------------------+ +``` + +#### Aliases + +- list_ndims + +### `array_prepend` + +Prepends an element to the beginning of an array. + +``` +array_prepend(element, array) +``` + +#### Arguments + +- **element**: Element to prepend to the array. +- **array**: Array expression. + Can be a constant, column, or function, and any combination of array operators. + +#### Example + +``` +❯ select array_prepend(1, [2, 3, 4]); ++---------------------------------------+ +| array_prepend(Int64(1),List([2,3,4])) | ++---------------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------------+ +``` + +#### Aliases + +- array_push_front +- list_prepend +- list_push_front + ### `array_position` Returns a string with an input string repeated a specified number. @@ -1549,6 +1691,23 @@ array_position(array, element, index) - **element**: Element to search for position in the array. - **index**: Index at which to start searching. +#### Example + +``` +❯ select array_position([1, 2, 2, 3, 1, 4], 2); ++----------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2)) | ++----------------------------------------------+ +| 2 | ++----------------------------------------------+ +``` + +#### Aliases + +- array_indexof +- list_indexof +- list_position + ### `array_positions` Searches for an element in the array, returns all occurrences. @@ -1563,19 +1722,28 @@ array_positions(array, element) Can be a constant, column, or function, and any combination of array operators. - **element**: Element to search for positions in the array. -### `array_prepend` - -Prepends an element to the beginning of an array. +#### Example ``` -array_prepend(element, array) +❯ select array_positions([1, 2, 2, 3, 1, 4], 2); ++-----------------------------------------------+ +| array_positions(List([1,2,2,3,1,4]),Int64(2)) | ++-----------------------------------------------+ +| [2, 3] | ++-----------------------------------------------+ ``` -#### Arguments +#### Aliases -- **element**: Element to prepend to the array. -- **array**: Array expression. - Can be a constant, column, or function, and any combination of array operators. +- list_positions + +### `array_push_back` + +_Alias of [array_append](#array_append)._ + +### `array_push_front` + +_Alias of [array_prepend](#array_prepend)._ ### `array_remove` @@ -1620,6 +1788,23 @@ array_to_string(array, delimeter) Can be a constant, column, or function, and any combination of array operators. - **delimeter**: Array element separator. +#### Example + +``` +❯ select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); ++----------------------------------------------------+ +| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | ++----------------------------------------------------+ +| 1,2,3,4,5,6,7,8 | ++----------------------------------------------------+ +``` + +#### Aliases + +- array_join +- list_join +- list_to_string + ### `cardinality` Returns the total number of elements in the array. @@ -1633,6 +1818,73 @@ cardinality(array) - **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +#### Example + +``` +❯ select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); ++--------------------------------------+ +| cardinality(List([1,2,3,4,5,6,7,8])) | ++--------------------------------------+ +| 8 | ++--------------------------------------+ +``` + +### `list_append` + +_Alias of [array_append](#array_append)._ + +### `list_cat` + +_Alias of [array_concat](#array_concat)._ + +### `list_concat` + +_Alias of [array_concat](#array_concat)._ + +### `list_dims` + +_Alias of [array_dims](#array_dims)._ + +### `list_indexof` + +_Alias of [array_position](#array_position)._ + +### `list_join` + +_Alias of [array_to_string](#array_to_string)._ + +### `list_length` + +_Alias of [array_length](#array_length)._ + +### `list_ndims` + +_Alias of [array_ndims](#array_ndims)._ + +### `list_prepend` + +_Alias of [array_prepend](#array_prepend)._ + +### `list_position` + +_Alias of [array_position](#array_position)._ + +### `list_positions` + +_Alias of [array_positions](#array_positions)._ + +### `list_push_back` + +_Alias of [array_append](#array_append)._ + +### `list_push_front` + +_Alias of [array_prepend](#array_prepend)._ + +### `list_to_string` + +_Alias of [list_to_string](#list_to_string)._ + ### `make_array` Returns an Arrow array using the specified input expressions. @@ -1647,6 +1899,25 @@ make_array(expression1[, ..., expression_n]) Can be a constant, column, or function, and any combination of arithmetic or string operators. +#### Example + +``` +❯ select make_array(1, 2, 3, 4, 5); ++----------------------------------------------------------+ +| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | ++----------------------------------------------------------+ +| [1, 2, 3, 4, 5] | ++----------------------------------------------------------+ +``` + +#### Aliases + +- make_list + +### `make_list` + +_Alias of [make_array](#make_array)._ + ### `trim_array` Removes the last n elements from the array.