From 118caff7b8710d78bdcbff4801a69554ed8050d9 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sat, 6 Apr 2024 12:16:51 -0400 Subject: [PATCH 1/4] move Floor, Gcd, Lcm, Pi to datafusion-functions --- datafusion/expr/src/built_in_function.rs | 29 +--- datafusion/expr/src/expr_fn.rs | 17 +- datafusion/functions/src/math/gcd.rs | 145 ++++++++++++++++++ datafusion/functions/src/math/lcm.rs | 126 +++++++++++++++ datafusion/functions/src/math/mod.rs | 14 +- datafusion/functions/src/math/pi.rs | 76 +++++++++ .../optimizer/src/analyzer/type_coercion.rs | 30 ++-- .../physical-expr/src/equivalence/ordering.rs | 44 ++++-- .../src/equivalence/projection.rs | 41 +++-- .../src/equivalence/properties.rs | 41 ++--- datafusion/physical-expr/src/functions.rs | 10 +- .../physical-expr/src/math_expressions.rs | 117 -------------- datafusion/physical-expr/src/udf.rs | 55 ++----- datafusion/physical-expr/src/utils/mod.rs | 99 +++++++++++- datafusion/proto/proto/datafusion.proto | 8 +- datafusion/proto/src/generated/pbjson.rs | 12 -- datafusion/proto/src/generated/prost.rs | 16 +- .../proto/src/logical_plan/from_proto.rs | 20 +-- datafusion/proto/src/logical_plan/to_proto.rs | 4 - datafusion/sql/src/expr/function.rs | 20 ++- datafusion/sql/src/expr/mod.rs | 7 +- 21 files changed, 588 insertions(+), 343 deletions(-) create mode 100644 datafusion/functions/src/math/gcd.rs create mode 100644 datafusion/functions/src/math/lcm.rs create mode 100644 datafusion/functions/src/math/pi.rs diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index dc1fc98a5c02..7426ccd938e7 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -45,20 +45,12 @@ pub enum BuiltinScalarFunction { Exp, /// factorial Factorial, - /// floor - Floor, - /// gcd, Greatest common divisor - Gcd, - /// lcm, Least common multiple - Lcm, /// iszero Iszero, /// log, same as log10 Log, /// nanvl Nanvl, - /// pi - Pi, /// power Power, /// round @@ -135,13 +127,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Coalesce => Volatility::Immutable, BuiltinScalarFunction::Exp => Volatility::Immutable, BuiltinScalarFunction::Factorial => Volatility::Immutable, - BuiltinScalarFunction::Floor => Volatility::Immutable, - BuiltinScalarFunction::Gcd => Volatility::Immutable, BuiltinScalarFunction::Iszero => Volatility::Immutable, - BuiltinScalarFunction::Lcm => Volatility::Immutable, BuiltinScalarFunction::Log => Volatility::Immutable, BuiltinScalarFunction::Nanvl => Volatility::Immutable, - BuiltinScalarFunction::Pi => Volatility::Immutable, BuiltinScalarFunction::Power => Volatility::Immutable, BuiltinScalarFunction::Round => Volatility::Immutable, BuiltinScalarFunction::Cot => Volatility::Immutable, @@ -183,13 +171,10 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::InitCap => { utf8_to_str_type(&input_expr_types[0], "initcap") } - BuiltinScalarFunction::Pi => Ok(Float64), BuiltinScalarFunction::Random => Ok(Float64), BuiltinScalarFunction::EndsWith => Ok(Boolean), - BuiltinScalarFunction::Factorial - | BuiltinScalarFunction::Gcd - | BuiltinScalarFunction::Lcm => Ok(Int64), + BuiltinScalarFunction::Factorial => Ok(Int64), BuiltinScalarFunction::Power => match &input_expr_types[0] { Int64 => Ok(Int64), @@ -210,7 +195,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp - | BuiltinScalarFunction::Floor | BuiltinScalarFunction::Round | BuiltinScalarFunction::Trunc | BuiltinScalarFunction::Cot => match input_expr_types[0] { @@ -248,7 +232,6 @@ impl BuiltinScalarFunction { ], self.volatility(), ), - BuiltinScalarFunction::Pi => Signature::exact(vec![], self.volatility()), BuiltinScalarFunction::Random => Signature::exact(vec![], self.volatility()), BuiltinScalarFunction::Power => Signature::one_of( vec![Exact(vec![Int64, Int64]), Exact(vec![Float64, Float64])], @@ -289,12 +272,8 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Factorial => { Signature::uniform(1, vec![Int64], self.volatility()) } - BuiltinScalarFunction::Gcd | BuiltinScalarFunction::Lcm => { - Signature::uniform(2, vec![Int64], self.volatility()) - } BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp - | BuiltinScalarFunction::Floor | BuiltinScalarFunction::Cot => { // math expressions expect 1 argument of type f64 or f32 // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we @@ -319,10 +298,8 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp | BuiltinScalarFunction::Factorial - | BuiltinScalarFunction::Floor | BuiltinScalarFunction::Round | BuiltinScalarFunction::Trunc - | BuiltinScalarFunction::Pi ) { Some(vec![Some(true)]) } else if *self == BuiltinScalarFunction::Log { @@ -339,13 +316,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Cot => &["cot"], BuiltinScalarFunction::Exp => &["exp"], BuiltinScalarFunction::Factorial => &["factorial"], - BuiltinScalarFunction::Floor => &["floor"], - BuiltinScalarFunction::Gcd => &["gcd"], BuiltinScalarFunction::Iszero => &["iszero"], - BuiltinScalarFunction::Lcm => &["lcm"], BuiltinScalarFunction::Log => &["log"], BuiltinScalarFunction::Nanvl => &["nanvl"], - BuiltinScalarFunction::Pi => &["pi"], BuiltinScalarFunction::Power => &["power", "pow"], BuiltinScalarFunction::Random => &["random"], BuiltinScalarFunction::Round => &["round"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index a1235a093d76..dd22014b89ca 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -298,11 +298,6 @@ pub fn concat_ws(sep: Expr, values: Vec) -> Expr { )) } -/// Returns an approximate value of π -pub fn pi() -> Expr { - Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Pi, vec![])) -} - /// Returns a random value in the range 0.0 <= x < 1.0 pub fn random() -> Expr { Expr::ScalarFunction(ScalarFunction::new(BuiltinScalarFunction::Random, vec![])) @@ -538,12 +533,6 @@ macro_rules! nary_scalar_expr { // math functions scalar_expr!(Cot, cot, num, "cotangent of a number"); scalar_expr!(Factorial, factorial, num, "factorial"); -scalar_expr!( - Floor, - floor, - num, - "nearest integer less than or equal to argument" -); scalar_expr!( Ceil, ceil, @@ -557,8 +546,7 @@ nary_scalar_expr!( "truncate toward zero, with optional precision" ); scalar_expr!(Exp, exp, num, "exponential"); -scalar_expr!(Gcd, gcd, arg_1 arg_2, "greatest common divisor"); -scalar_expr!(Lcm, lcm, arg_1 arg_2, "least common multiple"); + scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`"); scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`"); @@ -1058,7 +1046,6 @@ mod test { fn scalar_function_definitions() { test_unary_scalar_expr!(Cot, cot); test_unary_scalar_expr!(Factorial, factorial); - test_unary_scalar_expr!(Floor, floor); test_unary_scalar_expr!(Ceil, ceil); test_nary_scalar_expr!(Round, round, input); test_nary_scalar_expr!(Round, round, input, decimal_places); @@ -1068,8 +1055,6 @@ mod test { test_scalar_expr!(Nanvl, nanvl, x, y); test_scalar_expr!(Iszero, iszero, input); - test_scalar_expr!(Gcd, gcd, arg_1, arg_2); - test_scalar_expr!(Lcm, lcm, arg_1, arg_2); test_scalar_expr!(InitCap, initcap, string); test_scalar_expr!(EndsWith, ends_with, string, characters); } diff --git a/datafusion/functions/src/math/gcd.rs b/datafusion/functions/src/math/gcd.rs new file mode 100644 index 000000000000..41c9e4e23314 --- /dev/null +++ b/datafusion/functions/src/math/gcd.rs @@ -0,0 +1,145 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, Int64Array}; +use std::any::Any; +use std::mem::swap; +use std::sync::Arc; + +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::Int64; + +use crate::utils::make_scalar_function; +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +#[derive(Debug)] +pub struct GcdFunc { + signature: Signature, +} + +impl Default for GcdFunc { + fn default() -> Self { + Self::new() + } +} + +impl GcdFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform(2, vec![Int64], Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for GcdFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "gcd" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Int64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(gcd, vec![])(args) + } +} + +/// Gcd SQL function +fn gcd(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + Int64 => Ok(Arc::new(make_function_inputs2!( + &args[0], + &args[1], + "x", + "y", + Int64Array, + Int64Array, + { compute_gcd } + )) as ArrayRef), + other => exec_err!("Unsupported data type {other:?} for function gcd"), + } +} + +/// Computes greatest common divisor using Binary GCD algorithm. +pub fn compute_gcd(x: i64, y: i64) -> i64 { + let mut a = x.wrapping_abs(); + let mut b = y.wrapping_abs(); + + if a == 0 { + return b; + } + if b == 0 { + return a; + } + + let shift = (a | b).trailing_zeros(); + a >>= shift; + b >>= shift; + a >>= a.trailing_zeros(); + + loop { + b >>= b.trailing_zeros(); + if a > b { + swap(&mut a, &mut b); + } + + b -= a; + + if b == 0 { + return a << shift; + } + } +} + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use arrow::array::{ArrayRef, Int64Array}; + + use crate::math::gcd::gcd; + use datafusion_common::cast::as_int64_array; + + #[test] + fn test_gcd_i64() { + let args: Vec = vec![ + Arc::new(Int64Array::from(vec![0, 3, 25, -16])), // x + Arc::new(Int64Array::from(vec![0, -2, 15, 8])), // y + ]; + + let result = gcd(&args).expect("failed to initialize function gcd"); + let ints = as_int64_array(&result).expect("failed to initialize function gcd"); + + assert_eq!(ints.len(), 4); + assert_eq!(ints.value(0), 0); + assert_eq!(ints.value(1), 1); + assert_eq!(ints.value(2), 5); + assert_eq!(ints.value(3), 8); + } +} diff --git a/datafusion/functions/src/math/lcm.rs b/datafusion/functions/src/math/lcm.rs new file mode 100644 index 000000000000..3674f7371de2 --- /dev/null +++ b/datafusion/functions/src/math/lcm.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, Int64Array}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::Int64; + +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +use crate::math::gcd::compute_gcd; +use crate::utils::make_scalar_function; + +#[derive(Debug)] +pub struct LcmFunc { + signature: Signature, +} + +impl Default for LcmFunc { + fn default() -> Self { + LcmFunc::new() + } +} + +impl LcmFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform(2, vec![Int64], Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for LcmFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "lcm" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Int64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(lcm, vec![])(args) + } +} + +/// Lcm SQL function +fn lcm(args: &[ArrayRef]) -> Result { + let compute_lcm = |x: i64, y: i64| { + let a = x.wrapping_abs(); + let b = y.wrapping_abs(); + + if a == 0 || b == 0 { + return 0; + } + a / compute_gcd(a, b) * b + }; + + match args[0].data_type() { + Int64 => Ok(Arc::new(make_function_inputs2!( + &args[0], + &args[1], + "x", + "y", + Int64Array, + Int64Array, + { compute_lcm } + )) as ArrayRef), + other => exec_err!("Unsupported data type {other:?} for function lcm"), + } +} + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use arrow::array::{ArrayRef, Int64Array}; + + use datafusion_common::cast::as_int64_array; + + use crate::math::lcm::lcm; + + #[test] + fn test_lcm_i64() { + let args: Vec = vec![ + Arc::new(Int64Array::from(vec![0, 3, 25, -16])), // x + Arc::new(Int64Array::from(vec![0, -2, 15, 8])), // y + ]; + + let result = lcm(&args).expect("failed to initialize function lcm"); + let ints = as_int64_array(&result).expect("failed to initialize function lcm"); + + assert_eq!(ints.len(), 4); + assert_eq!(ints.value(0), 0); + assert_eq!(ints.value(1), 6); + assert_eq!(ints.value(2), 75); + assert_eq!(ints.value(3), 16); + } +} diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index f241c8b3250b..3a1f7cc13bb7 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -18,11 +18,17 @@ //! "math" DataFusion functions pub mod abs; +pub mod gcd; +pub mod lcm; pub mod nans; +pub mod pi; // Create UDFs make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(abs::AbsFunc, ABS, abs); +make_udf_function!(gcd::GcdFunc, GCD, gcd); +make_udf_function!(lcm::LcmFunc, LCM, lcm); +make_udf_function!(pi::PiFunc, PI, pi); make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)])); make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)])); @@ -50,6 +56,8 @@ make_math_unary_udf!(CosFunc, COS, cos, cos, None); make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, None); make_math_unary_udf!(DegreesFunc, DEGREES, degrees, to_degrees, None); +make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, Some(vec![Some(true)])); + // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( ( @@ -86,5 +94,9 @@ export_functions!( (cbrt, num, "cube root of a number"), (cos, num, "cosine"), (cosh, num, "hyperbolic cosine"), - (degrees, num, "converts radians to degrees") + (degrees, num, "converts radians to degrees"), + (gcd, x y, "greatest common divisor"), + (lcm, x y, "least common multiple"), + (floor, num, "nearest integer less than or equal to argument"), + (pi, , "Returns an approximate value of π") ); diff --git a/datafusion/functions/src/math/pi.rs b/datafusion/functions/src/math/pi.rs new file mode 100644 index 000000000000..0801e797511b --- /dev/null +++ b/datafusion/functions/src/math/pi.rs @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::Float64Array; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::Float64; + +use datafusion_common::{exec_err, Result}; +use datafusion_expr::{ColumnarValue, FuncMonotonicity, Volatility}; +use datafusion_expr::{ScalarUDFImpl, Signature}; + +#[derive(Debug)] +pub struct PiFunc { + signature: Signature, +} + +impl Default for PiFunc { + fn default() -> Self { + PiFunc::new() + } +} + +impl PiFunc { + pub fn new() -> Self { + Self { + signature: Signature::exact(vec![], Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for PiFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "pi" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Float64) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + if !matches!(&args[0], ColumnarValue::Array(_)) { + return exec_err!("Expect pi function to take no param"); + } + let array = Float64Array::from_value(std::f64::consts::PI, 1); + Ok(ColumnarValue::Array(Arc::new(array))) + } + + fn monotonicity(&self) -> Result> { + Ok(Some(vec![Some(true)])) + } +} diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 04de243fba07..1ea8b9534e80 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -19,9 +19,8 @@ use std::sync::Arc; -use crate::analyzer::AnalyzerRule; - use arrow::datatypes::{DataType, IntervalUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; use datafusion_common::{ @@ -51,6 +50,8 @@ use datafusion_expr::{ WindowFrameUnits, }; +use crate::analyzer::AnalyzerRule; + #[derive(Default)] pub struct TypeCoercion {} @@ -758,25 +759,25 @@ mod test { use std::any::Any; use std::sync::{Arc, OnceLock}; - use crate::analyzer::type_coercion::{ - coerce_case_expression, TypeCoercion, TypeCoercionRewriter, - }; - use crate::test::assert_analyzed_plan_eq; - use arrow::datatypes::{DataType, Field, TimeUnit}; + use datafusion_common::tree_node::{TransformedResult, TreeNode}; use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction}; use datafusion_expr::logical_plan::{EmptyRelation, Projection}; use datafusion_expr::{ cast, col, concat, concat_ws, create_udaf, is_true, lit, - AccumulatorFactoryFunction, AggregateFunction, AggregateUDF, BinaryExpr, - BuiltinScalarFunction, Case, ColumnarValue, Expr, ExprSchemable, Filter, - LogicalPlan, Operator, ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF, - Subquery, Volatility, + AccumulatorFactoryFunction, AggregateFunction, AggregateUDF, BinaryExpr, Case, + ColumnarValue, Expr, ExprSchemable, Filter, LogicalPlan, Operator, ScalarUDF, + ScalarUDFImpl, Signature, SimpleAggregateUDF, Subquery, Volatility, }; use datafusion_physical_expr::expressions::AvgAccumulator; + use crate::analyzer::type_coercion::{ + coerce_case_expression, TypeCoercion, TypeCoercionRewriter, + }; + use crate::test::assert_analyzed_plan_eq; + fn empty() -> Arc { Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, @@ -875,14 +876,15 @@ mod test { // test that automatic argument type coercion for scalar functions work let empty = empty(); let lit_expr = lit(10i64); - let fun: BuiltinScalarFunction = BuiltinScalarFunction::Floor; + let fun = ScalarUDF::new_from_impl(TestScalarUDF {}); let scalar_function_expr = - Expr::ScalarFunction(ScalarFunction::new(fun, vec![lit_expr])); + Expr::ScalarFunction(ScalarFunction::new_udf(Arc::new(fun), vec![lit_expr])); let plan = LogicalPlan::Projection(Projection::try_new( vec![scalar_function_expr], empty, )?); - let expected = "Projection: floor(CAST(Int64(10) AS Float64))\n EmptyRelation"; + let expected = + "Projection: TestScalarUDF(CAST(Int64(10) AS Float32))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), &plan, expected) } diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index 1364d3a8c028..688cdf798bdd 100644 --- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. -use arrow_schema::SortOptions; use std::hash::Hash; use std::sync::Arc; +use arrow_schema::SortOptions; + use crate::equivalence::add_offset_to_expr; use crate::{LexOrdering, PhysicalExpr, PhysicalSortExpr}; @@ -220,6 +221,16 @@ fn resolve_overlap(orderings: &mut [LexOrdering], idx: usize, pre_idx: usize) -> #[cfg(test)] mod tests { + use std::sync::Arc; + + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_schema::SortOptions; + use itertools::Itertools; + + use datafusion_common::{DFSchema, Result}; + use datafusion_expr::execution_props::ExecutionProps; + use datafusion_expr::{BuiltinScalarFunction, Operator, ScalarUDF}; + use crate::equivalence::tests::{ convert_to_orderings, convert_to_sort_exprs, create_random_schema, create_test_params, generate_table_for_eq_properties, is_table_same_after_sort, @@ -231,14 +242,8 @@ mod tests { use crate::expressions::Column; use crate::expressions::{col, BinaryExpr}; use crate::functions::create_physical_expr; + use crate::utils::tests::TestScalarUDF; use crate::{PhysicalExpr, PhysicalSortExpr}; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_schema::SortOptions; - use datafusion_common::Result; - use datafusion_expr::execution_props::ExecutionProps; - use datafusion_expr::{BuiltinScalarFunction, Operator}; - use itertools::Itertools; - use std::sync::Arc; #[test] fn test_ordering_satisfy() -> Result<()> { @@ -281,17 +286,20 @@ mod tests { let col_d = &col("d", &test_schema)?; let col_e = &col("e", &test_schema)?; let col_f = &col("f", &test_schema)?; - let floor_a = &create_physical_expr( - &BuiltinScalarFunction::Floor, + let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new()); + let floor_a = &crate::udf::create_physical_expr( + &test_fun, &[col("a", &test_schema)?], &test_schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; - let floor_f = &create_physical_expr( - &BuiltinScalarFunction::Floor, + let floor_f = &crate::udf::create_physical_expr( + &test_fun, &[col("f", &test_schema)?], &test_schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; let exp_a = &create_physical_expr( &BuiltinScalarFunction::Exp, @@ -804,11 +812,13 @@ mod tests { let table_data_with_properties = generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?; - let floor_a = create_physical_expr( - &BuiltinScalarFunction::Floor, + let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new()); + let floor_a = crate::udf::create_physical_expr( + &test_fun, &[col("a", &test_schema)?], &test_schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; let a_plus_b = Arc::new(BinaryExpr::new( col("a", &test_schema)?, diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/equivalence/projection.rs index b8231a74c271..5efcf5942c39 100644 --- a/datafusion/physical-expr/src/equivalence/projection.rs +++ b/datafusion/physical-expr/src/equivalence/projection.rs @@ -17,13 +17,14 @@ use std::sync::Arc; -use crate::expressions::Column; -use crate::PhysicalExpr; - use arrow::datatypes::SchemaRef; + use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::Result; +use crate::expressions::Column; +use crate::PhysicalExpr; + /// Stores the mapping between source expressions and target expressions for a /// projection. #[derive(Debug, Clone)] @@ -111,7 +112,14 @@ impl ProjectionMapping { mod tests { use std::sync::Arc; - use super::*; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_schema::{SortOptions, TimeUnit}; + use itertools::Itertools; + + use datafusion_common::{DFSchema, Result}; + use datafusion_expr::execution_props::ExecutionProps; + use datafusion_expr::{BuiltinScalarFunction, Operator, ScalarUDF}; + use crate::equivalence::tests::{ apply_projection, convert_to_orderings, convert_to_orderings_owned, create_random_schema, generate_table_for_eq_properties, is_table_same_after_sort, @@ -119,16 +127,11 @@ mod tests { }; use crate::equivalence::EquivalenceProperties; use crate::expressions::{col, BinaryExpr}; - use crate::functions::create_physical_expr; + use crate::udf::create_physical_expr; + use crate::utils::tests::TestScalarUDF; use crate::PhysicalSortExpr; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_schema::{SortOptions, TimeUnit}; - use datafusion_common::Result; - use datafusion_expr::execution_props::ExecutionProps; - use datafusion_expr::{BuiltinScalarFunction, Operator}; - - use itertools::Itertools; + use super::*; #[test] fn project_orderings() -> Result<()> { @@ -646,7 +649,7 @@ mod tests { col_b.clone(), )) as Arc; - let round_c = &create_physical_expr( + let round_c = &crate::functions::create_physical_expr( &BuiltinScalarFunction::Round, &[col_c.clone()], &schema, @@ -973,11 +976,13 @@ mod tests { let table_data_with_properties = generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?; // Floor(a) + let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new()); let floor_a = create_physical_expr( - &BuiltinScalarFunction::Floor, + &test_fun, &[col("a", &test_schema)?], &test_schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; // a + b let a_plus_b = Arc::new(BinaryExpr::new( @@ -1049,11 +1054,13 @@ mod tests { let table_data_with_properties = generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?; // Floor(a) + let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new()); let floor_a = create_physical_expr( - &BuiltinScalarFunction::Floor, + &test_fun, &[col("a", &test_schema)?], &test_schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; // a + b let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index 7ce540b267b2..c14c88d6c69b 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -18,7 +18,13 @@ use std::hash::{Hash, Hasher}; use std::sync::Arc; -use super::ordering::collapse_lex_ordering; +use arrow_schema::{SchemaRef, SortOptions}; +use indexmap::{IndexMap, IndexSet}; +use itertools::Itertools; + +use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; +use datafusion_common::{JoinSide, JoinType, Result}; + use crate::equivalence::{ collapse_lex_req, EquivalenceGroup, OrderingEquivalenceClass, ProjectionMapping, }; @@ -30,12 +36,7 @@ use crate::{ PhysicalSortRequirement, }; -use arrow_schema::{SchemaRef, SortOptions}; -use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{JoinSide, JoinType, Result}; - -use indexmap::{IndexMap, IndexSet}; -use itertools::Itertools; +use super::ordering::collapse_lex_ordering; /// A `EquivalenceProperties` object stores useful information related to a schema. /// Currently, it keeps track of: @@ -1296,7 +1297,13 @@ mod tests { use std::ops::Not; use std::sync::Arc; - use super::*; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow_schema::{Fields, SortOptions, TimeUnit}; + use itertools::Itertools; + + use datafusion_common::{DFSchema, Result}; + use datafusion_expr::{Operator, ScalarUDF}; + use crate::equivalence::add_offset_to_expr; use crate::equivalence::tests::{ convert_to_orderings, convert_to_sort_exprs, convert_to_sort_reqs, @@ -1304,16 +1311,10 @@ mod tests { generate_table_for_eq_properties, is_table_same_after_sort, output_schema, }; use crate::expressions::{col, BinaryExpr, Column}; - use crate::functions::create_physical_expr; + use crate::utils::tests::TestScalarUDF; use crate::PhysicalSortExpr; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_schema::{Fields, SortOptions, TimeUnit}; - use datafusion_common::Result; - use datafusion_expr::execution_props::ExecutionProps; - use datafusion_expr::{BuiltinScalarFunction, Operator}; - - use itertools::Itertools; + use super::*; #[test] fn project_equivalence_properties_test() -> Result<()> { @@ -1792,11 +1793,13 @@ mod tests { let table_data_with_properties = generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?; - let floor_a = create_physical_expr( - &BuiltinScalarFunction::Floor, + let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new()); + let floor_a = crate::udf::create_physical_expr( + &test_fun, &[col("a", &test_schema)?], &test_schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; let a_plus_b = Arc::new(BinaryExpr::new( col("a", &test_schema)?, diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 770d9184325a..79d69b273d2c 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -184,16 +184,9 @@ pub fn create_physical_fun( BuiltinScalarFunction::Factorial => { Arc::new(|args| make_scalar_function_inner(math_expressions::factorial)(args)) } - BuiltinScalarFunction::Floor => Arc::new(math_expressions::floor), - BuiltinScalarFunction::Gcd => { - Arc::new(|args| make_scalar_function_inner(math_expressions::gcd)(args)) - } BuiltinScalarFunction::Iszero => { Arc::new(|args| make_scalar_function_inner(math_expressions::iszero)(args)) } - BuiltinScalarFunction::Lcm => { - Arc::new(|args| make_scalar_function_inner(math_expressions::lcm)(args)) - } BuiltinScalarFunction::Nanvl => { Arc::new(|args| make_scalar_function_inner(math_expressions::nanvl)(args)) } @@ -204,7 +197,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::Trunc => { Arc::new(|args| make_scalar_function_inner(math_expressions::trunc)(args)) } - BuiltinScalarFunction::Pi => Arc::new(math_expressions::pi), BuiltinScalarFunction::Power => { Arc::new(|args| make_scalar_function_inner(math_expressions::power)(args)) } @@ -573,7 +565,7 @@ mod tests { let execution_props = ExecutionProps::new(); let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let funs = [BuiltinScalarFunction::Pi, BuiltinScalarFunction::Random]; + let funs = [BuiltinScalarFunction::Random]; for fun in funs.iter() { create_physical_expr_with_type_coercion(fun, &[], &schema, &execution_props)?; diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index f8244ad9525f..a883c7430d9a 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -19,7 +19,6 @@ use std::any::type_name; use std::iter; -use std::mem::swap; use std::sync::Arc; use arrow::array::ArrayRef; @@ -181,79 +180,6 @@ pub fn factorial(args: &[ArrayRef]) -> Result { } } -/// Computes greatest common divisor using Binary GCD algorithm. -fn compute_gcd(x: i64, y: i64) -> i64 { - let mut a = x.wrapping_abs(); - let mut b = y.wrapping_abs(); - - if a == 0 { - return b; - } - if b == 0 { - return a; - } - - let shift = (a | b).trailing_zeros(); - a >>= shift; - b >>= shift; - a >>= a.trailing_zeros(); - - loop { - b >>= b.trailing_zeros(); - if a > b { - swap(&mut a, &mut b); - } - - b -= a; - - if b == 0 { - return a << shift; - } - } -} - -/// Gcd SQL function -pub fn gcd(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - DataType::Int64 => Ok(Arc::new(make_function_inputs2!( - &args[0], - &args[1], - "x", - "y", - Int64Array, - Int64Array, - { compute_gcd } - )) as ArrayRef), - other => exec_err!("Unsupported data type {other:?} for function gcd"), - } -} - -/// Lcm SQL function -pub fn lcm(args: &[ArrayRef]) -> Result { - let compute_lcm = |x: i64, y: i64| { - let a = x.wrapping_abs(); - let b = y.wrapping_abs(); - - if a == 0 || b == 0 { - return 0; - } - a / compute_gcd(a, b) * b - }; - - match args[0].data_type() { - DataType::Int64 => Ok(Arc::new(make_function_inputs2!( - &args[0], - &args[1], - "x", - "y", - Int64Array, - Int64Array, - { compute_lcm } - )) as ArrayRef), - other => exec_err!("Unsupported data type {other:?} for function lcm"), - } -} - /// Nanvl SQL function pub fn nanvl(args: &[ArrayRef]) -> Result { match args[0].data_type() { @@ -345,15 +271,6 @@ pub fn iszero(args: &[ArrayRef]) -> Result { } } -/// Pi SQL function -pub fn pi(args: &[ColumnarValue]) -> Result { - if !matches!(&args[0], ColumnarValue::Array(_)) { - return exec_err!("Expect pi function to take no param"); - } - let array = Float64Array::from_value(std::f64::consts::PI, 1); - Ok(ColumnarValue::Array(Arc::new(array))) -} - /// Random SQL function pub fn random(args: &[ColumnarValue]) -> Result { let len: usize = match &args[0] { @@ -808,40 +725,6 @@ mod tests { assert_eq!(ints, &expected); } - #[test] - fn test_gcd_i64() { - let args: Vec = vec![ - Arc::new(Int64Array::from(vec![0, 3, 25, -16])), // x - Arc::new(Int64Array::from(vec![0, -2, 15, 8])), // y - ]; - - let result = gcd(&args).expect("failed to initialize function gcd"); - let ints = as_int64_array(&result).expect("failed to initialize function gcd"); - - assert_eq!(ints.len(), 4); - assert_eq!(ints.value(0), 0); - assert_eq!(ints.value(1), 1); - assert_eq!(ints.value(2), 5); - assert_eq!(ints.value(3), 8); - } - - #[test] - fn test_lcm_i64() { - let args: Vec = vec![ - Arc::new(Int64Array::from(vec![0, 3, 25, -16])), // x - Arc::new(Int64Array::from(vec![0, -2, 15, 8])), // y - ]; - - let result = lcm(&args).expect("failed to initialize function lcm"); - let ints = as_int64_array(&result).expect("failed to initialize function lcm"); - - assert_eq!(ints.len(), 4); - assert_eq!(ints.value(0), 0); - assert_eq!(ints.value(1), 6); - assert_eq!(ints.value(2), 75); - assert_eq!(ints.value(3), 16); - } - #[test] fn test_cot_f32() { let args: Vec = diff --git a/datafusion/physical-expr/src/udf.rs b/datafusion/physical-expr/src/udf.rs index 4fc94bfa15ec..368dfdf92f45 100644 --- a/datafusion/physical-expr/src/udf.rs +++ b/datafusion/physical-expr/src/udf.rs @@ -16,14 +16,17 @@ // under the License. //! UDF support -use crate::{PhysicalExpr, ScalarFunctionExpr}; +use std::sync::Arc; + use arrow_schema::Schema; + use datafusion_common::{DFSchema, Result}; pub use datafusion_expr::ScalarUDF; use datafusion_expr::{ type_coercion::functions::data_types, Expr, ScalarFunctionDefinition, }; -use std::sync::Arc; + +use crate::{PhysicalExpr, ScalarFunctionExpr}; /// Create a physical expression of the UDF. /// @@ -60,58 +63,18 @@ pub fn create_physical_expr( #[cfg(test)] mod tests { - use arrow_schema::{DataType, Schema}; + use arrow_schema::Schema; + use datafusion_common::{DFSchema, Result}; - use datafusion_expr::{ - ColumnarValue, FuncMonotonicity, ScalarUDF, ScalarUDFImpl, Signature, Volatility, - }; + use datafusion_expr::ScalarUDF; + use crate::utils::tests::TestScalarUDF; use crate::ScalarFunctionExpr; use super::create_physical_expr; #[test] fn test_functions() -> Result<()> { - #[derive(Debug, Clone)] - struct TestScalarUDF { - signature: Signature, - } - - impl TestScalarUDF { - fn new() -> Self { - let signature = - Signature::exact(vec![DataType::Float64], Volatility::Immutable); - - Self { signature } - } - } - - impl ScalarUDFImpl for TestScalarUDF { - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn name(&self) -> &str { - "my_fn" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::Float64) - } - - fn invoke(&self, _args: &[ColumnarValue]) -> Result { - unimplemented!("my_fn is not implemented") - } - - fn monotonicity(&self) -> Result> { - Ok(Some(vec![Some(true)])) - } - } - // create and register the udf let udf = ScalarUDF::from(TestScalarUDF::new()); diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index e55bc3d15665..d7bebbff891c 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -256,7 +256,9 @@ pub fn merge_vectors( } #[cfg(test)] -mod tests { +pub(crate) mod tests { + use arrow_array::{ArrayRef, Float32Array, Float64Array}; + use std::any::Any; use std::fmt::{Display, Formatter}; use std::sync::Arc; @@ -265,10 +267,103 @@ mod tests { use crate::PhysicalSortExpr; use arrow_schema::{DataType, Field, Schema}; - use datafusion_common::{Result, ScalarValue}; + use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; + use datafusion_expr::{ + ColumnarValue, FuncMonotonicity, ScalarUDFImpl, Signature, Volatility, + }; use petgraph::visit::Bfs; + #[derive(Debug, Clone)] + pub struct TestScalarUDF { + signature: Signature, + } + + impl TestScalarUDF { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform( + 1, + vec![Float64, Float32], + Volatility::Immutable, + ), + } + } + } + + impl ScalarUDFImpl for TestScalarUDF { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "test-scalar-udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let arg_type = &arg_types[0]; + + match arg_type { + DataType::Float32 => Ok(DataType::Float32), + _ => Ok(DataType::Float64), + } + } + + fn monotonicity(&self) -> Result> { + Ok(Some(vec![Some(true)])) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let args = ColumnarValue::values_to_arrays(args)?; + + let arr: ArrayRef = match args[0].data_type() { + DataType::Float64 => Arc::new({ + let arg = &args[0] + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal(format!( + "could not cast {} to {}", + self.name(), + std::any::type_name::() + )) + })?; + + arg.iter() + .map(|a| a.map(f64::floor)) + .collect::() + }), + DataType::Float32 => Arc::new({ + let arg = &args[0] + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DataFusionError::Internal(format!( + "could not cast {} to {}", + self.name(), + std::any::type_name::() + )) + })?; + + arg.iter() + .map(|a| a.map(f32::floor)) + .collect::() + }), + other => { + return exec_err!( + "Unsupported data type {other:?} for function {}", + self.name() + ); + } + }; + Ok(ColumnarValue::Array(arr)) + } + } + #[derive(Clone)] struct DummyProperty { expr_type: String, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 7f967657f573..b656bededc07 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -550,7 +550,7 @@ enum ScalarFunction { // 6 was Cos // 7 was Digest Exp = 8; - Floor = 9; + // 9 was Floor // 10 was Ln Log = 11; // 12 was Log10 @@ -621,12 +621,12 @@ enum ScalarFunction { // 77 was Sinh // 78 was Cosh // Tanh = 79 - Pi = 80; + // 80 was Pi // 81 was Degrees // 82 was Radians Factorial = 83; - Lcm = 84; - Gcd = 85; + // 84 was Lcm + // 85 was Gcd // 86 was ArrayAppend // 87 was ArrayConcat // 88 was ArrayDims diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 966d7f7f7487..c13ae045bdb5 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22794,7 +22794,6 @@ impl serde::Serialize for ScalarFunction { Self::Unknown => "unknown", Self::Ceil => "Ceil", Self::Exp => "Exp", - Self::Floor => "Floor", Self::Log => "Log", Self::Round => "Round", Self::Trunc => "Trunc", @@ -22804,10 +22803,7 @@ impl serde::Serialize for ScalarFunction { Self::Random => "Random", Self::Coalesce => "Coalesce", Self::Power => "Power", - Self::Pi => "Pi", Self::Factorial => "Factorial", - Self::Lcm => "Lcm", - Self::Gcd => "Gcd", Self::Cot => "Cot", Self::Nanvl => "Nanvl", Self::Iszero => "Iszero", @@ -22826,7 +22822,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "unknown", "Ceil", "Exp", - "Floor", "Log", "Round", "Trunc", @@ -22836,10 +22831,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Random", "Coalesce", "Power", - "Pi", "Factorial", - "Lcm", - "Gcd", "Cot", "Nanvl", "Iszero", @@ -22887,7 +22879,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "unknown" => Ok(ScalarFunction::Unknown), "Ceil" => Ok(ScalarFunction::Ceil), "Exp" => Ok(ScalarFunction::Exp), - "Floor" => Ok(ScalarFunction::Floor), "Log" => Ok(ScalarFunction::Log), "Round" => Ok(ScalarFunction::Round), "Trunc" => Ok(ScalarFunction::Trunc), @@ -22897,10 +22888,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Random" => Ok(ScalarFunction::Random), "Coalesce" => Ok(ScalarFunction::Coalesce), "Power" => Ok(ScalarFunction::Power), - "Pi" => Ok(ScalarFunction::Pi), "Factorial" => Ok(ScalarFunction::Factorial), - "Lcm" => Ok(ScalarFunction::Lcm), - "Gcd" => Ok(ScalarFunction::Gcd), "Cot" => Ok(ScalarFunction::Cot), "Nanvl" => Ok(ScalarFunction::Nanvl), "Iszero" => Ok(ScalarFunction::Iszero), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index c94aa1f4ed93..092d5c59d081 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2849,7 +2849,7 @@ pub enum ScalarFunction { /// 6 was Cos /// 7 was Digest Exp = 8, - Floor = 9, + /// 9 was Floor /// 10 was Ln Log = 11, /// 12 was Log10 @@ -2920,12 +2920,12 @@ pub enum ScalarFunction { /// 77 was Sinh /// 78 was Cosh /// Tanh = 79 - Pi = 80, + /// 80 was Pi /// 81 was Degrees /// 82 was Radians Factorial = 83, - Lcm = 84, - Gcd = 85, + /// 84 was Lcm + /// 85 was Gcd /// 86 was ArrayAppend /// 87 was ArrayConcat /// 88 was ArrayDims @@ -2989,7 +2989,6 @@ impl ScalarFunction { ScalarFunction::Unknown => "unknown", ScalarFunction::Ceil => "Ceil", ScalarFunction::Exp => "Exp", - ScalarFunction::Floor => "Floor", ScalarFunction::Log => "Log", ScalarFunction::Round => "Round", ScalarFunction::Trunc => "Trunc", @@ -2999,10 +2998,7 @@ impl ScalarFunction { ScalarFunction::Random => "Random", ScalarFunction::Coalesce => "Coalesce", ScalarFunction::Power => "Power", - ScalarFunction::Pi => "Pi", ScalarFunction::Factorial => "Factorial", - ScalarFunction::Lcm => "Lcm", - ScalarFunction::Gcd => "Gcd", ScalarFunction::Cot => "Cot", ScalarFunction::Nanvl => "Nanvl", ScalarFunction::Iszero => "Iszero", @@ -3015,7 +3011,6 @@ impl ScalarFunction { "unknown" => Some(Self::Unknown), "Ceil" => Some(Self::Ceil), "Exp" => Some(Self::Exp), - "Floor" => Some(Self::Floor), "Log" => Some(Self::Log), "Round" => Some(Self::Round), "Trunc" => Some(Self::Trunc), @@ -3025,10 +3020,7 @@ impl ScalarFunction { "Random" => Some(Self::Random), "Coalesce" => Some(Self::Coalesce), "Power" => Some(Self::Power), - "Pi" => Some(Self::Pi), "Factorial" => Some(Self::Factorial), - "Lcm" => Some(Self::Lcm), - "Gcd" => Some(Self::Gcd), "Cot" => Some(Self::Cot), "Nanvl" => Some(Self::Nanvl), "Iszero" => Some(Self::Iszero), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 96b3b5942ec3..9c24a3941895 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -39,9 +39,9 @@ use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_ use datafusion_expr::{ ceil, coalesce, concat_expr, concat_ws_expr, cot, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, - factorial, floor, gcd, initcap, iszero, lcm, log, + factorial, initcap, iszero, log, logical_plan::{PlanType, StringifiedPlan}, - nanvl, pi, power, random, round, trunc, AggregateFunction, Between, BinaryExpr, + nanvl, power, random, round, trunc, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, @@ -423,9 +423,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Exp => Self::Exp, ScalarFunction::Log => Self::Log, ScalarFunction::Factorial => Self::Factorial, - ScalarFunction::Gcd => Self::Gcd, - ScalarFunction::Lcm => Self::Lcm, - ScalarFunction::Floor => Self::Floor, ScalarFunction::Ceil => Self::Ceil, ScalarFunction::Round => Self::Round, ScalarFunction::Trunc => Self::Trunc, @@ -435,7 +432,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::InitCap => Self::InitCap, ScalarFunction::Random => Self::Random, ScalarFunction::Coalesce => Self::Coalesce, - ScalarFunction::Pi => Self::Pi, ScalarFunction::Power => Self::Power, ScalarFunction::Nanvl => Self::Nanvl, ScalarFunction::Iszero => Self::Iszero, @@ -1301,9 +1297,6 @@ pub fn parse_expr( match scalar_function { ScalarFunction::Unknown => Err(proto_error("Unknown scalar function")), ScalarFunction::Exp => Ok(exp(parse_expr(&args[0], registry, codec)?)), - ScalarFunction::Floor => { - Ok(floor(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::Factorial => { Ok(factorial(parse_expr(&args[0], registry, codec)?)) } @@ -1313,14 +1306,6 @@ pub fn parse_expr( ScalarFunction::InitCap => { Ok(initcap(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::Gcd => Ok(gcd( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), - ScalarFunction::Lcm => Ok(lcm( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::Random => Ok(random()), ScalarFunction::Concat => { Ok(concat_expr(parse_exprs(args, registry, codec)?)) @@ -1335,7 +1320,6 @@ pub fn parse_expr( ScalarFunction::Coalesce => { Ok(coalesce(parse_exprs(args, registry, codec)?)) } - ScalarFunction::Pi => Ok(pi()), ScalarFunction::Power => Ok(power( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index a10edb393241..bd964b43d418 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1410,10 +1410,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Cot => Self::Cot, BuiltinScalarFunction::Exp => Self::Exp, BuiltinScalarFunction::Factorial => Self::Factorial, - BuiltinScalarFunction::Gcd => Self::Gcd, - BuiltinScalarFunction::Lcm => Self::Lcm, BuiltinScalarFunction::Log => Self::Log, - BuiltinScalarFunction::Floor => Self::Floor, BuiltinScalarFunction::Ceil => Self::Ceil, BuiltinScalarFunction::Round => Self::Round, BuiltinScalarFunction::Trunc => Self::Trunc, @@ -1423,7 +1420,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::InitCap => Self::InitCap, BuiltinScalarFunction::Random => Self::Random, BuiltinScalarFunction::Coalesce => Self::Coalesce, - BuiltinScalarFunction::Pi => Self::Pi, BuiltinScalarFunction::Power => Self::Power, BuiltinScalarFunction::Nanvl => Self::Nanvl, BuiltinScalarFunction::Iszero => Self::Iszero, diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index e97eb1a32b12..4bf0906685ca 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -18,7 +18,8 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow_schema::DataType; use datafusion_common::{ - not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, + internal_datafusion_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, + Dependency, Result, }; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ @@ -264,6 +265,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { plan_err!("Invalid function '{name}'.\nDid you mean '{suggested_func_name}'?") } + pub(super) fn sql_fn_name_to_expr( + &self, + expr: SQLExpr, + fn_name: &str, + schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result { + let fun = self + .context_provider + .get_function_meta(fn_name) + .ok_or_else(|| { + internal_datafusion_err!("Unable to find expected '{fn_name}' function") + })?; + let args = vec![self.sql_expr_to_logical_expr(expr, schema, planner_context)?]; + Ok(Expr::ScalarFunction(ScalarFunction::new_udf(fun, args))) + } + pub(super) fn sql_named_function_to_expr( &self, expr: SQLExpr, diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index c2f72720afcb..7763fa2d8dab 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -518,12 +518,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLExpr::Floor { expr, field: _field, - } => self.sql_named_function_to_expr( - *expr, - BuiltinScalarFunction::Floor, - schema, - planner_context, - ), + } => self.sql_fn_name_to_expr(*expr, "floor", schema, planner_context), SQLExpr::Ceil { expr, field: _field, From e7de59a54b9b8b6d46d9991459777fc249caf9ec Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sat, 6 Apr 2024 12:56:50 -0400 Subject: [PATCH 2/4] remove floor fn --- datafusion/physical-expr/src/math_expressions.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index a883c7430d9a..384f8d87eb96 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -160,7 +160,6 @@ math_unary_function!("atan", atan); math_unary_function!("asinh", asinh); math_unary_function!("acosh", acosh); math_unary_function!("atanh", atanh); -math_unary_function!("floor", floor); math_unary_function!("ceil", ceil); math_unary_function!("exp", exp); math_unary_function!("ln", ln); From cf1d5e1e8c443b64331e6c72ad47dc48c4242186 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Mon, 8 Apr 2024 12:26:20 -0400 Subject: [PATCH 3/4] move Trunc, Cot, Round, iszero functions to datafusion-functions --- datafusion/expr/src/built_in_function.rs | 61 +-- datafusion/expr/src/expr_fn.rs | 46 +- datafusion/functions/src/macros.rs | 13 + datafusion/functions/src/math/cot.rs | 166 +++++++ datafusion/functions/src/math/iszero.rs | 141 ++++++ datafusion/functions/src/math/mod.rs | 282 ++++++++--- datafusion/functions/src/math/round.rs | 252 ++++++++++ datafusion/functions/src/math/trunc.rs | 235 ++++++++++ .../src/equivalence/projection.rs | 11 +- datafusion/physical-expr/src/functions.rs | 12 - .../physical-expr/src/math_expressions.rs | 436 +----------------- datafusion/proto/proto/datafusion.proto | 8 +- datafusion/proto/src/generated/pbjson.rs | 12 - datafusion/proto/src/generated/prost.rs | 16 +- .../proto/src/logical_plan/from_proto.rs | 20 +- datafusion/proto/src/logical_plan/to_proto.rs | 4 - .../tests/cases/roundtrip_physical_plan.rs | 29 +- datafusion/sql/tests/sql_integration.rs | 5 + 18 files changed, 1061 insertions(+), 688 deletions(-) create mode 100644 datafusion/functions/src/math/cot.rs create mode 100644 datafusion/functions/src/math/iszero.rs create mode 100644 datafusion/functions/src/math/round.rs create mode 100644 datafusion/functions/src/math/trunc.rs diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 7426ccd938e7..e7166e61b08e 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -45,21 +45,12 @@ pub enum BuiltinScalarFunction { Exp, /// factorial Factorial, - /// iszero - Iszero, /// log, same as log10 Log, /// nanvl Nanvl, /// power Power, - /// round - Round, - /// trunc - Trunc, - /// cot - Cot, - // string functions /// concat Concat, @@ -127,13 +118,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Coalesce => Volatility::Immutable, BuiltinScalarFunction::Exp => Volatility::Immutable, BuiltinScalarFunction::Factorial => Volatility::Immutable, - BuiltinScalarFunction::Iszero => Volatility::Immutable, BuiltinScalarFunction::Log => Volatility::Immutable, BuiltinScalarFunction::Nanvl => Volatility::Immutable, BuiltinScalarFunction::Power => Volatility::Immutable, - BuiltinScalarFunction::Round => Volatility::Immutable, - BuiltinScalarFunction::Cot => Volatility::Immutable, - BuiltinScalarFunction::Trunc => Volatility::Immutable, BuiltinScalarFunction::Concat => Volatility::Immutable, BuiltinScalarFunction::ConcatWithSeparator => Volatility::Immutable, BuiltinScalarFunction::EndsWith => Volatility::Immutable, @@ -191,16 +178,12 @@ impl BuiltinScalarFunction { _ => Ok(Float64), }, - BuiltinScalarFunction::Iszero => Ok(Boolean), - - BuiltinScalarFunction::Ceil - | BuiltinScalarFunction::Exp - | BuiltinScalarFunction::Round - | BuiltinScalarFunction::Trunc - | BuiltinScalarFunction::Cot => match input_expr_types[0] { - Float32 => Ok(Float32), - _ => Ok(Float64), - }, + BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp => { + match input_expr_types[0] { + Float32 => Ok(Float32), + _ => Ok(Float64), + } + } } } @@ -237,24 +220,6 @@ impl BuiltinScalarFunction { vec![Exact(vec![Int64, Int64]), Exact(vec![Float64, Float64])], self.volatility(), ), - BuiltinScalarFunction::Round => Signature::one_of( - vec![ - Exact(vec![Float64, Int64]), - Exact(vec![Float32, Int64]), - Exact(vec![Float64]), - Exact(vec![Float32]), - ], - self.volatility(), - ), - BuiltinScalarFunction::Trunc => Signature::one_of( - vec![ - Exact(vec![Float32, Int64]), - Exact(vec![Float64, Int64]), - Exact(vec![Float64]), - Exact(vec![Float32]), - ], - self.volatility(), - ), BuiltinScalarFunction::Log => Signature::one_of( vec![ @@ -272,9 +237,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Factorial => { Signature::uniform(1, vec![Int64], self.volatility()) } - BuiltinScalarFunction::Ceil - | BuiltinScalarFunction::Exp - | BuiltinScalarFunction::Cot => { + BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp => { // math expressions expect 1 argument of type f64 or f32 // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we // return the best approximation for it (in f64). @@ -282,10 +245,6 @@ impl BuiltinScalarFunction { // will be as good as the number of digits in the number Signature::uniform(1, vec![Float64, Float32], self.volatility()) } - BuiltinScalarFunction::Iszero => Signature::one_of( - vec![Exact(vec![Float32]), Exact(vec![Float64])], - self.volatility(), - ), } } @@ -298,8 +257,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Ceil | BuiltinScalarFunction::Exp | BuiltinScalarFunction::Factorial - | BuiltinScalarFunction::Round - | BuiltinScalarFunction::Trunc ) { Some(vec![Some(true)]) } else if *self == BuiltinScalarFunction::Log { @@ -313,16 +270,12 @@ impl BuiltinScalarFunction { pub fn aliases(&self) -> &'static [&'static str] { match self { BuiltinScalarFunction::Ceil => &["ceil"], - BuiltinScalarFunction::Cot => &["cot"], BuiltinScalarFunction::Exp => &["exp"], BuiltinScalarFunction::Factorial => &["factorial"], - BuiltinScalarFunction::Iszero => &["iszero"], BuiltinScalarFunction::Log => &["log"], BuiltinScalarFunction::Nanvl => &["nanvl"], BuiltinScalarFunction::Power => &["power", "pow"], BuiltinScalarFunction::Random => &["random"], - BuiltinScalarFunction::Round => &["round"], - BuiltinScalarFunction::Trunc => &["trunc"], // conditional functions BuiltinScalarFunction::Coalesce => &["coalesce"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 6c811ff06418..5fef264c0747 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -530,7 +530,6 @@ macro_rules! nary_scalar_expr { // generate methods for creating the supported unary/binary expressions // math functions -scalar_expr!(Cot, cot, num, "cotangent of a number"); scalar_expr!(Factorial, factorial, num, "factorial"); scalar_expr!( Ceil, @@ -538,12 +537,7 @@ scalar_expr!( num, "nearest integer greater than or equal to argument" ); -nary_scalar_expr!(Round, round, "round to nearest integer"); -nary_scalar_expr!( - Trunc, - trunc, - "truncate toward zero, with optional precision" -); + scalar_expr!(Exp, exp, num, "exponential"); scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`"); @@ -560,12 +554,6 @@ nary_scalar_expr!( ); nary_scalar_expr!(Concat, concat_expr, "concatenates several strings"); scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y"); -scalar_expr!( - Iszero, - iszero, - num, - "returns true if a given number is +0.0 or -0.0 otherwise returns false" -); /// Create a CASE WHEN statement with literal WHEN expressions for comparison to the base expression. pub fn case(expr: Expr) -> CaseBuilder { @@ -875,12 +863,6 @@ impl WindowUDFImpl for SimpleWindowUDF { } /// Calls a named built in function -/// ``` -/// use datafusion_expr::{col, lit, call_fn}; -/// -/// // create the expression trunc(x) < 0.2 -/// let expr = call_fn("trunc", vec![col("x")]).unwrap().lt(lit(0.2)); -/// ``` pub fn call_fn(name: impl AsRef, args: Vec) -> Result { match name.as_ref().parse::() { Ok(fun) => Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args))), @@ -938,38 +920,12 @@ mod test { }; } - macro_rules! test_nary_scalar_expr { - ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { - let expected = [$(stringify!($arg)),*]; - let result = $FUNC( - vec![ - $( - col(stringify!($arg.to_string())) - ),* - ] - ); - if let Expr::ScalarFunction(ScalarFunction { func_def: ScalarFunctionDefinition::BuiltIn(fun), args }) = result { - let name = built_in_function::BuiltinScalarFunction::$ENUM; - assert_eq!(name, fun); - assert_eq!(expected.len(), args.len()); - } else { - assert!(false, "unexpected: {:?}", result); - } - }; -} - #[test] fn scalar_function_definitions() { - test_unary_scalar_expr!(Cot, cot); test_unary_scalar_expr!(Factorial, factorial); test_unary_scalar_expr!(Ceil, ceil); - test_nary_scalar_expr!(Round, round, input); - test_nary_scalar_expr!(Round, round, input, decimal_places); - test_nary_scalar_expr!(Trunc, trunc, num); - test_nary_scalar_expr!(Trunc, trunc, num, precision); test_unary_scalar_expr!(Exp, exp); test_scalar_expr!(Nanvl, nanvl, x, y); - test_scalar_expr!(Iszero, iszero, input); test_scalar_expr!(InitCap, initcap, string); test_scalar_expr!(EndsWith, ends_with, string, characters); diff --git a/datafusion/functions/src/macros.rs b/datafusion/functions/src/macros.rs index c92cb27ef5bb..6f39aeca77d4 100644 --- a/datafusion/functions/src/macros.rs +++ b/datafusion/functions/src/macros.rs @@ -108,6 +108,19 @@ macro_rules! make_stub_package { }; } +macro_rules! make_function_scalar_inputs { + ($ARG: expr, $NAME:expr, $ARRAY_TYPE:ident, $FUNC: block) => {{ + let arg = downcast_arg!($ARG, $NAME, $ARRAY_TYPE); + + arg.iter() + .map(|a| match a { + Some(a) => Some($FUNC(a)), + _ => None, + }) + .collect::<$ARRAY_TYPE>() + }}; +} + /// Invokes a function on each element of an array and returns the result as a new array /// /// $ARG: ArrayRef diff --git a/datafusion/functions/src/math/cot.rs b/datafusion/functions/src/math/cot.rs new file mode 100644 index 000000000000..66219960d9a2 --- /dev/null +++ b/datafusion/functions/src/math/cot.rs @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, Float32Array, Float64Array}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Float32, Float64}; + +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +use crate::utils::make_scalar_function; + +#[derive(Debug)] +pub struct CotFunc { + signature: Signature, +} + +impl Default for CotFunc { + fn default() -> Self { + CotFunc::new() + } +} + +impl CotFunc { + pub fn new() -> Self { + use DataType::*; + Self { + // math expressions expect 1 argument of type f64 or f32 + // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we + // return the best approximation for it (in f64). + // We accept f32 because in this case it is clear that the best approximation + // will be as good as the number of digits in the number + signature: Signature::uniform( + 1, + vec![Float64, Float32], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for CotFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "cot" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + match arg_types[0] { + Float32 => Ok(Float32), + _ => Ok(Float64), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(cot, vec![])(args) + } +} + +///cot SQL function +fn cot(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + Float64 => Ok(Arc::new(make_function_scalar_inputs!( + &args[0], + "x", + Float64Array, + { compute_cot64 } + )) as ArrayRef), + Float32 => Ok(Arc::new(make_function_scalar_inputs!( + &args[0], + "x", + Float32Array, + { compute_cot32 } + )) as ArrayRef), + other => exec_err!("Unsupported data type {other:?} for function cot"), + } +} + +fn compute_cot32(x: f32) -> f32 { + let a = f32::tan(x); + 1.0 / a +} + +fn compute_cot64(x: f64) -> f64 { + let a = f64::tan(x); + 1.0 / a +} + +#[cfg(test)] +mod test { + use crate::math::cot::cot; + use arrow::array::{ArrayRef, Float32Array, Float64Array}; + use datafusion_common::cast::{as_float32_array, as_float64_array}; + use std::sync::Arc; + + #[test] + fn test_cot_f32() { + let args: Vec = + vec![Arc::new(Float32Array::from(vec![12.1, 30.0, 90.0, -30.0]))]; + let result = cot(&args).expect("failed to initialize function cot"); + let floats = + as_float32_array(&result).expect("failed to initialize function cot"); + + let expected = Float32Array::from(vec![ + -1.986_460_4, + -0.156_119_96, + -0.501_202_8, + 0.156_119_96, + ]); + + let eps = 1e-6; + assert_eq!(floats.len(), 4); + assert!((floats.value(0) - expected.value(0)).abs() < eps); + assert!((floats.value(1) - expected.value(1)).abs() < eps); + assert!((floats.value(2) - expected.value(2)).abs() < eps); + assert!((floats.value(3) - expected.value(3)).abs() < eps); + } + + #[test] + fn test_cot_f64() { + let args: Vec = + vec![Arc::new(Float64Array::from(vec![12.1, 30.0, 90.0, -30.0]))]; + let result = cot(&args).expect("failed to initialize function cot"); + let floats = + as_float64_array(&result).expect("failed to initialize function cot"); + + let expected = Float64Array::from(vec![ + -1.986_458_685_881_4, + -0.156_119_952_161_6, + -0.501_202_783_380_1, + 0.156_119_952_161_6, + ]); + + let eps = 1e-12; + assert_eq!(floats.len(), 4); + assert!((floats.value(0) - expected.value(0)).abs() < eps); + assert!((floats.value(1) - expected.value(1)).abs() < eps); + assert!((floats.value(2) - expected.value(2)).abs() < eps); + assert!((floats.value(3) - expected.value(3)).abs() < eps); + } +} diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs new file mode 100644 index 000000000000..e6a728053359 --- /dev/null +++ b/datafusion/functions/src/math/iszero.rs @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Boolean, Float32, Float64}; + +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::TypeSignature::Exact; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +use crate::utils::make_scalar_function; + +#[derive(Debug)] +pub struct IsZeroFunc { + signature: Signature, +} + +impl Default for IsZeroFunc { + fn default() -> Self { + IsZeroFunc::new() + } +} + +impl IsZeroFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![Exact(vec![Float32]), Exact(vec![Float64])], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for IsZeroFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "iszero" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Boolean) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(iszero, vec![])(args) + } +} + +/// Iszero SQL function +pub fn iszero(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + "x", + Float64Array, + BooleanArray, + { |x: f64| { x == 0_f64 } } + )) as ArrayRef), + + Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + "x", + Float32Array, + BooleanArray, + { |x: f32| { x == 0_f32 } } + )) as ArrayRef), + + other => exec_err!("Unsupported data type {other:?} for function iszero"), + } +} + +#[cfg(test)] +mod test { + use std::sync::Arc; + + use arrow::array::{ArrayRef, Float32Array, Float64Array}; + + use datafusion_common::cast::as_boolean_array; + + use crate::math::iszero::iszero; + + #[test] + fn test_iszero_f64() { + let args: Vec = + vec![Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; + + let result = iszero(&args).expect("failed to initialize function iszero"); + let booleans = + as_boolean_array(&result).expect("failed to initialize function iszero"); + + assert_eq!(booleans.len(), 4); + assert!(!booleans.value(0)); + assert!(booleans.value(1)); + assert!(!booleans.value(2)); + assert!(booleans.value(3)); + } + + #[test] + fn test_iszero_f32() { + let args: Vec = + vec![Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; + + let result = iszero(&args).expect("failed to initialize function iszero"); + let booleans = + as_boolean_array(&result).expect("failed to initialize function iszero"); + + assert_eq!(booleans.len(), 4); + assert!(!booleans.value(0)); + assert!(booleans.value(1)); + assert!(!booleans.value(2)); + assert!(booleans.value(3)); + } +} diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 3a1f7cc13bb7..069af5d90ffd 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -17,86 +17,244 @@ //! "math" DataFusion functions +use datafusion_expr::ScalarUDF; +use std::sync::Arc; + pub mod abs; +pub mod cot; pub mod gcd; +mod iszero; pub mod lcm; pub mod nans; pub mod pi; +pub mod round; +pub mod trunc; // Create UDFs -make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(abs::AbsFunc, ABS, abs); -make_udf_function!(gcd::GcdFunc, GCD, gcd); -make_udf_function!(lcm::LcmFunc, LCM, lcm); -make_udf_function!(pi::PiFunc, PI, pi); - -make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)])); -make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)])); -make_math_unary_udf!(LnFunc, LN, ln, ln, Some(vec![Some(true)])); - -make_math_unary_udf!(TanhFunc, TANH, tanh, tanh, None); make_math_unary_udf!(AcosFunc, ACOS, acos, acos, None); +make_math_unary_udf!(AcoshFunc, ACOSH, acosh, acosh, Some(vec![Some(true)])); make_math_unary_udf!(AsinFunc, ASIN, asin, asin, None); -make_math_unary_udf!(TanFunc, TAN, tan, tan, None); - -make_math_unary_udf!(AtanhFunc, ATANH, atanh, atanh, Some(vec![Some(true)])); make_math_unary_udf!(AsinhFunc, ASINH, asinh, asinh, Some(vec![Some(true)])); -make_math_unary_udf!(AcoshFunc, ACOSH, acosh, acosh, Some(vec![Some(true)])); make_math_unary_udf!(AtanFunc, ATAN, atan, atan, Some(vec![Some(true)])); +make_math_unary_udf!(AtanhFunc, ATANH, atanh, atanh, Some(vec![Some(true)])); make_math_binary_udf!(Atan2, ATAN2, atan2, atan2, Some(vec![Some(true)])); - +make_math_unary_udf!(CbrtFunc, CBRT, cbrt, cbrt, None); +make_math_unary_udf!(CosFunc, COS, cos, cos, None); +make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, None); +make_udf_function!(cot::CotFunc, COT, cot); +make_math_unary_udf!(DegreesFunc, DEGREES, degrees, to_degrees, None); +make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, Some(vec![Some(true)])); +make_udf_function!(gcd::GcdFunc, GCD, gcd); +make_udf_function!(nans::IsNanFunc, ISNAN, isnan); +make_udf_function!(iszero::IsZeroFunc, ISZERO, iszero); +make_udf_function!(lcm::LcmFunc, LCM, lcm); +make_math_unary_udf!(LnFunc, LN, ln, ln, Some(vec![Some(true)])); +make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)])); +make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)])); +make_udf_function!(pi::PiFunc, PI, pi); make_math_unary_udf!(RadiansFunc, RADIANS, radians, to_radians, None); +make_udf_function!(round::RoundFunc, ROUND, round); make_math_unary_udf!(SignumFunc, SIGNUM, signum, signum, None); make_math_unary_udf!(SinFunc, SIN, sin, sin, None); make_math_unary_udf!(SinhFunc, SINH, sinh, sinh, None); make_math_unary_udf!(SqrtFunc, SQRT, sqrt, sqrt, None); +make_math_unary_udf!(TanFunc, TAN, tan, tan, None); +make_math_unary_udf!(TanhFunc, TANH, tanh, tanh, None); +make_udf_function!(trunc::TruncFunc, TRUNC, trunc); -make_math_unary_udf!(CbrtFunc, CBRT, cbrt, cbrt, None); -make_math_unary_udf!(CosFunc, COS, cos, cos, None); -make_math_unary_udf!(CoshFunc, COSH, cosh, cosh, None); -make_math_unary_udf!(DegreesFunc, DEGREES, degrees, to_degrees, None); +pub mod expr_fn { + use datafusion_expr::Expr; -make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, Some(vec![Some(true)])); + #[doc = "returns the absolute value of a given number"] + pub fn abs(num: Expr) -> Expr { + super::abs().call(vec![num]) + } + + #[doc = "returns the arc cosine or inverse cosine of a number"] + pub fn acos(num: Expr) -> Expr { + super::acos().call(vec![num]) + } + + #[doc = "returns inverse hyperbolic cosine"] + pub fn acosh(num: Expr) -> Expr { + super::acosh().call(vec![num]) + } + + #[doc = "returns the arc sine or inverse sine of a number"] + pub fn asin(num: Expr) -> Expr { + super::asin().call(vec![num]) + } + + #[doc = "returns inverse hyperbolic sine"] + pub fn asinh(num: Expr) -> Expr { + super::asinh().call(vec![num]) + } + + #[doc = "returns inverse tangent"] + pub fn atan(num: Expr) -> Expr { + super::atan().call(vec![num]) + } + + #[doc = "returns inverse tangent of a division given in the argument"] + pub fn atan2(y: Expr, x: Expr) -> Expr { + super::atan2().call(vec![y, x]) + } + + #[doc = "returns inverse hyperbolic tangent"] + pub fn atanh(num: Expr) -> Expr { + super::atanh().call(vec![num]) + } + + #[doc = "cube root of a number"] + pub fn cbrt(num: Expr) -> Expr { + super::cbrt().call(vec![num]) + } + + #[doc = "cosine"] + pub fn cos(num: Expr) -> Expr { + super::cos().call(vec![num]) + } + + #[doc = "hyperbolic cosine"] + pub fn cosh(num: Expr) -> Expr { + super::cosh().call(vec![num]) + } + + #[doc = "cotangent of a number"] + pub fn cot(num: Expr) -> Expr { + super::cot().call(vec![num]) + } + + #[doc = "converts radians to degrees"] + pub fn degrees(num: Expr) -> Expr { + super::degrees().call(vec![num]) + } + + #[doc = "nearest integer less than or equal to argument"] + pub fn floor(num: Expr) -> Expr { + super::floor().call(vec![num]) + } + + #[doc = "greatest common divisor"] + pub fn gcd(x: Expr, y: Expr) -> Expr { + super::gcd().call(vec![x, y]) + } + + #[doc = "returns true if a given number is +NaN or -NaN otherwise returns false"] + pub fn isnan(num: Expr) -> Expr { + super::isnan().call(vec![num]) + } + + #[doc = "returns true if a given number is +0.0 or -0.0 otherwise returns false"] + pub fn iszero(num: Expr) -> Expr { + super::iszero().call(vec![num]) + } + + #[doc = "least common multiple"] + pub fn lcm(x: Expr, y: Expr) -> Expr { + super::lcm().call(vec![x, y]) + } + + #[doc = "natural logarithm (base e) of a number"] + pub fn ln(num: Expr) -> Expr { + super::ln().call(vec![num]) + } + + #[doc = "base 2 logarithm of a number"] + pub fn log2(num: Expr) -> Expr { + super::log2().call(vec![num]) + } + + #[doc = "base 10 logarithm of a number"] + pub fn log10(num: Expr) -> Expr { + super::log10().call(vec![num]) + } + + #[doc = "Returns an approximate value of π"] + pub fn pi() -> Expr { + super::pi().call(vec![]) + } + + #[doc = "converts degrees to radians"] + pub fn radians(num: Expr) -> Expr { + super::radians().call(vec![num]) + } + + #[doc = "round to nearest integer"] + pub fn round(args: Vec) -> Expr { + super::round().call(args) + } + + #[doc = "sign of the argument (-1, 0, +1)"] + pub fn signum(num: Expr) -> Expr { + super::signum().call(vec![num]) + } + + #[doc = "sine"] + pub fn sin(num: Expr) -> Expr { + super::sin().call(vec![num]) + } + + #[doc = "hyperbolic sine"] + pub fn sinh(num: Expr) -> Expr { + super::sinh().call(vec![num]) + } + + #[doc = "square root of a number"] + pub fn sqrt(num: Expr) -> Expr { + super::sqrt().call(vec![num]) + } + + #[doc = "returns the tangent of a number"] + pub fn tan(num: Expr) -> Expr { + super::tan().call(vec![num]) + } + + #[doc = "returns the hyperbolic tangent of a number"] + pub fn tanh(num: Expr) -> Expr { + super::tanh().call(vec![num]) + } + + #[doc = "truncate toward zero, with optional precision"] + pub fn trunc(args: Vec) -> Expr { + super::trunc().call(args) + } +} -// Export the functions out of this package, both as expr_fn as well as a list of functions -export_functions!( - ( - isnan, - num, - "returns true if a given number is +NaN or -NaN otherwise returns false" - ), - (abs, num, "returns the absolute value of a given number"), - (log2, num, "base 2 logarithm of a number"), - (log10, num, "base 10 logarithm of a number"), - (ln, num, "natural logarithm (base e) of a number"), - ( - acos, - num, - "returns the arc cosine or inverse cosine of a number" - ), - ( - asin, - num, - "returns the arc sine or inverse sine of a number" - ), - (tan, num, "returns the tangent of a number"), - (tanh, num, "returns the hyperbolic tangent of a number"), - (atanh, num, "returns inverse hyperbolic tangent"), - (asinh, num, "returns inverse hyperbolic sine"), - (acosh, num, "returns inverse hyperbolic cosine"), - (atan, num, "returns inverse tangent"), - (atan2, y x, "returns inverse tangent of a division given in the argument"), - (radians, num, "converts degrees to radians"), - (signum, num, "sign of the argument (-1, 0, +1)"), - (sin, num, "sine"), - (sinh, num, "hyperbolic sine"), - (sqrt, num, "square root of a number"), - (cbrt, num, "cube root of a number"), - (cos, num, "cosine"), - (cosh, num, "hyperbolic cosine"), - (degrees, num, "converts radians to degrees"), - (gcd, x y, "greatest common divisor"), - (lcm, x y, "least common multiple"), - (floor, num, "nearest integer less than or equal to argument"), - (pi, , "Returns an approximate value of π") -); +/// Return a list of all functions in this package +pub fn functions() -> Vec> { + vec![ + abs(), + acos(), + acosh(), + asin(), + asinh(), + atan(), + atan2(), + atanh(), + cbrt(), + cos(), + cosh(), + cot(), + degrees(), + floor(), + gcd(), + isnan(), + iszero(), + lcm(), + ln(), + log2(), + log10(), + pi(), + radians(), + round(), + signum(), + sin(), + sinh(), + sqrt(), + tan(), + tanh(), + trunc(), + ] +} diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs new file mode 100644 index 000000000000..f4a163137a35 --- /dev/null +++ b/datafusion/functions/src/math/round.rs @@ -0,0 +1,252 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, Float32Array, Float64Array, Int64Array}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Float32, Float64}; + +use crate::utils::make_scalar_function; +use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::TypeSignature::Exact; +use datafusion_expr::{ColumnarValue, FuncMonotonicity}; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +#[derive(Debug)] +pub struct RoundFunc { + signature: Signature, +} + +impl Default for RoundFunc { + fn default() -> Self { + RoundFunc::new() + } +} + +impl RoundFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Float64, Int64]), + Exact(vec![Float32, Int64]), + Exact(vec![Float64]), + Exact(vec![Float32]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for RoundFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "round" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + match arg_types[0] { + Float32 => Ok(Float32), + _ => Ok(Float64), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(round, vec![])(args) + } + + fn monotonicity(&self) -> Result> { + Ok(Some(vec![Some(true)])) + } +} + +/// Round SQL function +pub fn round(args: &[ArrayRef]) -> Result { + if args.len() != 1 && args.len() != 2 { + return exec_err!( + "round function requires one or two arguments, got {}", + args.len() + ); + } + + let mut decimal_places = ColumnarValue::Scalar(ScalarValue::Int64(Some(0))); + + if args.len() == 2 { + decimal_places = ColumnarValue::Array(args[1].clone()); + } + + match args[0].data_type() { + DataType::Float64 => match decimal_places { + ColumnarValue::Scalar(ScalarValue::Int64(Some(decimal_places))) => { + let decimal_places = decimal_places.try_into().unwrap(); + + Ok(Arc::new(make_function_scalar_inputs!( + &args[0], + "value", + Float64Array, + { + |value: f64| { + (value * 10.0_f64.powi(decimal_places)).round() + / 10.0_f64.powi(decimal_places) + } + } + )) as ArrayRef) + } + ColumnarValue::Array(decimal_places) => Ok(Arc::new(make_function_inputs2!( + &args[0], + decimal_places, + "value", + "decimal_places", + Float64Array, + Int64Array, + { + |value: f64, decimal_places: i64| { + (value * 10.0_f64.powi(decimal_places.try_into().unwrap())) + .round() + / 10.0_f64.powi(decimal_places.try_into().unwrap()) + } + } + )) as ArrayRef), + _ => { + exec_err!("round function requires a scalar or array for decimal_places") + } + }, + + DataType::Float32 => match decimal_places { + ColumnarValue::Scalar(ScalarValue::Int64(Some(decimal_places))) => { + let decimal_places = decimal_places.try_into().unwrap(); + + Ok(Arc::new(make_function_scalar_inputs!( + &args[0], + "value", + Float32Array, + { + |value: f32| { + (value * 10.0_f32.powi(decimal_places)).round() + / 10.0_f32.powi(decimal_places) + } + } + )) as ArrayRef) + } + ColumnarValue::Array(decimal_places) => Ok(Arc::new(make_function_inputs2!( + &args[0], + decimal_places, + "value", + "decimal_places", + Float32Array, + Int64Array, + { + |value: f32, decimal_places: i64| { + (value * 10.0_f32.powi(decimal_places.try_into().unwrap())) + .round() + / 10.0_f32.powi(decimal_places.try_into().unwrap()) + } + } + )) as ArrayRef), + _ => { + exec_err!("round function requires a scalar or array for decimal_places") + } + }, + + other => exec_err!("Unsupported data type {other:?} for function round"), + } +} + +#[cfg(test)] +mod test { + use crate::math::round::round; + use arrow::array::{ArrayRef, Float32Array, Float64Array, Int64Array}; + use datafusion_common::cast::{as_float32_array, as_float64_array}; + use std::sync::Arc; + + #[test] + fn test_round_f32() { + let args: Vec = vec![ + Arc::new(Float32Array::from(vec![125.2345; 10])), // input + Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4, 5, -1, -2, -3, -4])), // decimal_places + ]; + + let result = round(&args).expect("failed to initialize function round"); + let floats = + as_float32_array(&result).expect("failed to initialize function round"); + + let expected = Float32Array::from(vec![ + 125.0, 125.2, 125.23, 125.235, 125.2345, 125.2345, 130.0, 100.0, 0.0, 0.0, + ]); + + assert_eq!(floats, &expected); + } + + #[test] + fn test_round_f64() { + let args: Vec = vec![ + Arc::new(Float64Array::from(vec![125.2345; 10])), // input + Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4, 5, -1, -2, -3, -4])), // decimal_places + ]; + + let result = round(&args).expect("failed to initialize function round"); + let floats = + as_float64_array(&result).expect("failed to initialize function round"); + + let expected = Float64Array::from(vec![ + 125.0, 125.2, 125.23, 125.235, 125.2345, 125.2345, 130.0, 100.0, 0.0, 0.0, + ]); + + assert_eq!(floats, &expected); + } + + #[test] + fn test_round_f32_one_input() { + let args: Vec = vec![ + Arc::new(Float32Array::from(vec![125.2345, 12.345, 1.234, 0.1234])), // input + ]; + + let result = round(&args).expect("failed to initialize function round"); + let floats = + as_float32_array(&result).expect("failed to initialize function round"); + + let expected = Float32Array::from(vec![125.0, 12.0, 1.0, 0.0]); + + assert_eq!(floats, &expected); + } + + #[test] + fn test_round_f64_one_input() { + let args: Vec = vec![ + Arc::new(Float64Array::from(vec![125.2345, 12.345, 1.234, 0.1234])), // input + ]; + + let result = round(&args).expect("failed to initialize function round"); + let floats = + as_float64_array(&result).expect("failed to initialize function round"); + + let expected = Float64Array::from(vec![125.0, 12.0, 1.0, 0.0]); + + assert_eq!(floats, &expected); + } +} diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs new file mode 100644 index 000000000000..6f88099889cc --- /dev/null +++ b/datafusion/functions/src/math/trunc.rs @@ -0,0 +1,235 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, Float32Array, Float64Array, Int64Array}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::{Float32, Float64}; + +use crate::utils::make_scalar_function; +use datafusion_common::ScalarValue::Int64; +use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_expr::TypeSignature::Exact; +use datafusion_expr::{ColumnarValue, FuncMonotonicity}; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; + +#[derive(Debug)] +pub struct TruncFunc { + signature: Signature, +} + +impl Default for TruncFunc { + fn default() -> Self { + TruncFunc::new() + } +} + +impl TruncFunc { + pub fn new() -> Self { + use DataType::*; + Self { + // math expressions expect 1 argument of type f64 or f32 + // priority is given to f64 because e.g. `sqrt(1i32)` is in IR (real numbers) and thus we + // return the best approximation for it (in f64). + // We accept f32 because in this case it is clear that the best approximation + // will be as good as the number of digits in the number + signature: Signature::one_of( + vec![ + Exact(vec![Float32, Int64]), + Exact(vec![Float64, Int64]), + Exact(vec![Float64]), + Exact(vec![Float32]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for TruncFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "trunc" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + match arg_types[0] { + Float32 => Ok(Float32), + _ => Ok(Float64), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(trunc, vec![])(args) + } + + fn monotonicity(&self) -> Result> { + Ok(Some(vec![Some(true)])) + } +} + +/// Truncate(numeric, decimalPrecision) and trunc(numeric) SQL function +fn trunc(args: &[ArrayRef]) -> Result { + if args.len() != 1 && args.len() != 2 { + return exec_err!( + "truncate function requires one or two arguments, got {}", + args.len() + ); + } + + //if only one arg then invoke toolchain trunc(num) and precision = 0 by default + //or then invoke the compute_truncate method to process precision + let num = &args[0]; + let precision = if args.len() == 1 { + ColumnarValue::Scalar(Int64(Some(0))) + } else { + ColumnarValue::Array(args[1].clone()) + }; + + match args[0].data_type() { + Float64 => match precision { + ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( + make_function_scalar_inputs!(num, "num", Float64Array, { f64::trunc }), + ) as ArrayRef), + ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!( + num, + precision, + "x", + "y", + Float64Array, + Int64Array, + { compute_truncate64 } + )) as ArrayRef), + _ => exec_err!("trunc function requires a scalar or array for precision"), + }, + Float32 => match precision { + ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( + make_function_scalar_inputs!(num, "num", Float32Array, { f32::trunc }), + ) as ArrayRef), + ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!( + num, + precision, + "x", + "y", + Float32Array, + Int64Array, + { compute_truncate32 } + )) as ArrayRef), + _ => exec_err!("trunc function requires a scalar or array for precision"), + }, + other => exec_err!("Unsupported data type {other:?} for function trunc"), + } +} + +fn compute_truncate32(x: f32, y: i64) -> f32 { + let factor = 10.0_f32.powi(y as i32); + (x * factor).round() / factor +} + +fn compute_truncate64(x: f64, y: i64) -> f64 { + let factor = 10.0_f64.powi(y as i32); + (x * factor).round() / factor +} + +#[cfg(test)] +mod test { + use crate::math::trunc::trunc; + use arrow::array::{ArrayRef, Float32Array, Float64Array, Int64Array}; + use datafusion_common::cast::{as_float32_array, as_float64_array}; + use std::sync::Arc; + + #[test] + fn test_truncate_32() { + let args: Vec = vec![ + Arc::new(Float32Array::from(vec![ + 15.0, + 1_234.267_8, + 1_233.123_4, + 3.312_979_2, + -21.123_4, + ])), + Arc::new(Int64Array::from(vec![0, 3, 2, 5, 6])), + ]; + + let result = trunc(&args).expect("failed to initialize function truncate"); + let floats = + as_float32_array(&result).expect("failed to initialize function truncate"); + + assert_eq!(floats.len(), 5); + assert_eq!(floats.value(0), 15.0); + assert_eq!(floats.value(1), 1_234.268); + assert_eq!(floats.value(2), 1_233.12); + assert_eq!(floats.value(3), 3.312_98); + assert_eq!(floats.value(4), -21.123_4); + } + + #[test] + fn test_truncate_64() { + let args: Vec = vec![ + Arc::new(Float64Array::from(vec![ + 5.0, + 234.267_812_176, + 123.123_456_789, + 123.312_979_313_2, + -321.123_1, + ])), + Arc::new(Int64Array::from(vec![0, 3, 2, 5, 6])), + ]; + + let result = trunc(&args).expect("failed to initialize function truncate"); + let floats = + as_float64_array(&result).expect("failed to initialize function truncate"); + + assert_eq!(floats.len(), 5); + assert_eq!(floats.value(0), 5.0); + assert_eq!(floats.value(1), 234.268); + assert_eq!(floats.value(2), 123.12); + assert_eq!(floats.value(3), 123.312_98); + assert_eq!(floats.value(4), -321.123_1); + } + + #[test] + fn test_truncate_64_one_arg() { + let args: Vec = vec![Arc::new(Float64Array::from(vec![ + 5.0, + 234.267_812, + 123.123_45, + 123.312_979_313_2, + -321.123, + ]))]; + + let result = trunc(&args).expect("failed to initialize function truncate"); + let floats = + as_float64_array(&result).expect("failed to initialize function truncate"); + + assert_eq!(floats.len(), 5); + assert_eq!(floats.value(0), 5.0); + assert_eq!(floats.value(1), 234.0); + assert_eq!(floats.value(2), 123.0); + assert_eq!(floats.value(3), 123.0); + assert_eq!(floats.value(4), -321.0); + } +} diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/equivalence/projection.rs index 5efcf5942c39..92772e4623be 100644 --- a/datafusion/physical-expr/src/equivalence/projection.rs +++ b/datafusion/physical-expr/src/equivalence/projection.rs @@ -117,8 +117,7 @@ mod tests { use itertools::Itertools; use datafusion_common::{DFSchema, Result}; - use datafusion_expr::execution_props::ExecutionProps; - use datafusion_expr::{BuiltinScalarFunction, Operator, ScalarUDF}; + use datafusion_expr::{Operator, ScalarUDF}; use crate::equivalence::tests::{ apply_projection, convert_to_orderings, convert_to_orderings_owned, @@ -649,11 +648,13 @@ mod tests { col_b.clone(), )) as Arc; - let round_c = &crate::functions::create_physical_expr( - &BuiltinScalarFunction::Round, + let test_fun = ScalarUDF::new_from_impl(TestScalarUDF::new()); + let round_c = &create_physical_expr( + &test_fun, &[col_c.clone()], &schema, - &ExecutionProps::default(), + &[], + &DFSchema::empty(), )?; let option_asc = SortOptions { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 79d69b273d2c..b7118bab0cc1 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -184,28 +184,16 @@ pub fn create_physical_fun( BuiltinScalarFunction::Factorial => { Arc::new(|args| make_scalar_function_inner(math_expressions::factorial)(args)) } - BuiltinScalarFunction::Iszero => { - Arc::new(|args| make_scalar_function_inner(math_expressions::iszero)(args)) - } BuiltinScalarFunction::Nanvl => { Arc::new(|args| make_scalar_function_inner(math_expressions::nanvl)(args)) } BuiltinScalarFunction::Random => Arc::new(math_expressions::random), - BuiltinScalarFunction::Round => { - Arc::new(|args| make_scalar_function_inner(math_expressions::round)(args)) - } - BuiltinScalarFunction::Trunc => { - Arc::new(|args| make_scalar_function_inner(math_expressions::trunc)(args)) - } BuiltinScalarFunction::Power => { Arc::new(|args| make_scalar_function_inner(math_expressions::power)(args)) } BuiltinScalarFunction::Log => { Arc::new(|args| make_scalar_function_inner(math_expressions::log)(args)) } - BuiltinScalarFunction::Cot => { - Arc::new(|args| make_scalar_function_inner(math_expressions::cot)(args)) - } // string functions BuiltinScalarFunction::Coalesce => Arc::new(conditional_expressions::coalesce), BuiltinScalarFunction::Concat => Arc::new(string_expressions::concat), diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index 384f8d87eb96..75d7278aec62 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -27,7 +27,7 @@ use arrow::datatypes::DataType; use arrow_array::Array; use rand::{thread_rng, Rng}; -use datafusion_common::ScalarValue::{Float32, Int64}; +use datafusion_common::ScalarValue::Float32; use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; @@ -154,17 +154,8 @@ macro_rules! make_function_scalar_inputs_return_type { }}; } -math_unary_function!("asin", asin); -math_unary_function!("acos", acos); -math_unary_function!("atan", atan); -math_unary_function!("asinh", asinh); -math_unary_function!("acosh", acosh); -math_unary_function!("atanh", atanh); math_unary_function!("ceil", ceil); math_unary_function!("exp", exp); -math_unary_function!("ln", ln); -math_unary_function!("log2", log2); -math_unary_function!("log10", log10); /// Factorial SQL function pub fn factorial(args: &[ArrayRef]) -> Result { @@ -247,29 +238,6 @@ pub fn isnan(args: &[ArrayRef]) -> Result { } } -/// Iszero SQL function -pub fn iszero(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!( - &args[0], - "x", - Float64Array, - BooleanArray, - { |x: f64| { x == 0_f64 } } - )) as ArrayRef), - - DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!( - &args[0], - "x", - Float32Array, - BooleanArray, - { |x: f32| { x == 0_f32 } } - )) as ArrayRef), - - other => exec_err!("Unsupported data type {other:?} for function iszero"), - } -} - /// Random SQL function pub fn random(args: &[ColumnarValue]) -> Result { let len: usize = match &args[0] { @@ -282,98 +250,6 @@ pub fn random(args: &[ColumnarValue]) -> Result { Ok(ColumnarValue::Array(Arc::new(array))) } -/// Round SQL function -pub fn round(args: &[ArrayRef]) -> Result { - if args.len() != 1 && args.len() != 2 { - return exec_err!( - "round function requires one or two arguments, got {}", - args.len() - ); - } - - let mut decimal_places = ColumnarValue::Scalar(ScalarValue::Int64(Some(0))); - - if args.len() == 2 { - decimal_places = ColumnarValue::Array(args[1].clone()); - } - - match args[0].data_type() { - DataType::Float64 => match decimal_places { - ColumnarValue::Scalar(ScalarValue::Int64(Some(decimal_places))) => { - let decimal_places = decimal_places.try_into().unwrap(); - - Ok(Arc::new(make_function_scalar_inputs!( - &args[0], - "value", - Float64Array, - { - |value: f64| { - (value * 10.0_f64.powi(decimal_places)).round() - / 10.0_f64.powi(decimal_places) - } - } - )) as ArrayRef) - } - ColumnarValue::Array(decimal_places) => Ok(Arc::new(make_function_inputs2!( - &args[0], - decimal_places, - "value", - "decimal_places", - Float64Array, - Int64Array, - { - |value: f64, decimal_places: i64| { - (value * 10.0_f64.powi(decimal_places.try_into().unwrap())) - .round() - / 10.0_f64.powi(decimal_places.try_into().unwrap()) - } - } - )) as ArrayRef), - _ => { - exec_err!("round function requires a scalar or array for decimal_places") - } - }, - - DataType::Float32 => match decimal_places { - ColumnarValue::Scalar(ScalarValue::Int64(Some(decimal_places))) => { - let decimal_places = decimal_places.try_into().unwrap(); - - Ok(Arc::new(make_function_scalar_inputs!( - &args[0], - "value", - Float32Array, - { - |value: f32| { - (value * 10.0_f32.powi(decimal_places)).round() - / 10.0_f32.powi(decimal_places) - } - } - )) as ArrayRef) - } - ColumnarValue::Array(decimal_places) => Ok(Arc::new(make_function_inputs2!( - &args[0], - decimal_places, - "value", - "decimal_places", - Float32Array, - Int64Array, - { - |value: f32, decimal_places: i64| { - (value * 10.0_f32.powi(decimal_places.try_into().unwrap())) - .round() - / 10.0_f32.powi(decimal_places.try_into().unwrap()) - } - } - )) as ArrayRef), - _ => { - exec_err!("round function requires a scalar or array for decimal_places") - } - }, - - other => exec_err!("Unsupported data type {other:?} for function round"), - } -} - /// Power SQL function pub fn power(args: &[ArrayRef]) -> Result { match args[0].data_type() { @@ -453,100 +329,6 @@ pub fn log(args: &[ArrayRef]) -> Result { } } -///cot SQL function -pub fn cot(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs!( - &args[0], - "x", - Float64Array, - { compute_cot64 } - )) as ArrayRef), - - DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs!( - &args[0], - "x", - Float32Array, - { compute_cot32 } - )) as ArrayRef), - - other => exec_err!("Unsupported data type {other:?} for function cot"), - } -} - -fn compute_cot32(x: f32) -> f32 { - let a = f32::tan(x); - 1.0 / a -} - -fn compute_cot64(x: f64) -> f64 { - let a = f64::tan(x); - 1.0 / a -} - -/// Truncate(numeric, decimalPrecision) and trunc(numeric) SQL function -pub fn trunc(args: &[ArrayRef]) -> Result { - if args.len() != 1 && args.len() != 2 { - return exec_err!( - "truncate function requires one or two arguments, got {}", - args.len() - ); - } - - //if only one arg then invoke toolchain trunc(num) and precision = 0 by default - //or then invoke the compute_truncate method to process precision - let num = &args[0]; - let precision = if args.len() == 1 { - ColumnarValue::Scalar(Int64(Some(0))) - } else { - ColumnarValue::Array(args[1].clone()) - }; - - match args[0].data_type() { - DataType::Float64 => match precision { - ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( - make_function_scalar_inputs!(num, "num", Float64Array, { f64::trunc }), - ) as ArrayRef), - ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!( - num, - precision, - "x", - "y", - Float64Array, - Int64Array, - { compute_truncate64 } - )) as ArrayRef), - _ => exec_err!("trunc function requires a scalar or array for precision"), - }, - DataType::Float32 => match precision { - ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( - make_function_scalar_inputs!(num, "num", Float32Array, { f32::trunc }), - ) as ArrayRef), - ColumnarValue::Array(precision) => Ok(Arc::new(make_function_inputs2!( - num, - precision, - "x", - "y", - Float32Array, - Int64Array, - { compute_truncate32 } - )) as ArrayRef), - _ => exec_err!("trunc function requires a scalar or array for precision"), - }, - other => exec_err!("Unsupported data type {other:?} for function trunc"), - } -} - -fn compute_truncate32(x: f32, y: i64) -> f32 { - let factor = 10.0_f32.powi(y as i32); - (x * factor).round() / factor -} - -fn compute_truncate64(x: f64, y: i64) -> f64 { - let factor = 10.0_f64.powi(y as i32); - (x * factor).round() / factor -} - #[cfg(test)] mod tests { use arrow::array::{Float64Array, NullArray}; @@ -643,72 +425,6 @@ mod tests { assert_eq!(floats.value(3), 4.0); } - #[test] - fn test_round_f32() { - let args: Vec = vec![ - Arc::new(Float32Array::from(vec![125.2345; 10])), // input - Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4, 5, -1, -2, -3, -4])), // decimal_places - ]; - - let result = round(&args).expect("failed to initialize function round"); - let floats = - as_float32_array(&result).expect("failed to initialize function round"); - - let expected = Float32Array::from(vec![ - 125.0, 125.2, 125.23, 125.235, 125.2345, 125.2345, 130.0, 100.0, 0.0, 0.0, - ]); - - assert_eq!(floats, &expected); - } - - #[test] - fn test_round_f64() { - let args: Vec = vec![ - Arc::new(Float64Array::from(vec![125.2345; 10])), // input - Arc::new(Int64Array::from(vec![0, 1, 2, 3, 4, 5, -1, -2, -3, -4])), // decimal_places - ]; - - let result = round(&args).expect("failed to initialize function round"); - let floats = - as_float64_array(&result).expect("failed to initialize function round"); - - let expected = Float64Array::from(vec![ - 125.0, 125.2, 125.23, 125.235, 125.2345, 125.2345, 130.0, 100.0, 0.0, 0.0, - ]); - - assert_eq!(floats, &expected); - } - - #[test] - fn test_round_f32_one_input() { - let args: Vec = vec![ - Arc::new(Float32Array::from(vec![125.2345, 12.345, 1.234, 0.1234])), // input - ]; - - let result = round(&args).expect("failed to initialize function round"); - let floats = - as_float32_array(&result).expect("failed to initialize function round"); - - let expected = Float32Array::from(vec![125.0, 12.0, 1.0, 0.0]); - - assert_eq!(floats, &expected); - } - - #[test] - fn test_round_f64_one_input() { - let args: Vec = vec![ - Arc::new(Float64Array::from(vec![125.2345, 12.345, 1.234, 0.1234])), // input - ]; - - let result = round(&args).expect("failed to initialize function round"); - let floats = - as_float64_array(&result).expect("failed to initialize function round"); - - let expected = Float64Array::from(vec![125.0, 12.0, 1.0, 0.0]); - - assert_eq!(floats, &expected); - } - #[test] fn test_factorial_i64() { let args: Vec = vec![ @@ -724,124 +440,6 @@ mod tests { assert_eq!(ints, &expected); } - #[test] - fn test_cot_f32() { - let args: Vec = - vec![Arc::new(Float32Array::from(vec![12.1, 30.0, 90.0, -30.0]))]; - let result = cot(&args).expect("failed to initialize function cot"); - let floats = - as_float32_array(&result).expect("failed to initialize function cot"); - - let expected = Float32Array::from(vec![ - -1.986_460_4, - -0.156_119_96, - -0.501_202_8, - 0.156_119_96, - ]); - - let eps = 1e-6; - assert_eq!(floats.len(), 4); - assert!((floats.value(0) - expected.value(0)).abs() < eps); - assert!((floats.value(1) - expected.value(1)).abs() < eps); - assert!((floats.value(2) - expected.value(2)).abs() < eps); - assert!((floats.value(3) - expected.value(3)).abs() < eps); - } - - #[test] - fn test_cot_f64() { - let args: Vec = - vec![Arc::new(Float64Array::from(vec![12.1, 30.0, 90.0, -30.0]))]; - let result = cot(&args).expect("failed to initialize function cot"); - let floats = - as_float64_array(&result).expect("failed to initialize function cot"); - - let expected = Float64Array::from(vec![ - -1.986_458_685_881_4, - -0.156_119_952_161_6, - -0.501_202_783_380_1, - 0.156_119_952_161_6, - ]); - - let eps = 1e-12; - assert_eq!(floats.len(), 4); - assert!((floats.value(0) - expected.value(0)).abs() < eps); - assert!((floats.value(1) - expected.value(1)).abs() < eps); - assert!((floats.value(2) - expected.value(2)).abs() < eps); - assert!((floats.value(3) - expected.value(3)).abs() < eps); - } - - #[test] - fn test_truncate_32() { - let args: Vec = vec![ - Arc::new(Float32Array::from(vec![ - 15.0, - 1_234.267_8, - 1_233.123_4, - 3.312_979_2, - -21.123_4, - ])), - Arc::new(Int64Array::from(vec![0, 3, 2, 5, 6])), - ]; - - let result = trunc(&args).expect("failed to initialize function truncate"); - let floats = - as_float32_array(&result).expect("failed to initialize function truncate"); - - assert_eq!(floats.len(), 5); - assert_eq!(floats.value(0), 15.0); - assert_eq!(floats.value(1), 1_234.268); - assert_eq!(floats.value(2), 1_233.12); - assert_eq!(floats.value(3), 3.312_98); - assert_eq!(floats.value(4), -21.123_4); - } - - #[test] - fn test_truncate_64() { - let args: Vec = vec![ - Arc::new(Float64Array::from(vec![ - 5.0, - 234.267_812_176, - 123.123_456_789, - 123.312_979_313_2, - -321.123_1, - ])), - Arc::new(Int64Array::from(vec![0, 3, 2, 5, 6])), - ]; - - let result = trunc(&args).expect("failed to initialize function truncate"); - let floats = - as_float64_array(&result).expect("failed to initialize function truncate"); - - assert_eq!(floats.len(), 5); - assert_eq!(floats.value(0), 5.0); - assert_eq!(floats.value(1), 234.268); - assert_eq!(floats.value(2), 123.12); - assert_eq!(floats.value(3), 123.312_98); - assert_eq!(floats.value(4), -321.123_1); - } - - #[test] - fn test_truncate_64_one_arg() { - let args: Vec = vec![Arc::new(Float64Array::from(vec![ - 5.0, - 234.267_812, - 123.123_45, - 123.312_979_313_2, - -321.123, - ]))]; - - let result = trunc(&args).expect("failed to initialize function truncate"); - let floats = - as_float64_array(&result).expect("failed to initialize function truncate"); - - assert_eq!(floats.len(), 5); - assert_eq!(floats.value(0), 5.0); - assert_eq!(floats.value(1), 234.0); - assert_eq!(floats.value(2), 123.0); - assert_eq!(floats.value(3), 123.0); - assert_eq!(floats.value(4), -321.0); - } - #[test] fn test_nanvl_f64() { let args: Vec = vec![ @@ -917,36 +515,4 @@ mod tests { assert!(!booleans.value(2)); assert!(booleans.value(3)); } - - #[test] - fn test_iszero_f64() { - let args: Vec = - vec![Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; - - let result = iszero(&args).expect("failed to initialize function iszero"); - let booleans = - as_boolean_array(&result).expect("failed to initialize function iszero"); - - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); - } - - #[test] - fn test_iszero_f32() { - let args: Vec = - vec![Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; - - let result = iszero(&args).expect("failed to initialize function iszero"); - let booleans = - as_boolean_array(&result).expect("failed to initialize function iszero"); - - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); - } } diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index b656bededc07..a6c0edc31fb4 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -555,12 +555,12 @@ enum ScalarFunction { Log = 11; // 12 was Log10 // 13 was Log2 - Round = 14; + // 14 was Round // 15 was Signum // 16 was Sin // 17 was Sqrt // Tan = 18; - Trunc = 19; + // 19 was Trunc // 20 was Array // RegexpMatch = 21; // 22 was BitLength @@ -642,7 +642,7 @@ enum ScalarFunction { // 98 was Cardinality // 99 was ArrayElement // 100 was ArraySlice - Cot = 103; + // 103 was Cot // 104 was ArrayHas // 105 was ArrayHasAny // 106 was ArrayHasAll @@ -653,7 +653,7 @@ enum ScalarFunction { Nanvl = 111; // 112 was Flatten // 113 was IsNan - Iszero = 114; + // 114 was Iszero // 115 was ArrayEmpty // 116 was ArrayPopBack // 117 was StringToArray diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index c13ae045bdb5..4a8eb2a195d4 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22795,8 +22795,6 @@ impl serde::Serialize for ScalarFunction { Self::Ceil => "Ceil", Self::Exp => "Exp", Self::Log => "Log", - Self::Round => "Round", - Self::Trunc => "Trunc", Self::Concat => "Concat", Self::ConcatWithSeparator => "ConcatWithSeparator", Self::InitCap => "InitCap", @@ -22804,9 +22802,7 @@ impl serde::Serialize for ScalarFunction { Self::Coalesce => "Coalesce", Self::Power => "Power", Self::Factorial => "Factorial", - Self::Cot => "Cot", Self::Nanvl => "Nanvl", - Self::Iszero => "Iszero", Self::EndsWith => "EndsWith", }; serializer.serialize_str(variant) @@ -22823,8 +22819,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Ceil", "Exp", "Log", - "Round", - "Trunc", "Concat", "ConcatWithSeparator", "InitCap", @@ -22832,9 +22826,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Coalesce", "Power", "Factorial", - "Cot", "Nanvl", - "Iszero", "EndsWith", ]; @@ -22880,8 +22872,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Ceil" => Ok(ScalarFunction::Ceil), "Exp" => Ok(ScalarFunction::Exp), "Log" => Ok(ScalarFunction::Log), - "Round" => Ok(ScalarFunction::Round), - "Trunc" => Ok(ScalarFunction::Trunc), "Concat" => Ok(ScalarFunction::Concat), "ConcatWithSeparator" => Ok(ScalarFunction::ConcatWithSeparator), "InitCap" => Ok(ScalarFunction::InitCap), @@ -22889,9 +22879,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Coalesce" => Ok(ScalarFunction::Coalesce), "Power" => Ok(ScalarFunction::Power), "Factorial" => Ok(ScalarFunction::Factorial), - "Cot" => Ok(ScalarFunction::Cot), "Nanvl" => Ok(ScalarFunction::Nanvl), - "Iszero" => Ok(ScalarFunction::Iszero), "EndsWith" => Ok(ScalarFunction::EndsWith), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 092d5c59d081..4a1a59741015 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2854,12 +2854,12 @@ pub enum ScalarFunction { Log = 11, /// 12 was Log10 /// 13 was Log2 - Round = 14, + /// 14 was Round /// 15 was Signum /// 16 was Sin /// 17 was Sqrt /// Tan = 18; - Trunc = 19, + /// 19 was Trunc /// 20 was Array /// RegexpMatch = 21; /// 22 was BitLength @@ -2941,7 +2941,7 @@ pub enum ScalarFunction { /// 98 was Cardinality /// 99 was ArrayElement /// 100 was ArraySlice - Cot = 103, + /// 103 was Cot /// 104 was ArrayHas /// 105 was ArrayHasAny /// 106 was ArrayHasAll @@ -2952,7 +2952,7 @@ pub enum ScalarFunction { Nanvl = 111, /// 112 was Flatten /// 113 was IsNan - Iszero = 114, + /// 114 was Iszero /// 115 was ArrayEmpty /// 116 was ArrayPopBack /// 117 was StringToArray @@ -2990,8 +2990,6 @@ impl ScalarFunction { ScalarFunction::Ceil => "Ceil", ScalarFunction::Exp => "Exp", ScalarFunction::Log => "Log", - ScalarFunction::Round => "Round", - ScalarFunction::Trunc => "Trunc", ScalarFunction::Concat => "Concat", ScalarFunction::ConcatWithSeparator => "ConcatWithSeparator", ScalarFunction::InitCap => "InitCap", @@ -2999,9 +2997,7 @@ impl ScalarFunction { ScalarFunction::Coalesce => "Coalesce", ScalarFunction::Power => "Power", ScalarFunction::Factorial => "Factorial", - ScalarFunction::Cot => "Cot", ScalarFunction::Nanvl => "Nanvl", - ScalarFunction::Iszero => "Iszero", ScalarFunction::EndsWith => "EndsWith", } } @@ -3012,8 +3008,6 @@ impl ScalarFunction { "Ceil" => Some(Self::Ceil), "Exp" => Some(Self::Exp), "Log" => Some(Self::Log), - "Round" => Some(Self::Round), - "Trunc" => Some(Self::Trunc), "Concat" => Some(Self::Concat), "ConcatWithSeparator" => Some(Self::ConcatWithSeparator), "InitCap" => Some(Self::InitCap), @@ -3021,9 +3015,7 @@ impl ScalarFunction { "Coalesce" => Some(Self::Coalesce), "Power" => Some(Self::Power), "Factorial" => Some(Self::Factorial), - "Cot" => Some(Self::Cot), "Nanvl" => Some(Self::Nanvl), - "Iszero" => Some(Self::Iszero), "EndsWith" => Some(Self::EndsWith), _ => None, } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 9c24a3941895..9248891a3ccb 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -37,13 +37,13 @@ use datafusion_expr::expr::Unnest; use datafusion_expr::expr::{Alias, Placeholder}; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - ceil, coalesce, concat_expr, concat_ws_expr, cot, ends_with, exp, + ceil, coalesce, concat_expr, concat_ws_expr, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, - factorial, initcap, iszero, log, + factorial, initcap, log, logical_plan::{PlanType, StringifiedPlan}, - nanvl, power, random, round, trunc, AggregateFunction, Between, BinaryExpr, - BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, - GetIndexedField, GroupingSet, + nanvl, power, random, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, + BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, + GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -419,13 +419,10 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { use protobuf::ScalarFunction; match f { ScalarFunction::Unknown => todo!(), - ScalarFunction::Cot => Self::Cot, ScalarFunction::Exp => Self::Exp, ScalarFunction::Log => Self::Log, ScalarFunction::Factorial => Self::Factorial, ScalarFunction::Ceil => Self::Ceil, - ScalarFunction::Round => Self::Round, - ScalarFunction::Trunc => Self::Trunc, ScalarFunction::Concat => Self::Concat, ScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, ScalarFunction::EndsWith => Self::EndsWith, @@ -434,7 +431,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Coalesce => Self::Coalesce, ScalarFunction::Power => Self::Power, ScalarFunction::Nanvl => Self::Nanvl, - ScalarFunction::Iszero => Self::Iszero, } } } @@ -1301,8 +1297,6 @@ pub fn parse_expr( Ok(factorial(parse_expr(&args[0], registry, codec)?)) } ScalarFunction::Ceil => Ok(ceil(parse_expr(&args[0], registry, codec)?)), - ScalarFunction::Round => Ok(round(parse_exprs(args, registry, codec)?)), - ScalarFunction::Trunc => Ok(trunc(parse_exprs(args, registry, codec)?)), ScalarFunction::InitCap => { Ok(initcap(parse_expr(&args[0], registry, codec)?)) } @@ -1328,14 +1322,10 @@ pub fn parse_expr( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::Cot => Ok(cot(parse_expr(&args[0], registry, codec)?)), ScalarFunction::Nanvl => Ok(nanvl( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::Iszero => { - Ok(iszero(parse_expr(&args[0], registry, codec)?)) - } } } ExprType::ScalarUdfExpr(protobuf::ScalarUdfExprNode { diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index bd964b43d418..d8b88676ee62 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1407,13 +1407,10 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { fn try_from(scalar: &BuiltinScalarFunction) -> Result { let scalar_function = match scalar { - BuiltinScalarFunction::Cot => Self::Cot, BuiltinScalarFunction::Exp => Self::Exp, BuiltinScalarFunction::Factorial => Self::Factorial, BuiltinScalarFunction::Log => Self::Log, BuiltinScalarFunction::Ceil => Self::Ceil, - BuiltinScalarFunction::Round => Self::Round, - BuiltinScalarFunction::Trunc => Self::Trunc, BuiltinScalarFunction::Concat => Self::Concat, BuiltinScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, BuiltinScalarFunction::EndsWith => Self::EndsWith, @@ -1422,7 +1419,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Coalesce => Self::Coalesce, BuiltinScalarFunction::Power => Self::Power, BuiltinScalarFunction::Nanvl => Self::Nanvl, - BuiltinScalarFunction::Iszero => Self::Iszero, }; Ok(scalar_function) diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index 5dacf692e904..a74b1a38935b 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -34,9 +34,7 @@ use datafusion::datasource::physical_plan::{ FileSinkConfig, ParquetExec, }; use datafusion::execution::FunctionRegistry; -use datafusion::logical_expr::{ - create_udf, BuiltinScalarFunction, JoinType, Operator, Volatility, -}; +use datafusion::logical_expr::{create_udf, JoinType, Operator, Volatility}; use datafusion::physical_expr::expressions::NthValueAgg; use datafusion::physical_expr::window::SlidingAggregateWindowExpr; use datafusion::physical_expr::{PhysicalSortRequirement, ScalarFunctionExpr}; @@ -603,31 +601,6 @@ async fn roundtrip_parquet_exec_with_table_partition_cols() -> Result<()> { ))) } -#[test] -fn roundtrip_builtin_scalar_function() -> Result<()> { - let field_a = Field::new("a", DataType::Int64, false); - let field_b = Field::new("b", DataType::Int64, false); - let schema = Arc::new(Schema::new(vec![field_a, field_b])); - - let input = Arc::new(EmptyExec::new(schema.clone())); - - let fun_def = ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Trunc); - - let expr = ScalarFunctionExpr::new( - "trunc", - fun_def, - vec![col("a", &schema)?], - DataType::Float64, - Some(vec![Some(true)]), - false, - ); - - let project = - ProjectionExec::try_new(vec![(Arc::new(expr), "a".to_string())], input)?; - - roundtrip_test(Arc::new(project)) -} - #[test] fn roundtrip_scalar_udf() -> Result<()> { let field_a = Field::new("a", DataType::Int64, false); diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index f2f188105faf..74e326bb9b31 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2695,6 +2695,11 @@ fn logical_plan_with_dialect_and_options( vec![DataType::Int32, DataType::Int32], DataType::Int32, )) + .with_udf(make_udf( + "round", + vec![DataType::Float64, DataType::Int64], + DataType::Float32, + )) .with_udf(make_udf( "arrow_cast", vec![DataType::Int64, DataType::Utf8], From 5d693fa51f468f4ebb835fa776d3235e6d480a18 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Mon, 8 Apr 2024 18:03:23 -0400 Subject: [PATCH 4/4] Make mod iszero public, minor ordering change to keep the alphabetical ordering theme. --- datafusion/functions/src/math/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 3ea46c5e21fa..544de04e4a98 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -23,7 +23,7 @@ use std::sync::Arc; pub mod abs; pub mod cot; pub mod gcd; -mod iszero; +pub mod iszero; pub mod lcm; pub mod log; pub mod nans; @@ -48,7 +48,6 @@ make_udf_function!(cot::CotFunc, COT, cot); make_math_unary_udf!(DegreesFunc, DEGREES, degrees, to_degrees, None); make_math_unary_udf!(FloorFunc, FLOOR, floor, floor, Some(vec![Some(true)])); make_udf_function!(log::LogFunc, LOG, log); -make_udf_function!(power::PowerFunc, POWER, power); make_udf_function!(gcd::GcdFunc, GCD, gcd); make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(iszero::IsZeroFunc, ISZERO, iszero); @@ -57,6 +56,7 @@ make_math_unary_udf!(LnFunc, LN, ln, ln, Some(vec![Some(true)])); make_math_unary_udf!(Log2Func, LOG2, log2, log2, Some(vec![Some(true)])); make_math_unary_udf!(Log10Func, LOG10, log10, log10, Some(vec![Some(true)])); make_udf_function!(pi::PiFunc, PI, pi); +make_udf_function!(power::PowerFunc, POWER, power); make_math_unary_udf!(RadiansFunc, RADIANS, radians, to_radians, None); make_udf_function!(round::RoundFunc, ROUND, round); make_math_unary_udf!(SignumFunc, SIGNUM, signum, signum, None);