Skip to content

Commit

Permalink
Add isnan and iszero (#7274)
Browse files Browse the repository at this point in the history
* Add isnan and iszero.

* Modified doc.

* f64 doesn't need high priority.

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
sarutak and alamb committed Aug 16, 2023
1 parent 90484bb commit cf152af
Show file tree
Hide file tree
Showing 14 changed files with 286 additions and 3 deletions.
2 changes: 2 additions & 0 deletions datafusion/core/tests/sql/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ async fn test_mathematical_expressions_with_null() -> Result<()> {
test_expression!("nanvl(NULL, NULL)", "NULL");
test_expression!("nanvl(1, NULL)", "NULL");
test_expression!("nanvl(NULL, 1)", "NULL");
test_expression!("isnan(NULL)", "NULL");
test_expression!("iszero(NULL)", "NULL");
Ok(())
}

Expand Down
16 changes: 16 additions & 0 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ pub enum BuiltinScalarFunction {
Gcd,
/// lcm, Least common multiple
Lcm,
/// isnan
Isnan,
/// iszero
Iszero,
/// ln, Natural logarithm
Ln,
/// log, same as log10
Expand Down Expand Up @@ -334,6 +338,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Factorial => Volatility::Immutable,
BuiltinScalarFunction::Floor => Volatility::Immutable,
BuiltinScalarFunction::Gcd => Volatility::Immutable,
BuiltinScalarFunction::Isnan => Volatility::Immutable,
BuiltinScalarFunction::Iszero => Volatility::Immutable,
BuiltinScalarFunction::Lcm => Volatility::Immutable,
BuiltinScalarFunction::Ln => Volatility::Immutable,
BuiltinScalarFunction::Log => Volatility::Immutable,
Expand Down Expand Up @@ -774,6 +780,8 @@ impl BuiltinScalarFunction {
_ => Ok(Float64),
},

BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => Ok(Boolean),

BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),

BuiltinScalarFunction::Abs
Expand Down Expand Up @@ -1184,6 +1192,12 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::CurrentTime => {
Signature::uniform(0, vec![], self.volatility())
}
BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => {
Signature::one_of(
vec![Exact(vec![Float32]), Exact(vec![Float64])],
self.volatility(),
)
}
}
}
}
Expand All @@ -1208,6 +1222,8 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
BuiltinScalarFunction::Factorial => &["factorial"],
BuiltinScalarFunction::Floor => &["floor"],
BuiltinScalarFunction::Gcd => &["gcd"],
BuiltinScalarFunction::Isnan => &["isnan"],
BuiltinScalarFunction::Iszero => &["iszero"],
BuiltinScalarFunction::Lcm => &["lcm"],
BuiltinScalarFunction::Ln => &["ln"],
BuiltinScalarFunction::Log => &["log"],
Expand Down
14 changes: 14 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,18 @@ scalar_expr!(CurrentDate, current_date, ,"returns current UTC date as a [`DataTy
scalar_expr!(Now, now, ,"returns current timestamp in nanoseconds, using the same value for all instances of now() in same statement");
scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a [`DataType::Time64`] value");
scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y");
// Registers the `isnan(num)` expression helper for `BuiltinScalarFunction::Isnan`.
// NOTE(review): presumably the trailing string becomes the generated helper's doc
// text — confirm against the `scalar_expr!` definition.
scalar_expr!(
Isnan,
isnan,
num,
"returns true if a given number is +NaN or -NaN otherwise returns false"
);
// Registers the `iszero(num)` expression helper for `BuiltinScalarFunction::Iszero`.
// NOTE(review): same assumption about the trailing doc string as for `isnan` above.
scalar_expr!(
Iszero,
iszero,
num,
"returns true if a given number is +0.0 or -0.0 otherwise returns false"
);

scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");

Expand Down Expand Up @@ -1003,6 +1015,8 @@ mod test {
test_unary_scalar_expr!(Ln, ln);
test_scalar_expr!(Atan2, atan2, y, x);
test_scalar_expr!(Nanvl, nanvl, x, y);
test_scalar_expr!(Isnan, isnan, input);
test_scalar_expr!(Iszero, iszero, input);

test_scalar_expr!(Ascii, ascii, input);
test_scalar_expr!(BitLength, bit_length, string);
Expand Down
6 changes: 6 additions & 0 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,12 @@ pub fn create_physical_fun(
BuiltinScalarFunction::Gcd => {
Arc::new(|args| make_scalar_function(math_expressions::gcd)(args))
}
BuiltinScalarFunction::Isnan => {
Arc::new(|args| make_scalar_function(math_expressions::isnan)(args))
}
BuiltinScalarFunction::Iszero => {
Arc::new(|args| make_scalar_function(math_expressions::iszero)(args))
}
BuiltinScalarFunction::Lcm => {
Arc::new(|args| make_scalar_function(math_expressions::lcm)(args))
}
Expand Down
141 changes: 139 additions & 2 deletions datafusion/physical-expr/src/math_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Math expressions

use arrow::array::ArrayRef;
use arrow::array::{Float32Array, Float64Array, Int64Array};
use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array};
use arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_common::ScalarValue::{Float32, Int64};
Expand Down Expand Up @@ -142,6 +142,19 @@ macro_rules! make_function_inputs2 {
}};
}

/// Applies a per-value function to a single array argument and collects the
/// results into an array of a (possibly different) type.
///
/// * `$ARG` - the array argument; it is downcast to `$ARGS_TYPE` through
///   `downcast_arg!`
/// * `$NAME` - argument name forwarded to `downcast_arg!`
/// * `$ARGS_TYPE` - concrete array type of the input
/// * `$RETURN_TYPE` - array type the mapped values are collected into
/// * `$FUNC` - block evaluating to a callable that is invoked with each
///   non-null value
///
/// Null input slots are never passed to `$FUNC`; they are emitted as nulls.
macro_rules! make_function_scalar_inputs_return_type {
    ($ARG: expr, $NAME:expr, $ARGS_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{
        let arg = downcast_arg!($ARG, $NAME, $ARGS_TYPE);

        arg.iter()
            // `Option::map` leaves `None` (null) untouched and maps present values.
            .map(|a| a.map($FUNC))
            .collect::<$RETURN_TYPE>()
    }};
}

math_unary_function!("sqrt", sqrt);
math_unary_function!("cbrt", cbrt);
math_unary_function!("sin", sin);
Expand Down Expand Up @@ -306,6 +319,56 @@ pub fn nanvl(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}

/// Isnan SQL function
pub fn isnan(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ f64::is_nan }
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ f32::is_nan }
)) as ArrayRef),

other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function isnan"
))),
}
}

/// Iszero SQL function
pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ |x: f64| { x == 0_f64 } }
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ |x: f32| { x == 0_f32 } }
)) as ArrayRef),

other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function iszero"
))),
}
}

/// Pi SQL function
pub fn pi(args: &[ColumnarValue]) -> Result<ColumnarValue> {
if !matches!(&args[0], ColumnarValue::Array(_)) {
Expand Down Expand Up @@ -650,7 +713,9 @@ mod tests {

use super::*;
use arrow::array::{Float64Array, NullArray};
use datafusion_common::cast::{as_float32_array, as_float64_array, as_int64_array};
use datafusion_common::cast::{
as_boolean_array, as_float32_array, as_float64_array, as_int64_array,
};

#[test]
fn test_random_expression() {
Expand Down Expand Up @@ -1041,4 +1106,76 @@ mod tests {
assert_eq!(floats.value(2), 3.0);
assert!(floats.value(3).is_nan());
}

/// `isnan` on a Float64 array flags both NaN sign variants and nothing else.
#[test]
fn test_isnan_f64() {
    let input: ArrayRef =
        Arc::new(Float64Array::from(vec![1.0, f64::NAN, 3.0, -f64::NAN]));
    let args = vec![input];

    let result = isnan(&args).expect("failed to initialize function isnan");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function isnan");

    // Expected flag for each input position.
    let expected = [false, true, false, true];
    assert_eq!(booleans.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(booleans.value(idx), *want);
    }
}

/// `isnan` on a Float32 array flags both NaN sign variants and nothing else.
#[test]
fn test_isnan_f32() {
    // The last value is a negative NaN so the f32 path is checked for both
    // sign variants, mirroring the f64 test.
    let args: Vec<ArrayRef> = vec![Arc::new(Float32Array::from(vec![
        1.0,
        f32::NAN,
        3.0,
        -f32::NAN,
    ]))];

    let result = isnan(&args).expect("failed to initialize function isnan");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function isnan");

    assert_eq!(booleans.len(), 4);
    assert!(!booleans.value(0));
    assert!(booleans.value(1));
    assert!(!booleans.value(2));
    assert!(booleans.value(3));
}

/// `iszero` on a Float64 array flags positive and negative zero only.
#[test]
fn test_iszero_f64() {
    let input: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]));
    let args = vec![input];

    let result = iszero(&args).expect("failed to initialize function iszero");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function iszero");

    // Expected flag for each input position.
    let expected = [false, true, false, true];
    assert_eq!(booleans.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(booleans.value(idx), *want);
    }
}

/// `iszero` on a Float32 array flags positive and negative zero only.
#[test]
fn test_iszero_f32() {
    let input: ArrayRef = Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]));
    let args = vec![input];

    let result = iszero(&args).expect("failed to initialize function iszero");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function iszero");

    // Expected flag for each input position.
    let expected = [false, true, false, true];
    assert_eq!(booleans.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(booleans.value(idx), *want);
    }
}
}
2 changes: 2 additions & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,8 @@ enum ScalarFunction {
ArrayReplaceAll = 110;
Nanvl = 111;
Flatten = 112;
Isnan = 113;
Iszero = 114;
}

message ScalarFunctionNode {
Expand Down
6 changes: 6 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use datafusion_expr::{
concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part,
date_trunc, degrees, digest, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, floor, from_unixtime, gcd, lcm, left, ln, log, log10, log2,
factorial, floor, from_unixtime, gcd, isnan, iszero, lcm, left, ln, log, log10, log2,
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, pi, power, radians,
random, regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad,
Expand Down Expand Up @@ -525,6 +525,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::FromUnixtime => Self::FromUnixtime,
ScalarFunction::Atan2 => Self::Atan2,
ScalarFunction::Nanvl => Self::Nanvl,
ScalarFunction::Isnan => Self::Isnan,
ScalarFunction::Iszero => Self::Iszero,
ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
}
}
Expand Down Expand Up @@ -1577,6 +1579,8 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
ScalarFunction::Isnan => Ok(isnan(parse_expr(&args[0], registry)?)),
ScalarFunction::Iszero => Ok(iszero(parse_expr(&args[0], registry)?)),
_ => Err(proto_error(
"Protobuf deserialization error: Unsupported scalar function",
)),
Expand Down
2 changes: 2 additions & 0 deletions datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1524,6 +1524,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::FromUnixtime => Self::FromUnixtime,
BuiltinScalarFunction::Atan2 => Self::Atan2,
BuiltinScalarFunction::Nanvl => Self::Nanvl,
BuiltinScalarFunction::Isnan => Self::Isnan,
BuiltinScalarFunction::Iszero => Self::Iszero,
BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
};

Expand Down
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/math.slt
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,15 @@ query RRR
SELECT nanvl(asin(10), 1.0), nanvl(1.0, 2.0), nanvl(asin(10), asin(10))
----
1 1 NaN

# isnan
query BBBB
SELECT isnan(1.0), isnan('NaN'::DOUBLE), isnan(-'NaN'::DOUBLE), isnan(NULL)
----
false true true NULL

# iszero
query BBBB
SELECT iszero(1.0), iszero(0.0), iszero(-0.0), iszero(NULL)
----
false true true NULL
Loading

0 comments on commit cf152af

Please sign in to comment.