Skip to content

Commit

Permalink
Reduce code repetition in datafusion/functions mod files (#10700)
Browse files Browse the repository at this point in the history
* initial reduce repetition using macros

* formatting and docs

* fix docs

* refix doc

* replace math mod too

* fix vec arguments

* fix math variadic args

* apply to functions

* pattern-match hack to avoid second macro

* missed a function

* fix merge conflict

* fix octet_length argument
  • Loading branch information
MohamedAbdeen21 committed Jun 3, 2024
1 parent a92f803 commit 3aae451
Show file tree
Hide file tree
Showing 8 changed files with 339 additions and 516 deletions.
82 changes: 36 additions & 46 deletions datafusion/functions/src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,59 +42,49 @@ make_udf_function!(named_struct::NamedStructFunc, NAMED_STRUCT, named_struct);
make_udf_function!(getfield::GetFieldFunc, GET_FIELD, get_field);
make_udf_function!(coalesce::CoalesceFunc, COALESCE, coalesce);

// Export the functions out of this package, both as `expr_fn` wrappers and as a list of functions
pub mod expr_fn {
use datafusion_expr::{Expr, Literal};

/// returns NULL if value1 equals value2; otherwise it returns value1. This
/// can be used to perform the inverse operation of the COALESCE expression
pub fn nullif(arg1: Expr, arg2: Expr) -> Expr {
super::nullif().call(vec![arg1, arg2])
}

/// returns value1 cast to the `arrow_type` given the second argument. This
/// can be used to cast to a specific `arrow_type`.
pub fn arrow_cast(arg1: Expr, arg2: Expr) -> Expr {
super::arrow_cast().call(vec![arg1, arg2])
}

/// Returns value2 if value1 is NULL; otherwise it returns value1
pub fn nvl(arg1: Expr, arg2: Expr) -> Expr {
super::nvl().call(vec![arg1, arg2])
}

/// Returns value2 if value1 is not NULL; otherwise, it returns value3.
pub fn nvl2(arg1: Expr, arg2: Expr, arg3: Expr) -> Expr {
super::nvl2().call(vec![arg1, arg2, arg3])
}

/// Returns the Arrow type of the input expression.
pub fn arrow_typeof(arg1: Expr) -> Expr {
super::arrow_typeof().call(vec![arg1])
}

/// Returns a struct with the given arguments
pub fn r#struct(args: Vec<Expr>) -> Expr {
super::r#struct().call(args)
}

/// Returns a struct with the given names and arguments pairs
pub fn named_struct(args: Vec<Expr>) -> Expr {
super::named_struct().call(args)
}

/// Returns the value of the field with the given name from the struct
pub fn get_field(arg1: Expr, field_name: impl Literal) -> Expr {
super::get_field().call(vec![arg1, field_name.lit()])
}
// Each entry is `(function name, doc string, arguments)`.
// Space-separated arguments (no commas) produce one `Expr` parameter each;
// a single argument followed by a comma produces one `Vec<Expr>` parameter.
export_functions!((
    nullif,
    "Returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression",
    arg1 arg2
),(
    arrow_cast,
    "Returns value1 cast to the `arrow_type` given the second argument. This can be used to cast to a specific `arrow_type`.",
    arg1 arg2
),(
    nvl,
    "Returns value2 if value1 is NULL; otherwise it returns value1",
    arg1 arg2
),(
    nvl2,
    "Returns value2 if value1 is not NULL; otherwise, it returns value3.",
    arg1 arg2 arg3
),(
    arrow_typeof,
    "Returns the Arrow type of the input expression.",
    arg1
),(
    r#struct,
    "Returns a struct with the given arguments",
    args,
),(
    named_struct,
    "Returns a struct with the given names and arguments pairs",
    args,
),(
    coalesce,
    "Returns `coalesce(args...)`, which evaluates to the value of the first expr which is not NULL",
    args,
));

/// Returns `coalesce(args...)`, which evaluates to the value of the first expr which is not NULL
pub fn coalesce(args: Vec<Expr>) -> Expr {
super::coalesce().call(args)
/// Returns the value of the field with the given name from the struct
pub fn get_field(arg1: Expr, arg2: impl Literal) -> Expr {
    // Written by hand rather than through `export_functions!` because the
    // second parameter is `impl Literal`, not `Expr`.
    let udf = super::get_field();
    let field_name = arg2.lit();
    udf.call(vec![arg1, field_name])
}
}

/// Return a list of all functions in this package
pub fn functions() -> Vec<Arc<ScalarUDF>> {
vec![
nullif(),
Expand Down
60 changes: 35 additions & 25 deletions datafusion/functions/src/crypto/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

//! "crypto" DataFusion functions

use datafusion_expr::ScalarUDF;
use std::sync::Arc;

pub mod basic;
pub mod digest;
pub mod md5;
Expand All @@ -30,28 +33,35 @@ make_udf_function!(sha224::SHA224Func, SHA224, sha224);
make_udf_function!(sha256::SHA256Func, SHA256, sha256);
make_udf_function!(sha384::SHA384Func, SHA384, sha384);
make_udf_function!(sha512::SHA512Func, SHA512, sha512);
export_functions!((
digest,
input_arg1 input_arg2,
"Computes the binary hash of an expression using the specified algorithm."
),(
md5,
input_arg,
"Computes an MD5 128-bit checksum for a string expression."
),(
sha224,
input_arg1,
"Computes the SHA-224 hash of a binary string."
),(
sha256,
input_arg1,
"Computes the SHA-256 hash of a binary string."
),(
sha384,
input_arg1,
"Computes the SHA-384 hash of a binary string."
),(
sha512,
input_arg1,
"Computes the SHA-512 hash of a binary string."
));

/// `Expr`-building wrappers for the crypto UDFs declared in the parent module.
pub mod expr_fn {
// Each tuple is `(function name, doc string, arguments)`. The arguments are
// space-separated identifiers (no commas), so every generated wrapper takes
// one `Expr` per identifier and forwards them to the UDF of the same name.
export_functions!((
digest,
"Computes the binary hash of an expression using the specified algorithm.",
input_arg1 input_arg2
),(
md5,
"Computes an MD5 128-bit checksum for a string expression.",
input_arg
),(
sha224,
"Computes the SHA-224 hash of a binary string.",
input_arg1
),(
sha256,
"Computes the SHA-256 hash of a binary string.",
input_arg1
),(
sha384,
"Computes the SHA-384 hash of a binary string.",
input_arg1
),(
sha512,
"Computes the SHA-512 hash of a binary string.",
input_arg1
));
}

/// Return a list of all functions in this package
pub fn functions() -> Vec<Arc<ScalarUDF>> {
vec![digest(), md5(), sha224(), sha256(), sha384(), sha512()]
}
123 changes: 54 additions & 69 deletions datafusion/functions/src/datetime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,45 +79,60 @@ make_udf_function!(
pub mod expr_fn {
use datafusion_expr::Expr;

#[doc = "returns current UTC date as a Date32 value"]
pub fn current_date() -> Expr {
super::current_date().call(vec![])
}

#[doc = "returns current UTC time as a Time64 value"]
pub fn current_time() -> Expr {
super::current_time().call(vec![])
}

#[doc = "coerces an arbitrary timestamp to the start of the nearest specified interval"]
pub fn date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr {
super::date_bin().call(vec![stride, source, origin])
}

#[doc = "extracts a subfield from the date"]
pub fn date_part(part: Expr, date: Expr) -> Expr {
super::date_part().call(vec![part, date])
}

#[doc = "truncates the date to a specified level of precision"]
pub fn date_trunc(part: Expr, date: Expr) -> Expr {
super::date_trunc().call(vec![part, date])
}

#[doc = "converts an integer to RFC3339 timestamp format string"]
pub fn from_unixtime(unixtime: Expr) -> Expr {
super::from_unixtime().call(vec![unixtime])
}

#[doc = "make a date from year, month and day component parts"]
pub fn make_date(year: Expr, month: Expr, day: Expr) -> Expr {
super::make_date().call(vec![year, month, day])
}

#[doc = "returns the current timestamp in nanoseconds, using the same value for all instances of now() in same statement"]
pub fn now() -> Expr {
super::now().call(vec![])
}
// Each tuple is `(function name, doc string, arguments)`:
// - no arguments (e.g. `current_date`, `now`): the wrapper takes no parameters
// - space-separated identifiers: one `Expr` parameter per identifier
// - a single identifier followed by a comma (`args,`): one `Vec<Expr>` parameter
export_functions!((
current_date,
"returns current UTC date as a Date32 value",
),(
current_time,
"returns current UTC time as a Time64 value",
),(
from_unixtime,
"converts an integer to RFC3339 timestamp format string",
unixtime
),(
date_bin,
"coerces an arbitrary timestamp to the start of the nearest specified interval",
stride source origin
),(
date_part,
"extracts a subfield from the date",
part date
),(
date_trunc,
"truncates the date to a specified level of precision",
part date
),(
make_date,
"make a date from year, month and day component parts",
year month day
),(
now,
"returns the current timestamp in nanoseconds, using the same value for all instances of now() in same statement",
),(
to_unixtime,
"converts a string and optional formats to a Unixtime",
args,
),(
to_timestamp,
"converts a string and optional formats to a `Timestamp(Nanoseconds, None)`",
args,
),(
to_timestamp_seconds,
"converts a string and optional formats to a `Timestamp(Seconds, None)`",
args,
),(
to_timestamp_millis,
"converts a string and optional formats to a `Timestamp(Milliseconds, None)`",
args,
),(
to_timestamp_micros,
"converts a string and optional formats to a `Timestamp(Microseconds, None)`",
args,
),(
to_timestamp_nanos,
"converts a string and optional formats to a `Timestamp(Nanoseconds, None)`",
args,
));

/// Returns a string representation of a date, time, timestamp or duration based
/// on a Chrono pattern.
Expand Down Expand Up @@ -247,36 +262,6 @@ pub mod expr_fn {
pub fn to_date(args: Vec<Expr>) -> Expr {
super::to_date().call(args)
}

#[doc = "converts a string and optional formats to a Unixtime"]
pub fn to_unixtime(args: Vec<Expr>) -> Expr {
super::to_unixtime().call(args)
}

#[doc = "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`"]
pub fn to_timestamp(args: Vec<Expr>) -> Expr {
super::to_timestamp().call(args)
}

#[doc = "converts a string and optional formats to a `Timestamp(Seconds, None)`"]
pub fn to_timestamp_seconds(args: Vec<Expr>) -> Expr {
super::to_timestamp_seconds().call(args)
}

#[doc = "converts a string and optional formats to a `Timestamp(Milliseconds, None)`"]
pub fn to_timestamp_millis(args: Vec<Expr>) -> Expr {
super::to_timestamp_millis().call(args)
}

#[doc = "converts a string and optional formats to a `Timestamp(Microseconds, None)`"]
pub fn to_timestamp_micros(args: Vec<Expr>) -> Expr {
super::to_timestamp_micros().call(args)
}

#[doc = "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`"]
pub fn to_timestamp_nanos(args: Vec<Expr>) -> Expr {
super::to_timestamp_nanos().call(args)
}
}

/// Return a list of all functions in this package
Expand Down
22 changes: 18 additions & 4 deletions datafusion/functions/src/encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,28 @@
// specific language governing permissions and limitations
// under the License.

use datafusion_expr::ScalarUDF;
use std::sync::Arc;

pub mod inner;

// create `encode` and `decode` UDFs
make_udf_function!(inner::EncodeFunc, ENCODE, encode);
make_udf_function!(inner::DecodeFunc, DECODE, decode);

// Export the functions out of this package, both as `expr_fn` wrappers and as a list of functions
export_functions!(
(encode, input encoding, "encode the `input`, using the `encoding`. encoding can be base64 or hex"),
(decode, input encoding, "decode the `input`, using the `encoding`. encoding can be base64 or hex")
);
/// `Expr`-building wrappers for the `encode` and `decode` UDFs.
pub mod expr_fn {
// Both entries list two space-separated arguments (no commas), so each
// generated wrapper takes two `Expr` parameters: `input` and `encoding`.
export_functions!( (
encode,
"encode the `input`, using the `encoding`. encoding can be base64 or hex",
input encoding
),(
decode,
"decode the `input`, using the `encoding`. encoding can be base64 or hex",
input encoding
));
}

/// Return a list of all functions in this package
pub fn functions() -> Vec<Arc<ScalarUDF>> {
vec![encode(), decode()]
}
38 changes: 22 additions & 16 deletions datafusion/functions/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,31 @@
/// ]
/// }
/// ```
///
/// Exported functions accept:
/// - `Vec<Expr>` argument (single argument followed by a comma)
/// - Variable number of `Expr` arguments (zero or more arguments, must be without commas)
macro_rules! export_functions {
($(($FUNC:ident, $($arg:ident)*, $DOC:expr)),*) => {
pub mod expr_fn {
$(
#[doc = $DOC]
/// Return $name(arg)
pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
super::$FUNC().call(vec![$($arg),*],)
}
)*
($(($FUNC:ident, $DOC:expr, $($arg:tt)*)),*) => {
$(
// switch to single-function cases below
export_functions!(single $FUNC, $DOC, $($arg)*);
)*
};

// single vector argument (a single argument followed by a comma)
(single $FUNC:ident, $DOC:expr, $arg:ident,) => {
#[doc = $DOC]
pub fn $FUNC($arg: Vec<datafusion_expr::Expr>) -> datafusion_expr::Expr {
super::$FUNC().call($arg)
}
};

/// Return a list of all functions in this package
pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
vec![
$(
$FUNC(),
)*
]
// variadic arguments (zero or more arguments, without commas)
(single $FUNC:ident, $DOC:expr, $($arg:ident)*) => {
#[doc = $DOC]
pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
super::$FUNC().call(vec![$($arg),*])
}
};
}
Expand Down
Loading

0 comments on commit 3aae451

Please sign in to comment.