Skip to content

Commit

Permalink
[task apache#9539] Move starts_with, to_hex, trim, upper to datafusion-functions
Browse files Browse the repository at this point in the history

Signed-off-by: tangruilin <[email protected]>
  • Loading branch information
Tangruilin committed Mar 14, 2024
1 parent 9d0c05b commit 0171242
Show file tree
Hide file tree
Showing 18 changed files with 716 additions and 366 deletions.
58 changes: 0 additions & 58 deletions datafusion/core/tests/dataframe/dataframe_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -650,26 +650,6 @@ async fn test_fn_split_part() -> Result<()> {
Ok(())
}

/// Exercises the `starts_with` expression builder with column `a` and the
/// literal prefix `"abc"`.
#[tokio::test]
async fn test_fn_starts_with() -> Result<()> {
    // Expected table handed to `assert_fn_batches!` together with the expression.
    let expected_rows = [
        "+---------------------------------+",
        "| starts_with(test.a,Utf8(\"abc\")) |",
        "+---------------------------------+",
        "| true |",
        "| true |",
        "| false |",
        "| false |",
        "+---------------------------------+",
    ];
    let prefix_check = starts_with(col("a"), lit("abc"));

    assert_fn_batches!(prefix_check, expected_rows);

    Ok(())
}

#[tokio::test]
async fn test_fn_ends_with() -> Result<()> {
let expr = ends_with(col("a"), lit("DEF"));
Expand Down Expand Up @@ -749,25 +729,6 @@ async fn test_cast() -> Result<()> {
Ok(())
}

/// Exercises the `to_hex` expression builder applied to column `b`.
#[tokio::test]
async fn test_fn_to_hex() -> Result<()> {
    // Expected table handed to `assert_fn_batches!` together with the expression.
    let expected_rows = [
        "+----------------+",
        "| to_hex(test.b) |",
        "+----------------+",
        "| 1 |",
        "| a |",
        "| a |",
        "| 64 |",
        "+----------------+",
    ];
    let hex_of_b = to_hex(col("b"));

    assert_fn_batches!(hex_of_b, expected_rows);

    Ok(())
}

#[tokio::test]
#[cfg(feature = "unicode_expressions")]
async fn test_fn_translate() -> Result<()> {
Expand All @@ -788,25 +749,6 @@ async fn test_fn_translate() -> Result<()> {
Ok(())
}

/// Exercises the `upper` expression builder applied to column `a`.
#[tokio::test]
async fn test_fn_upper() -> Result<()> {
    // Expected table handed to `assert_fn_batches!` together with the expression.
    let expected_rows = [
        "+---------------+",
        "| upper(test.a) |",
        "+---------------+",
        "| ABCDEF |",
        "| ABC123 |",
        "| CBADEF |",
        "| 123ABCDEF |",
        "+---------------+",
    ];
    let upper_of_a = upper(col("a"));

    assert_fn_batches!(upper_of_a, expected_rows);

    Ok(())
}

#[tokio::test]
async fn test_fn_encode() -> Result<()> {
let expr = encode(col("a"), lit("hex"));
Expand Down
57 changes: 14 additions & 43 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,22 +182,14 @@ pub enum BuiltinScalarFunction {
Rtrim,
/// split_part
SplitPart,
/// starts_with
StartsWith,
/// strpos
Strpos,
/// substr
Substr,
/// to_hex
ToHex,
/// make_date
MakeDate,
/// translate
Translate,
/// trim
Trim,
/// upper
Upper,
/// uuid
Uuid,
/// overlay
Expand Down Expand Up @@ -331,15 +323,11 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rpad => Volatility::Immutable,
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
BuiltinScalarFunction::Strpos => Volatility::Immutable,
BuiltinScalarFunction::Substr => Volatility::Immutable,
BuiltinScalarFunction::ToHex => Volatility::Immutable,
BuiltinScalarFunction::ToChar => Volatility::Immutable,
BuiltinScalarFunction::MakeDate => Volatility::Immutable,
BuiltinScalarFunction::Translate => Volatility::Immutable,
BuiltinScalarFunction::Trim => Volatility::Immutable,
BuiltinScalarFunction::Upper => Volatility::Immutable,
BuiltinScalarFunction::OverLay => Volatility::Immutable,
BuiltinScalarFunction::Levenshtein => Volatility::Immutable,
BuiltinScalarFunction::SubstrIndex => Volatility::Immutable,
Expand Down Expand Up @@ -470,20 +458,13 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::SplitPart => {
utf8_to_str_type(&input_expr_types[0], "split_part")
}
BuiltinScalarFunction::StartsWith => Ok(Boolean),
BuiltinScalarFunction::EndsWith => Ok(Boolean),
BuiltinScalarFunction::Strpos => {
utf8_to_int_type(&input_expr_types[0], "strpos/instr/position")
}
BuiltinScalarFunction::Substr => {
utf8_to_str_type(&input_expr_types[0], "substr")
}
BuiltinScalarFunction::ToHex => Ok(match input_expr_types[0] {
Int8 | Int16 | Int32 | Int64 => Utf8,
_ => {
return plan_err!("The to_hex function can only accept integers.");
}
}),
BuiltinScalarFunction::SubstrIndex => {
utf8_to_str_type(&input_expr_types[0], "substr_index")
}
Expand All @@ -495,10 +476,6 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Translate => {
utf8_to_str_type(&input_expr_types[0], "translate")
}
BuiltinScalarFunction::Trim => utf8_to_str_type(&input_expr_types[0], "trim"),
BuiltinScalarFunction::Upper => {
utf8_to_str_type(&input_expr_types[0], "upper")
}

BuiltinScalarFunction::Factorial
| BuiltinScalarFunction::Gcd
Expand Down Expand Up @@ -618,18 +595,16 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::InitCap
| BuiltinScalarFunction::Lower
| BuiltinScalarFunction::OctetLength
| BuiltinScalarFunction::Reverse
| BuiltinScalarFunction::Upper => {
| BuiltinScalarFunction::Reverse => {
Signature::uniform(1, vec![Utf8, LargeUtf8], self.volatility())
}
BuiltinScalarFunction::Btrim
| BuiltinScalarFunction::Ltrim
| BuiltinScalarFunction::Rtrim
| BuiltinScalarFunction::Trim => Signature::one_of(
| BuiltinScalarFunction::Rtrim => Signature::one_of(
vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])],
self.volatility(),
),
BuiltinScalarFunction::Chr | BuiltinScalarFunction::ToHex => {
BuiltinScalarFunction::Chr => {
Signature::uniform(1, vec![Int64], self.volatility())
}
BuiltinScalarFunction::Lpad | BuiltinScalarFunction::Rpad => {
Expand Down Expand Up @@ -696,17 +671,17 @@ impl BuiltinScalarFunction {
self.volatility(),
),

BuiltinScalarFunction::EndsWith
| BuiltinScalarFunction::Strpos
| BuiltinScalarFunction::StartsWith => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
Exact(vec![Utf8, LargeUtf8]),
Exact(vec![LargeUtf8, Utf8]),
Exact(vec![LargeUtf8, LargeUtf8]),
],
self.volatility(),
),
BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => {
Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
Exact(vec![Utf8, LargeUtf8]),
Exact(vec![LargeUtf8, Utf8]),
Exact(vec![LargeUtf8, LargeUtf8]),
],
self.volatility(),
)
}

BuiltinScalarFunction::Substr => Signature::one_of(
vec![
Expand Down Expand Up @@ -931,13 +906,9 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Rpad => &["rpad"],
BuiltinScalarFunction::Rtrim => &["rtrim"],
BuiltinScalarFunction::SplitPart => &["split_part"],
BuiltinScalarFunction::StartsWith => &["starts_with"],
BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"],
BuiltinScalarFunction::Substr => &["substr"],
BuiltinScalarFunction::ToHex => &["to_hex"],
BuiltinScalarFunction::Translate => &["translate"],
BuiltinScalarFunction::Trim => &["trim"],
BuiltinScalarFunction::Upper => &["upper"],
BuiltinScalarFunction::Uuid => &["uuid"],
BuiltinScalarFunction::Levenshtein => &["levenshtein"],
BuiltinScalarFunction::SubstrIndex => &["substr_index", "substring_index"],
Expand Down
18 changes: 0 additions & 18 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,6 @@ scalar_expr!(Log10, log10, num, "base 10 logarithm of number");
scalar_expr!(Ln, ln, num, "natural logarithm (base e) of number");
scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
scalar_expr!(Atan2, atan2, y x, "inverse tangent of a division given in the argument");
// Declares the `to_hex` expression function, which takes a single `num` argument.
scalar_expr!(
    ToHex,
    to_hex,
    num,
    "returns the hexadecimal representation of an integer"
);
scalar_expr!(Uuid, uuid, , "returns uuid v4 as a string value");
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");

Expand Down Expand Up @@ -725,19 +719,11 @@ scalar_expr!(
"removes all characters, spaces by default, from the end of a string"
);
scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index.");
scalar_expr!(StartsWith, starts_with, string prefix, "whether the `string` starts with the `prefix`");
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`");
scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`");
scalar_expr!(Substr, substr, string position, "substring from the `position` to the end");
scalar_expr!(Substr, substring, string position length, "substring from the `position` with `length` characters");
scalar_expr!(Translate, translate, string from to, "replaces the characters in `from` with the counterpart in `to`");
scalar_expr!(
Trim,
trim,
string,
"removes all characters, space by default from the string"
);
scalar_expr!(Upper, upper, string, "converts the string to upper case");
//use vec as parameter
nary_scalar_expr!(
Lpad,
Expand Down Expand Up @@ -1220,15 +1206,11 @@ mod test {
test_nary_scalar_expr!(Rpad, rpad, string, count, characters);
test_scalar_expr!(Rtrim, rtrim, string);
test_scalar_expr!(SplitPart, split_part, expr, delimiter, index);
test_scalar_expr!(StartsWith, starts_with, string, characters);
test_scalar_expr!(EndsWith, ends_with, string, characters);
test_scalar_expr!(Strpos, strpos, string, substring);
test_scalar_expr!(Substr, substr, string, position);
test_scalar_expr!(Substr, substring, string, position, count);
test_scalar_expr!(ToHex, to_hex, string);
test_scalar_expr!(Translate, translate, string, from, to);
test_scalar_expr!(Trim, trim, string);
test_scalar_expr!(Upper, upper, string);

test_scalar_expr!(ArrayPopFront, array_pop_front, array);
test_scalar_expr!(ArrayPopBack, array_pop_back, array);
Expand Down
6 changes: 4 additions & 2 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,21 @@ authors = { workspace = true }
rust-version = { workspace = true }

[features]
# enable string functions
string_expressions = []
# enable core functions
core_expressions = []
# enable datetime functions
datetime_expressions = []
# Enable encoding by default so the doctests work. In general don't automatically enable all packages.
default = [
"core_expressions",
"datetime_expressions",
"encoding_expressions",
"math_expressions",
"regex_expressions",
"crypto_expressions",
]
"string_expressions",
] # Enable encoding by default so the doctests work. In general don't automatically enable all packages.
# enable encode/decode functions
encoding_expressions = ["base64", "hex"]
# enable math functions
Expand Down
7 changes: 6 additions & 1 deletion datafusion/functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ use log::debug;
#[macro_use]
pub mod macros;

/// String expressions
/// Enabled via feature flag `string_expressions`
#[cfg(feature = "string_expressions")]
pub mod string;
make_stub_package!(string, "string_expressions");

/// Core datafusion expressions
/// Enabled via feature flag `core_expressions`
#[cfg(feature = "core_expressions")]
Expand Down Expand Up @@ -144,7 +148,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
.chain(encoding::functions())
.chain(math::functions())
.chain(regex::functions())
.chain(crypto::functions());
.chain(crypto::functions())
.chain(string::functions());

all_functions.try_for_each(|udf| {
let existing_udf = registry.register_udf(udf)?;
Expand Down
Loading

0 comments on commit 0171242

Please sign in to comment.