-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-44131][SQL] Add call_function and deprecate call_udf for Scala API #41687
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
32e5a26
7a185b7
64bb940
47cc5f4
3a6f044
d6e20c8
47b3121
9ea18a4
edc4d14
c49d2cf
0f7e98a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Project [lower(g#0) AS lower(g)#0] | ||
| +- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,25 @@ | ||
| { | ||
| "common": { | ||
| "planId": "1" | ||
| }, | ||
| "project": { | ||
| "input": { | ||
| "common": { | ||
| "planId": "0" | ||
| }, | ||
| "localRelation": { | ||
| "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" | ||
| } | ||
| }, | ||
| "expressions": [{ | ||
| "unresolvedFunction": { | ||
| "functionName": "lower", | ||
| "arguments": [{ | ||
| "unresolvedAttribute": { | ||
| "unparsedIdentifier": "g" | ||
| } | ||
| }] | ||
| } | ||
| }] | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1936,9 +1936,7 @@ object functions { | |
| * @group math_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def try_add(left: Column, right: Column): Column = withExpr { | ||
| UnresolvedFunction("try_add", Seq(left.expr, right.expr), isDistinct = false) | ||
| } | ||
| def try_add(left: Column, right: Column): Column = call_function("try_add", left, right) | ||
|
|
||
| /** | ||
| * Returns the mean calculated from values of a group and the result is null on overflow. | ||
|
|
@@ -1957,9 +1955,8 @@ object functions { | |
| * @group math_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def try_divide(dividend: Column, divisor: Column): Column = withExpr { | ||
| UnresolvedFunction("try_divide", Seq(dividend.expr, divisor.expr), isDistinct = false) | ||
| } | ||
| def try_divide(dividend: Column, divisor: Column): Column = | ||
| call_function("try_divide", dividend, divisor) | ||
|
|
||
| /** | ||
| * Returns `left``*``right` and the result is null on overflow. The acceptable input types are | ||
|
|
@@ -1968,9 +1965,8 @@ object functions { | |
| * @group math_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def try_multiply(left: Column, right: Column): Column = withExpr { | ||
| UnresolvedFunction("try_multiply", Seq(left.expr, right.expr), isDistinct = false) | ||
| } | ||
| def try_multiply(left: Column, right: Column): Column = | ||
| call_function("try_multiply", left, right) | ||
|
|
||
| /** | ||
| * Returns `left``-``right` and the result is null on overflow. The acceptable input types are | ||
|
|
@@ -1979,9 +1975,8 @@ object functions { | |
| * @group math_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def try_subtract(left: Column, right: Column): Column = withExpr { | ||
| UnresolvedFunction("try_subtract", Seq(left.expr, right.expr), isDistinct = false) | ||
| } | ||
| def try_subtract(left: Column, right: Column): Column = | ||
| call_function("try_subtract", left, right) | ||
|
|
||
| /** | ||
| * Returns the sum calculated from values of a group and the result is null on overflow. | ||
|
|
@@ -2366,19 +2361,15 @@ object functions { | |
| * @group math_funcs | ||
| * @since 3.3.0 | ||
| */ | ||
| def ceil(e: Column, scale: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("ceil"), Seq(e.expr, scale.expr), isDistinct = false) | ||
| } | ||
| def ceil(e: Column, scale: Column): Column = call_function("ceil", e, scale) | ||
|
|
||
| /** | ||
| * Computes the ceiling of the given value of `e` to 0 decimal places. | ||
| * | ||
| * @group math_funcs | ||
| * @since 1.4.0 | ||
| */ | ||
| def ceil(e: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("ceil"), Seq(e.expr), isDistinct = false) | ||
| } | ||
| def ceil(e: Column): Column = call_function("ceil", e) | ||
|
|
||
| /** | ||
| * Computes the ceiling of the given value of `e` to 0 decimal places. | ||
|
|
@@ -2522,19 +2513,15 @@ object functions { | |
| * @group math_funcs | ||
| * @since 3.3.0 | ||
| */ | ||
| def floor(e: Column, scale: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("floor"), Seq(e.expr, scale.expr), isDistinct = false) | ||
| } | ||
| def floor(e: Column, scale: Column): Column = call_function("floor", e, scale) | ||
|
|
||
| /** | ||
| * Computes the floor of the given value of `e` to 0 decimal places. | ||
| * | ||
| * @group math_funcs | ||
| * @since 1.4.0 | ||
| */ | ||
| def floor(e: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("floor"), Seq(e.expr), isDistinct = false) | ||
| } | ||
| def floor(e: Column): Column = call_function("floor", e) | ||
|
|
||
| /** | ||
| * Computes the floor of the given column value to 0 decimal places. | ||
|
|
@@ -4007,9 +3994,8 @@ object functions { | |
| * @group string_funcs | ||
| * @since 3.3.0 | ||
| */ | ||
| def lpad(str: Column, len: Int, pad: Array[Byte]): Column = withExpr { | ||
| UnresolvedFunction("lpad", Seq(str.expr, lit(len).expr, lit(pad).expr), isDistinct = false) | ||
| } | ||
| def lpad(str: Column, len: Int, pad: Array[Byte]): Column = | ||
| call_function("lpad", str, lit(len), lit(pad)) | ||
|
|
||
| /** | ||
| * Trim the spaces from left end for the specified string value. | ||
|
|
@@ -4190,9 +4176,8 @@ object functions { | |
| * @group string_funcs | ||
| * @since 3.3.0 | ||
| */ | ||
| def rpad(str: Column, len: Int, pad: Array[Byte]): Column = withExpr { | ||
| UnresolvedFunction("rpad", Seq(str.expr, lit(len).expr, lit(pad).expr), isDistinct = false) | ||
| } | ||
| def rpad(str: Column, len: Int, pad: Array[Byte]): Column = | ||
| call_function("rpad", str, lit(len), lit(pad)) | ||
|
|
||
| /** | ||
| * Repeats a string column n times, and returns it as a new string column. | ||
|
|
@@ -4628,9 +4613,7 @@ object functions { | |
| * @group string_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def endswith(str: Column, suffix: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("endswith"), Seq(str.expr, suffix.expr), isDistinct = false) | ||
| } | ||
| def endswith(str: Column, suffix: Column): Column = call_function("endswith", str, suffix) | ||
|
|
||
| /** | ||
| * Returns a boolean. The value is True if str starts with prefix. | ||
|
|
@@ -4640,9 +4623,7 @@ object functions { | |
| * @group string_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def startswith(str: Column, prefix: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("startswith"), Seq(str.expr, prefix.expr), isDistinct = false) | ||
| } | ||
| def startswith(str: Column, prefix: Column): Column = call_function("startswith", str, prefix) | ||
|
|
||
| /** | ||
| * Returns the ASCII character having the binary equivalent to `n`. | ||
|
|
@@ -4752,9 +4733,7 @@ object functions { | |
| * @group string_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def contains(left: Column, right: Column): Column = withExpr { | ||
| UnresolvedFunction(Seq("contains"), Seq(left.expr, right.expr), isDistinct = false) | ||
| } | ||
| def contains(left: Column, right: Column): Column = call_function("contains", left, right) | ||
|
|
||
| /** | ||
| * Returns the `n`-th input, e.g., returns `input2` when `n` is 2. | ||
|
|
@@ -5167,9 +5146,7 @@ object functions { | |
| * @group datetime_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def extract(field: Column, source: Column): Column = withExpr { | ||
| UnresolvedFunction("extract", Seq(field.expr, source.expr), isDistinct = false) | ||
| } | ||
| def extract(field: Column, source: Column): Column = call_function("extract", field, source) | ||
|
|
||
| /** | ||
| * Extracts a part of the date/timestamp or interval source. | ||
|
|
@@ -5181,9 +5158,7 @@ object functions { | |
| * @group datetime_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def date_part(field: Column, source: Column): Column = withExpr { | ||
| UnresolvedFunction("date_part", Seq(field.expr, source.expr), isDistinct = false) | ||
| } | ||
| def date_part(field: Column, source: Column): Column = call_function("date_part", field, source) | ||
|
|
||
| /** | ||
| * Extracts a part of the date/timestamp or interval source. | ||
|
|
@@ -5195,9 +5170,7 @@ object functions { | |
| * @group datetime_funcs | ||
| * @since 3.5.0 | ||
| */ | ||
| def datepart(field: Column, source: Column): Column = withExpr { | ||
| UnresolvedFunction("datepart", Seq(field.expr, source.expr), isDistinct = false) | ||
| } | ||
| def datepart(field: Column, source: Column): Column = call_function("datepart", field, source) | ||
|
|
||
| /** | ||
| * Returns the last day of the month which the given date belongs to. | ||
|
|
@@ -8363,9 +8336,9 @@ object functions { | |
| * @since 1.5.0 | ||
| */ | ||
| @scala.annotation.varargs | ||
| @deprecated("Use call_udf") | ||
| @deprecated("Use call_function") | ||
| def callUDF(udfName: String, cols: Column*): Column = | ||
| call_udf(udfName, cols: _*) | ||
| call_function(udfName, cols: _*) | ||
|
|
||
| /** | ||
| * Call a user-defined function. | ||
|
|
@@ -8383,9 +8356,20 @@ object functions { | |
| * @since 3.2.0 | ||
| */ | ||
| @scala.annotation.varargs | ||
| def call_udf(udfName: String, cols: Column*): Column = withExpr { | ||
| UnresolvedFunction(udfName, cols.map(_.expr), isDistinct = false) | ||
| } | ||
| @deprecated("Use call_function") | ||
| def call_udf(udfName: String, cols: Column*): Column = | ||
| call_function(udfName, cols: _*) | ||
|
|
||
| /** | ||
| * Call a builtin or temp function. | ||
| * | ||
| * @param funcName function name | ||
| * @param cols the expression parameters of function | ||
| * @since 3.5.0 | ||
|
||
| */ | ||
| @scala.annotation.varargs | ||
| def call_function(funcName: String, cols: Column*): Column = | ||
|
||
| withExpr { UnresolvedFunction(funcName, cols.map(_.expr), false) } | ||
|
|
||
| /** | ||
| * Unwrap UDT data type column into its underlying type. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized that it may be problematic in such cases, if some users happen to register a UDF with the same name `ceil`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. If we want to avoid this issue, it seems we should make the `built-in-only`, `udf-only`, `global` as you said.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, if the goal of this PR is to replace `call_udf` with `call_function`, we can resolve this naming conflict issue in another PR.