-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Introduce Sum UDAF #10651
Introduce Sum UDAF #10651
Changes from 49 commits
192211f
403ee1d
5bcab35
426f2ab
3712e18
e080d37
2b6784c
64db75c
c086a22
a9a5423
201c90c
3e9e7e9
22540b2
d9229db
518894a
ee068db
6224333
47ae11f
25dcb64
d16f1b1
5381f2d
b403331
78a70b3
79019fe
20e9f79
4f2f0ac
ca4b528
e6b021e
81dd68f
ffb0a98
dafd1aa
70b8651
f6d37bf
921dc00
093fb24
f684f5d
c1e74f7
6d3ef58
53c7bb1
02fd8a5
6c7ce04
5490bcf
5f93beb
ff947bb
2492ba7
73573be
f2b3732
2c0c52c
62346dd
a41fcc5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -142,7 +142,7 @@ async fn test_udaf_as_window_with_frame_without_retract_batch() { | |
let sql = "SELECT time_sum(time) OVER(ORDER BY time ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) as time_sum from t"; | ||
// Note if this query ever does start working | ||
let err = execute(&ctx, sql).await.unwrap_err(); | ||
assert_contains!(err.to_string(), "This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented: AggregateUDF { inner: AggregateUDF { name: \"time_sum\", signature: Signature { type_signature: Exact([Timestamp(Nanosecond, None)]), volatility: Immutable }, fun: \"<FUNC>\" } }(t.time) ORDER BY [t.time ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING"); | ||
assert_contains!(err.to_string(), "This feature is not implemented: Aggregate can not be used as a sliding accumulator because `retract_batch` is not implemented: time_sum(t.time) ORDER BY [t.time ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that is certainly nicer |
||
} | ||
|
||
/// Basic query for with a udaf returning a structure | ||
|
@@ -729,7 +729,10 @@ impl AggregateUDFImpl for TestGroupsAccumulator { | |
true | ||
} | ||
|
||
fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> { | ||
fn create_groups_accumulator( | ||
&self, | ||
_args: AccumulatorArgs, | ||
) -> Result<Box<dyn GroupsAccumulator>> { | ||
Ok(Box::new(self.clone())) | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -754,10 +754,14 @@ impl WindowFunctionDefinition { | |
impl fmt::Display for WindowFunctionDefinition { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
match self { | ||
WindowFunctionDefinition::AggregateFunction(fun) => fun.fmt(f), | ||
WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.fmt(f), | ||
WindowFunctionDefinition::AggregateUDF(fun) => std::fmt::Debug::fmt(fun, f), | ||
WindowFunctionDefinition::WindowUDF(fun) => fun.fmt(f), | ||
WindowFunctionDefinition::AggregateFunction(fun) => { | ||
std::fmt::Display::fmt(fun, f) | ||
} | ||
WindowFunctionDefinition::BuiltInWindowFunction(fun) => { | ||
std::fmt::Display::fmt(fun, f) | ||
} | ||
WindowFunctionDefinition::AggregateUDF(fun) => std::fmt::Display::fmt(fun, f), | ||
WindowFunctionDefinition::WindowUDF(fun) => std::fmt::Display::fmt(fun, f), | ||
} | ||
} | ||
} | ||
|
@@ -2263,7 +2267,11 @@ mod test { | |
let fun = find_df_window_func(name).unwrap(); | ||
let fun2 = find_df_window_func(name.to_uppercase().as_str()).unwrap(); | ||
assert_eq!(fun, fun2); | ||
assert_eq!(fun.to_string(), name.to_uppercase()); | ||
if fun.to_string() == "first_value" || fun.to_string() == "last_value" { | ||
assert_eq!(fun.to_string(), name); | ||
} else { | ||
assert_eq!(fun.to_string(), name.to_uppercase()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder, maybe we should treat udf names case insensitive way. What do you think? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #10695 track the issue to rename |
||
} | ||
} | ||
Ok(()) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,8 +21,10 @@ use crate::expr::{ | |
InSubquery, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction, | ||
}; | ||
use crate::type_coercion::binary::get_result_type; | ||
use crate::type_coercion::functions::data_types_with_scalar_udf; | ||
use crate::{utils, LogicalPlan, Projection, Subquery}; | ||
use crate::type_coercion::functions::{ | ||
data_types_with_aggregate_udf, data_types_with_scalar_udf, | ||
}; | ||
use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; | ||
use arrow::compute::can_cast_types; | ||
use arrow::datatypes::{DataType, Field}; | ||
use datafusion_common::{ | ||
|
@@ -158,7 +160,25 @@ impl ExprSchemable for Expr { | |
.iter() | ||
.map(|e| e.get_type(schema)) | ||
.collect::<Result<Vec<_>>>()?; | ||
fun.return_type(&data_types) | ||
match fun { | ||
WindowFunctionDefinition::AggregateUDF(udf) => { | ||
let new_types = data_types_with_aggregate_udf(&data_types, udf).map_err(|err| { | ||
plan_datafusion_err!( | ||
"{} and {}", | ||
err, | ||
utils::generate_signature_error_msg( | ||
fun.name(), | ||
fun.signature().clone(), | ||
&data_types | ||
) | ||
) | ||
})?; | ||
Ok(fun.return_type(&new_types)?) | ||
} | ||
Comment on lines
+164
to
+177
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should bury this check, and conversion inside to the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer separate |
||
_ => { | ||
fun.return_type(&data_types) | ||
} | ||
} | ||
} | ||
Expr::AggregateFunction(AggregateFunction { func_def, args, .. }) => { | ||
let data_types = args | ||
|
@@ -170,7 +190,18 @@ impl ExprSchemable for Expr { | |
fun.return_type(&data_types) | ||
} | ||
AggregateFunctionDefinition::UDF(fun) => { | ||
Ok(fun.return_type(&data_types)?) | ||
let new_types = data_types_with_aggregate_udf(&data_types, fun).map_err(|err| { | ||
plan_datafusion_err!( | ||
"{} and {}", | ||
err, | ||
utils::generate_signature_error_msg( | ||
fun.name(), | ||
fun.signature().clone(), | ||
&data_types | ||
) | ||
) | ||
})?; | ||
Comment on lines
+193
to
+203
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment above applies here |
||
Ok(fun.return_type(&new_types)?) | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
expr / sum is not removed in this PR, so I need to remove this to avoid import conflict