Remove AggregateFunctionDefinition #11803

Merged · 2 commits · Aug 5, 2024
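At a glance, this PR removes the single-variant `AggregateFunctionDefinition` enum and stores the user-defined aggregate directly as `Arc<AggregateUDF>` on `AggregateFunction`, so call sites no longer match on a wrapper. Below is a minimal, self-contained sketch of that refactoring pattern; the `AggregateUdf` type is an illustrative stand-in, not the real `datafusion_expr::AggregateUDF`.

```rust
use std::sync::Arc;

// Illustrative stand-in for AggregateUDF, only here to keep the sketch self-contained.
struct AggregateUdf {
    name: String,
}

impl AggregateUdf {
    fn name(&self) -> &str {
        &self.name
    }
}

// Before: a one-variant enum wrapped the UDF.
enum AggregateFunctionDefinition {
    Udf(Arc<AggregateUdf>),
}

struct AggregateFunctionBefore {
    func_def: AggregateFunctionDefinition,
}

// After: the field holds the UDF directly.
struct AggregateFunctionAfter {
    func: Arc<AggregateUdf>,
}

fn main() {
    let udf = Arc::new(AggregateUdf { name: "count".into() });

    let before = AggregateFunctionBefore {
        func_def: AggregateFunctionDefinition::Udf(Arc::clone(&udf)),
    };
    // Every call site had to match on the single variant:
    let name_before = match &before.func_def {
        AggregateFunctionDefinition::Udf(inner) => inner.name(),
    };

    let after = AggregateFunctionAfter {
        func: Arc::clone(&udf),
    };
    // Now it is a plain field access plus a method call:
    let name_after = after.func.name();

    assert_eq!(name_before, name_after);
}
```

The file diffs below apply this same change mechanically across the physical planner, the expression types, and the optimizer rules.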
69 changes: 32 additions & 37 deletions datafusion/core/src/physical_planner.rs
@@ -74,8 +74,8 @@ use datafusion_common::{
};
use datafusion_expr::dml::CopyTo;
use datafusion_expr::expr::{
self, AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr,
Cast, GroupingSet, InList, Like, TryCast, WindowFunction,
self, AggregateFunction, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, Like,
TryCast, WindowFunction,
};
use datafusion_expr::expr_rewriter::unnormalize_cols;
use datafusion_expr::expr_vec_fmt;
@@ -223,18 +223,15 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> {
create_function_physical_name(&fun.to_string(), false, args, Some(order_by))
}
Expr::AggregateFunction(AggregateFunction {
func_def,
func,
distinct,
args,
filter: _,
order_by,
null_treatment: _,
}) => create_function_physical_name(
func_def.name(),
*distinct,
args,
order_by.as_ref(),
),
}) => {
create_function_physical_name(func.name(), *distinct, args, order_by.as_ref())
}
Expr::GroupingSet(grouping_set) => match grouping_set {
GroupingSet::Rollup(exprs) => Ok(format!(
"ROLLUP ({})",
@@ -1817,7 +1814,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
) -> Result<AggregateExprWithOptionalArgs> {
match e {
Expr::AggregateFunction(AggregateFunction {
func_def,
func,
distinct,
args,
filter,
@@ -1839,36 +1836,34 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
.unwrap_or(sqlparser::ast::NullTreatment::RespectNulls)
== NullTreatment::IgnoreNulls;

let (agg_expr, filter, order_by) = match func_def {
AggregateFunctionDefinition::UDF(fun) => {
let sort_exprs = order_by.clone().unwrap_or(vec![]);
let physical_sort_exprs = match order_by {
Some(exprs) => Some(create_physical_sort_exprs(
exprs,
logical_input_schema,
execution_props,
)?),
None => None,
};
let (agg_expr, filter, order_by) = {
let sort_exprs = order_by.clone().unwrap_or(vec![]);
let physical_sort_exprs = match order_by {
Some(exprs) => Some(create_physical_sort_exprs(
exprs,
logical_input_schema,
execution_props,
)?),
None => None,
};

let ordering_reqs: Vec<PhysicalSortExpr> =
physical_sort_exprs.clone().unwrap_or(vec![]);
let ordering_reqs: Vec<PhysicalSortExpr> =
physical_sort_exprs.clone().unwrap_or(vec![]);

let agg_expr = udaf::create_aggregate_expr_with_dfschema(
fun,
&physical_args,
args,
&sort_exprs,
&ordering_reqs,
logical_input_schema,
name,
ignore_nulls,
*distinct,
false,
)?;
let agg_expr = udaf::create_aggregate_expr_with_dfschema(
func,
&physical_args,
args,
&sort_exprs,
&ordering_reqs,
logical_input_schema,
name,
ignore_nulls,
*distinct,
false,
)?;

(agg_expr, filter, physical_sort_exprs)
}
(agg_expr, filter, physical_sort_exprs)
};

Ok((agg_expr, filter, order_by))
34 changes: 9 additions & 25 deletions datafusion/expr/src/expr.rs
@@ -627,22 +627,6 @@ impl Sort {
}
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Defines which implementation of an aggregate function DataFusion should call.
pub enum AggregateFunctionDefinition {
/// Resolved to a user defined aggregate function
UDF(Arc<crate::AggregateUDF>),
}

impl AggregateFunctionDefinition {
/// Function's name for display
pub fn name(&self) -> &str {
match self {
AggregateFunctionDefinition::UDF(udf) => udf.name(),
}
}
}

/// Aggregate function
///
/// See also [`ExprFunctionExt`] to set these fields on `Expr`
@@ -651,7 +635,7 @@ impl AggregateFunctionDefinition {
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct AggregateFunction {
/// Name of the function
pub func_def: AggregateFunctionDefinition,
pub func: Arc<crate::AggregateUDF>,
/// List of expressions to feed to the functions as arguments
pub args: Vec<Expr>,
/// Whether this is a DISTINCT aggregation or not
@@ -666,15 +650,15 @@ pub struct AggregateFunction {
impl AggregateFunction {
/// Create a new AggregateFunction expression with a user-defined function (UDF)
pub fn new_udf(
udf: Arc<crate::AggregateUDF>,
func: Arc<crate::AggregateUDF>,
args: Vec<Expr>,
distinct: bool,
filter: Option<Box<Expr>>,
order_by: Option<Vec<Expr>>,
null_treatment: Option<NullTreatment>,
) -> Self {
Self {
func_def: AggregateFunctionDefinition::UDF(udf),
func,
args,
distinct,
filter,
@@ -1666,14 +1650,14 @@ impl Expr {
func.hash(hasher);
}
Expr::AggregateFunction(AggregateFunction {
func_def,
func,
args: _args,
distinct,
filter: _filter,
order_by: _order_by,
null_treatment,
}) => {
func_def.hash(hasher);
func.hash(hasher);
distinct.hash(hasher);
null_treatment.hash(hasher);
}
@@ -1870,15 +1854,15 @@ impl fmt::Display for Expr {
Ok(())
}
Expr::AggregateFunction(AggregateFunction {
func_def,
func,
distinct,
ref args,
filter,
order_by,
null_treatment,
..
}) => {
fmt_function(f, func_def.name(), *distinct, args, true)?;
fmt_function(f, func.name(), *distinct, args, true)?;
if let Some(nt) = null_treatment {
write!(f, " {}", nt)?;
}
@@ -2190,14 +2174,14 @@ fn write_name<W: Write>(w: &mut W, e: &Expr) -> Result<()> {
write!(w, "{window_frame}")?;
}
Expr::AggregateFunction(AggregateFunction {
func_def,
func,
distinct,
args,
filter,
order_by,
null_treatment,
}) => {
write_function_name(w, func_def.name(), *distinct, args)?;
write_function_name(w, func.name(), *distinct, args)?;
if let Some(fe) = filter {
write!(w, " FILTER (WHERE {fe})")?;
};
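For code that builds aggregate expressions programmatically, `new_udf` keeps its shape; only the stored field changes. A hedged sketch using the signature shown in the diff above — `some_udaf` is a placeholder for any `Arc<AggregateUDF>` the caller already holds (for example one registered with a session), not a specific API:

```rust
use std::sync::Arc;

use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::{col, AggregateUDF, Expr};

/// Builds `some_udaf(DISTINCT a)` as a logical expression.
fn distinct_agg_over_a(some_udaf: Arc<AggregateUDF>) -> Expr {
    // After this PR the UDF is stored directly in the `func` field;
    // `new_udf` no longer wraps it in AggregateFunctionDefinition::UDF.
    Expr::AggregateFunction(AggregateFunction::new_udf(
        some_udaf,
        vec![col("a")], // arguments
        true,           // DISTINCT
        None,           // no FILTER (WHERE ...)
        None,           // no ORDER BY
        None,           // default null treatment
    ))
}
```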
47 changes: 21 additions & 26 deletions datafusion/expr/src/expr_schema.rs
@@ -17,8 +17,8 @@

use super::{Between, Expr, Like};
use crate::expr::{
AggregateFunction, AggregateFunctionDefinition, Alias, BinaryExpr, Cast, InList,
InSubquery, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction,
AggregateFunction, Alias, BinaryExpr, Cast, InList, InSubquery, Placeholder,
ScalarFunction, Sort, TryCast, Unnest, WindowFunction,
};
use crate::type_coercion::binary::get_result_type;
use crate::type_coercion::functions::{
@@ -193,28 +193,24 @@ impl ExprSchemable for Expr {
_ => fun.return_type(&data_types, &nullability),
}
}
Expr::AggregateFunction(AggregateFunction { func_def, args, .. }) => {
Expr::AggregateFunction(AggregateFunction { func, args, .. }) => {
let data_types = args
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;
match func_def {
AggregateFunctionDefinition::UDF(fun) => {
let new_types = data_types_with_aggregate_udf(&data_types, fun)
.map_err(|err| {
plan_datafusion_err!(
"{} {}",
err,
utils::generate_signature_error_msg(
fun.name(),
fun.signature().clone(),
&data_types
)
let new_types = data_types_with_aggregate_udf(&data_types, func)
.map_err(|err| {
plan_datafusion_err!(
"{} {}",
err,
utils::generate_signature_error_msg(
func.name(),
func.signature().clone(),
&data_types
)
})?;
Ok(fun.return_type(&new_types)?)
}
}
)
})?;
Ok(func.return_type(&new_types)?)
}
Expr::Not(_)
| Expr::IsNull(_)
@@ -329,13 +325,12 @@ impl ExprSchemable for Expr {
}
}
Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema),
Expr::AggregateFunction(AggregateFunction { func_def, .. }) => {
match func_def {
// TODO: UDF should be able to customize nullability
AggregateFunctionDefinition::UDF(udf) if udf.name() == "count" => {
Ok(false)
}
AggregateFunctionDefinition::UDF(_) => Ok(true),
Expr::AggregateFunction(AggregateFunction { func, .. }) => {
// TODO: UDF should be able to customize nullability
if func.name() == "count" {
Ok(false)
} else {
Ok(true)
}
}
Expr::ScalarVariable(_, _)
31 changes: 13 additions & 18 deletions datafusion/expr/src/tree_node.rs
@@ -18,9 +18,8 @@
//! Tree node implementation for logical expr

use crate::expr::{
AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr, Case,
Cast, GroupingSet, InList, InSubquery, Like, Placeholder, ScalarFunction, Sort,
TryCast, Unnest, WindowFunction,
AggregateFunction, Alias, Between, BinaryExpr, Case, Cast, GroupingSet, InList,
InSubquery, Like, Placeholder, ScalarFunction, Sort, TryCast, Unnest, WindowFunction,
};
use crate::{Expr, ExprFunctionExt};

@@ -304,7 +303,7 @@ impl TreeNode for Expr {
}),
Expr::AggregateFunction(AggregateFunction {
args,
func_def,
func,
distinct,
filter,
order_by,
@@ -316,20 +315,16 @@ impl TreeNode for Expr {
order_by,
transform_option_vec(order_by, &mut f)
)?
.map_data(
|(new_args, new_filter, new_order_by)| match func_def {
AggregateFunctionDefinition::UDF(fun) => {
Ok(Expr::AggregateFunction(AggregateFunction::new_udf(
fun,
new_args,
distinct,
new_filter,
new_order_by,
null_treatment,
)))
}
},
)?,
.map_data(|(new_args, new_filter, new_order_by)| {
Ok(Expr::AggregateFunction(AggregateFunction::new_udf(
func,
new_args,
distinct,
new_filter,
new_order_by,
null_treatment,
)))
})?,
Expr::GroupingSet(grouping_set) => match grouping_set {
GroupingSet::Rollup(exprs) => transform_vec(exprs, &mut f)?
.update_data(|ve| Expr::GroupingSet(GroupingSet::Rollup(ve))),
4 changes: 1 addition & 3 deletions datafusion/functions-nested/src/planner.rs
@@ -20,7 +20,6 @@
use datafusion_common::{exec_err, utils::list_ndims, DFSchema, Result};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::{
expr::AggregateFunctionDefinition,
planner::{ExprPlanner, PlannerResult, RawBinaryExpr, RawFieldAccessExpr},
sqlparser, Expr, ExprSchemable, GetFieldAccess,
};
@@ -171,6 +170,5 @@ impl ExprPlanner for FieldAccessPlanner {
}

fn is_array_agg(agg_func: &datafusion_expr::expr::AggregateFunction) -> bool {
let AggregateFunctionDefinition::UDF(udf) = &agg_func.func_def;
return udf.name() == "array_agg";
return agg_func.func.name() == "array_agg";
}
8 changes: 3 additions & 5 deletions datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
@@ -21,9 +21,7 @@ use crate::utils::NamePreserver;
use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_common::Result;
use datafusion_expr::expr::{
AggregateFunction, AggregateFunctionDefinition, WindowFunction,
};
use datafusion_expr::expr::{AggregateFunction, WindowFunction};
use datafusion_expr::utils::COUNT_STAR_EXPANSION;
use datafusion_expr::{lit, Expr, LogicalPlan, WindowFunctionDefinition};

@@ -56,10 +54,10 @@ fn is_wildcard(expr: &Expr) -> bool {
fn is_count_star_aggregate(aggregate_function: &AggregateFunction) -> bool {
matches!(aggregate_function,
AggregateFunction {
func_def: AggregateFunctionDefinition::UDF(udf),
func,
args,
..
} if udf.name() == "count" && (args.len() == 1 && is_wildcard(&args[0]) || args.is_empty()))
} if func.name() == "count" && (args.len() == 1 && is_wildcard(&args[0]) || args.is_empty()))
}

fn is_count_star_window_aggregate(window_function: &WindowFunction) -> bool {
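The last two files show the main payoff at inspection sites: checks such as `is_array_agg` and `is_count_star_aggregate` reduce to a name lookup on `func`. A generalized, hedged sketch of that pattern — the helper name below is ours, not part of the PR:

```rust
use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::Expr;

/// Returns true if `expr` is a call to the aggregate UDF named `name`.
/// Mirrors the simplified helpers above: with the enum gone, the UDF is
/// reached directly through the `func` field.
fn is_aggregate_named(expr: &Expr, name: &str) -> bool {
    matches!(
        expr,
        Expr::AggregateFunction(AggregateFunction { func, .. }) if func.name() == name
    )
}
```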