Skip to content

Commit

Permalink
Remove expr_fn::sum and replace them with function stub (#10816)
Browse files Browse the repository at this point in the history
* introduce stub for test

Signed-off-by: jayzhan211 <[email protected]>

* fix err msg

Signed-off-by: jayzhan211 <[email protected]>

* dont compare error msg, ci is not consistent with local

Signed-off-by: jayzhan211 <[email protected]>

* comment and cli update

Signed-off-by: jayzhan211 <[email protected]>

* fmt

Signed-off-by: jayzhan211 <[email protected]>

---------

Signed-off-by: jayzhan211 <[email protected]>
  • Loading branch information
jayzhan211 committed Jun 8, 2024
1 parent c012e9c commit e3af174
Show file tree
Hide file tree
Showing 15 changed files with 246 additions and 82 deletions.
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 0 additions & 14 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,20 +168,6 @@ pub fn max(expr: Expr) -> Expr {
))
}

/// Create an expression to represent the sum() aggregate function
///
/// TODO: Remove this function and use `sum` from `datafusion_functions_aggregate::expr_fn` instead
pub fn sum(expr: Expr) -> Expr {
Expr::AggregateFunction(AggregateFunction::new(
aggregate_function::AggregateFunction::Sum,
vec![expr],
false,
None,
None,
None,
))
}

/// Create an expression to represent the array_agg() aggregate function
pub fn array_agg(expr: Expr) -> Expr {
Expr::AggregateFunction(AggregateFunction::new(
Expand Down
54 changes: 1 addition & 53 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1719,7 +1719,7 @@ pub fn unnest_with_options(
mod tests {
use super::*;
use crate::logical_plan::StringifiedPlan;
use crate::{col, expr, expr_fn::exists, in_subquery, lit, scalar_subquery, sum};
use crate::{col, expr, expr_fn::exists, in_subquery, lit, scalar_subquery};

use datafusion_common::SchemaError;

Expand Down Expand Up @@ -1775,28 +1775,6 @@ mod tests {
);
}

#[test]
fn plan_builder_aggregate() -> Result<()> {
let plan =
table_scan(Some("employee_csv"), &employee_schema(), Some(vec![3, 4]))?
.aggregate(
vec![col("state")],
vec![sum(col("salary")).alias("total_salary")],
)?
.project(vec![col("state"), col("total_salary")])?
.limit(2, Some(10))?
.build()?;

let expected = "Limit: skip=2, fetch=10\
\n Projection: employee_csv.state, total_salary\
\n Aggregate: groupBy=[[employee_csv.state]], aggr=[[SUM(employee_csv.salary) AS total_salary]]\
\n TableScan: employee_csv projection=[state, salary]";

assert_eq!(expected, format!("{plan:?}"));

Ok(())
}

#[test]
fn plan_builder_sort() -> Result<()> {
let plan =
Expand Down Expand Up @@ -2037,36 +2015,6 @@ mod tests {
}
}

#[test]
fn aggregate_non_unique_names() -> Result<()> {
let plan = table_scan(
Some("employee_csv"),
&employee_schema(),
// project state and salary by column index
Some(vec![3, 4]),
)?
// two columns with the same name => error
.aggregate(vec![col("state")], vec![sum(col("salary")).alias("state")]);

match plan {
Err(DataFusionError::SchemaError(
SchemaError::AmbiguousReference {
field:
Column {
relation: Some(TableReference::Bare { table }),
name,
},
},
_,
)) => {
assert_eq!(*"employee_csv", *table);
assert_eq!("state", &name);
Ok(())
}
_ => plan_err!("Plan should have returned an DataFusionError::SchemaError"),
}
}

fn employee_schema() -> Schema {
Schema::new(vec![
Field::new("id", DataType::Int32, false),
Expand Down
1 change: 1 addition & 0 deletions datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ hashbrown = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
paste = "1.0.14"
regex-syntax = "0.8.0"

[dev-dependencies]
Expand Down
11 changes: 5 additions & 6 deletions datafusion/optimizer/src/analyzer/count_wildcard_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,14 @@ fn analyze_internal(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
#[cfg(test)]
mod tests {
use super::*;
use crate::test::function_stub::sum;
use crate::test::*;
use arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_expr::expr::Sort;
use datafusion_expr::{
col, count, exists, expr, in_subquery, logical_plan::LogicalPlanBuilder, max,
out_ref_col, scalar_subquery, sum, wildcard, AggregateFunction, WindowFrame,
out_ref_col, scalar_subquery, wildcard, AggregateFunction, WindowFrame,
WindowFrameBound, WindowFrameUnits,
};
use std::sync::Arc;
Expand Down Expand Up @@ -275,11 +276,9 @@ mod tests {
#[test]
fn test_count_wildcard_on_non_count_aggregate() -> Result<()> {
let table_scan = test_table_scan()?;
let err = LogicalPlanBuilder::from(table_scan)
.aggregate(Vec::<Expr>::new(), vec![sum(wildcard())])
.unwrap_err()
.to_string();
assert!(err.contains("Error during planning: No function matches the given name and argument types 'SUM(Null)'."), "{err}");
let res = LogicalPlanBuilder::from(table_scan)
.aggregate(Vec::<Expr>::new(), vec![sum(wildcard())]);
assert!(res.is_err());
Ok(())
}

Expand Down
4 changes: 3 additions & 1 deletion datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -840,13 +840,15 @@ mod test {
use arrow::datatypes::Schema;

use datafusion_expr::logical_plan::{table_scan, JoinType};
use datafusion_expr::{avg, lit, logical_plan::builder::LogicalPlanBuilder, sum};

use datafusion_expr::{avg, lit, logical_plan::builder::LogicalPlanBuilder};
use datafusion_expr::{
grouping_set, AccumulatorFactoryFunction, AggregateUDF, Signature,
SimpleAggregateUDF, Volatility,
};

use crate::optimizer::OptimizerContext;
use crate::test::function_stub::sum;
use crate::test::*;

use super::*;
Expand Down
3 changes: 2 additions & 1 deletion datafusion/optimizer/src/eliminate_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ mod tests {

use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{
col, lit, logical_plan::builder::LogicalPlanBuilder, sum, Expr, LogicalPlan,
col, lit, logical_plan::builder::LogicalPlanBuilder, Expr, LogicalPlan,
};

use crate::eliminate_filter::EliminateFilter;
use crate::test::function_stub::sum;
use crate::test::*;

fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> {
Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/eliminate_limit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ mod tests {
use datafusion_expr::{
col,
logical_plan::{builder::LogicalPlanBuilder, JoinType},
sum,
};
use std::sync::Arc;

use crate::push_down_limit::PushDownLimit;
use crate::test::function_stub::sum;

fn observe(_plan: &LogicalPlan, _rule: &dyn OptimizerRule) {}
fn assert_optimized_plan_eq(plan: LogicalPlan, expected: &str) -> Result<()> {
Expand Down
3 changes: 2 additions & 1 deletion datafusion/optimizer/src/push_down_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1090,13 +1090,14 @@ mod tests {
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::logical_plan::table_scan;
use datafusion_expr::{
col, in_list, in_subquery, lit, sum, ColumnarValue, Extension, ScalarUDF,
col, in_list, in_subquery, lit, ColumnarValue, Extension, ScalarUDF,
ScalarUDFImpl, Signature, TableSource, TableType, UserDefinedLogicalNodeCore,
Volatility,
};

use crate::optimizer::Optimizer;
use crate::rewrite_disjunctive_predicate::RewriteDisjunctivePredicate;
use crate::test::function_stub::sum;
use crate::test::*;
use crate::OptimizerContext;

Expand Down
5 changes: 2 additions & 3 deletions datafusion/optimizer/src/scalar_subquery_to_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,10 +400,9 @@ mod tests {
use super::*;
use crate::test::*;

use crate::test::function_stub::sum;
use arrow::datatypes::DataType;
use datafusion_expr::{
col, lit, max, min, out_ref_col, scalar_subquery, sum, Between,
};
use datafusion_expr::{col, lit, max, min, out_ref_col, scalar_subquery, Between};

/// Test multiple correlated subqueries
#[test]
Expand Down
3 changes: 2 additions & 1 deletion datafusion/optimizer/src/single_distinct_to_groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,12 +360,13 @@ impl OptimizerRule for SingleDistinctToGroupBy {
#[cfg(test)]
mod tests {
use super::*;
use crate::test::function_stub::sum;
use crate::test::*;
use datafusion_expr::expr;
use datafusion_expr::expr::GroupingSet;
use datafusion_expr::{
count, count_distinct, lit, logical_plan::builder::LogicalPlanBuilder, max, min,
sum, AggregateFunction,
AggregateFunction,
};

fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> {
Expand Down
Loading

0 comments on commit e3af174

Please sign in to comment.