From 2982803d785bfeee6e1401d2e81e439748fa80c2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 11:45:03 -0600 Subject: [PATCH 1/7] Revert to earlier version of rule and skip failing tests --- .../sql/optimizer/eliminate_agg_distinct.rs | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs index fd854176b..b64123732 100644 --- a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs +++ b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs @@ -114,12 +114,13 @@ impl OptimizerRule for EliminateAggDistinct { // combine the two sets to get all unique expressions let mut unique_expressions = distinct_columns.clone(); unique_expressions.extend(not_distinct_columns.clone()); - let mut unique_expressions: Vec = - Vec::from_iter(unique_set_without_aliases(&unique_expressions)); - unique_expressions.sort_by(|l, r| format!("{}", l).cmp(&format!("{}", r))); - // create one plan per unique expression being aggregated - let plans: Vec = unique_expressions + let unique_expressions = unique_set_without_aliases(&unique_expressions); + + let mut x: Vec = Vec::from_iter(unique_expressions); + x.sort_by(|l, r| format!("{}", l).cmp(&format!("{}", r))); + + let plans: Vec = x .iter() .map(|expr| { create_plan( @@ -179,10 +180,26 @@ fn create_plan( let has_non_distinct = _not_distinct_columns.contains(expr); assert!(has_distinct || has_non_distinct); - let distinct_expr: Vec = - to_sorted_vec(strip_aliases(&Vec::from_iter(distinct_columns.iter()))); - let not_distinct_expr: Vec = - to_sorted_vec(strip_aliases(&Vec::from_iter(not_distinct_columns.iter()))); + let distinct_columns: Vec = distinct_columns + .iter() + .filter(|e| match e { + Expr::Alias(x, _) => x.as_ref() == expr, + other => *other == expr, + }) + .cloned() + .collect(); + + let not_distinct_columns: Vec = not_distinct_columns + .iter() + .filter(|e| match e { + Expr::Alias(x, _) => x.as_ref() == expr, + other => *other == expr, + }) + .cloned() + .collect(); + + let distinct_expr: Vec = to_sorted_vec(distinct_columns); + let not_distinct_expr: Vec = to_sorted_vec(not_distinct_columns); if has_distinct && has_non_distinct && distinct_expr.len() == 1 && not_distinct_expr.len() == 1 { @@ -486,6 +503,7 @@ mod tests { .expect("building plan") } + #[ignore] #[test] fn test_single_distinct_group_by() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan("a")) @@ -500,6 +518,7 @@ mod tests { Ok(()) } + #[ignore] #[test] fn test_single_distinct_group_by_with_alias() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan("a")) From 3171e73a8a714a00b9ab3b9c450149501c4fd8b2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 11:51:02 -0600 Subject: [PATCH 2/7] add Rust workflow --- .github/workflows/rust.yml | 75 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 .github/workflows/rust.yml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 000000000..1f08dd913 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,75 @@ +name: Rust + +on: + # always trigger on PR + push: + pull_request: + # manual trigger + # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow + workflow_dispatch: + +jobs: + # Check crate compiles + linux-build-lib: + name: cargo check + runs-on: ubuntu-latest + container: + image: amd64/rust + env: + # Disable full debug symbol generation to speed up CI build and keep memory down + # "1" means line tables only, which is useful for panic tracebacks. + RUSTFLAGS: "-C debuginfo=1" + steps: + - uses: actions/checkout@v3 + - name: Cache Cargo + uses: actions/cache@v3 + with: + # these represent dependencies downloaded by cargo + # and thus do not depend on the OS, arch nor rust version. + path: /github/home/.cargo + key: cargo-cache- + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: stable + - name: Check workspace in debug mode + run: | + cargo check + - name: Check workspace in release mode + run: | + cargo check --release + - name: Check workspace without default features + run: | + cargo check --no-default-features -p datafusion + - name: Check workspace with all features + run: | + cargo check + + # test the crate + linux-test: + name: cargo test (amd64) + needs: [linux-build-lib] + runs-on: ubuntu-latest + container: + image: amd64/rust + env: + # Disable full debug symbol generation to speed up CI build and keep memory down + # "1" means line tables only, which is useful for panic tracebacks. + RUSTFLAGS: "-C debuginfo=1" + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: Cache Cargo + uses: actions/cache@v3 + with: + path: /github/home/.cargo + # this key equals the ones on `linux-build-lib` for re-use + key: cargo-cache- + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: stable + - name: Run tests + run: | + cargo test From 8a5810c0966a56d54145443864e71ca439a184a4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 11:53:59 -0600 Subject: [PATCH 3/7] ci --- .github/actions/setup-builder/action.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/actions/setup-builder/action.yaml diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml new file mode 100644 index 000000000..7f576cd69 --- /dev/null +++ b/.github/actions/setup-builder/action.yaml @@ -0,0 +1,17 @@ +name: Prepare Rust Builder +description: 'Prepare Rust Build Environment' +inputs: + rust-version: + description: 'version of rust to install (e.g. stable)' + required: true + default: 'stable' +runs: + using: "composite" + steps: + - name: Setup Rust toolchain + shell: bash + run: | + echo "Installing ${{ inputs.rust-version }}" + rustup toolchain install ${{ inputs.rust-version }} + rustup default ${{ inputs.rust-version }} + rustup component add rustfmt From 0088970b80d79de0826a5c0e41ff5d2d4f3bef16 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 11:56:19 -0600 Subject: [PATCH 4/7] ci --- .github/workflows/rust.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1f08dd913..0b8b8536e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -34,16 +34,12 @@ jobs: rust-version: stable - name: Check workspace in debug mode run: | + cd dask_planner cargo check - name: Check workspace in release mode run: | + cd dask_planner cargo check --release - - name: Check workspace without default features - run: | - cargo check --no-default-features -p datafusion - - name: Check workspace with all features - run: | - cargo check # test the crate linux-test: @@ -72,4 +68,5 @@ jobs: rust-version: stable - name: Run tests run: | + cd dask_planner cargo test From e32ba140f16fa8739d2825d4477ed90fdf8df7d4 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 12:40:55 -0600 Subject: [PATCH 5/7] fix remaining regressions in optimizer rule --- .../src/sql/optimizer/eliminate_agg_distinct.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs index b64123732..cac4004df 100644 --- a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs +++ b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs @@ -333,10 +333,7 @@ fn create_plan( // Re-create the original Aggregate node without the DISTINCT element let count = Expr::AggregateFunction { fun: AggregateFunction::Count, - args: vec![col(&first_aggregate - .schema() - .field(group_expr.len()) - .qualified_name())], + args: vec![col(&first_aggregate.schema().field(group_expr.len()).qualified_name())], distinct: false, filter: None, }; @@ -503,30 +500,28 @@ mod tests { .expect("building plan") } - #[ignore] #[test] fn test_single_distinct_group_by() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan("a")) .aggregate(vec![col("a")], vec![count_distinct(col("b"))])? .build()?; - let expected = "Projection: #a.a, #COUNT(a.a) AS COUNT(DISTINCT a.b)\ - \n Aggregate: groupBy=[[#a.a]], aggr=[[COUNT(#a.a)]]\ + let expected = "Projection: #a.a, #COUNT(a.b) AS COUNT(DISTINCT a.b)\ + \n Aggregate: groupBy=[[#a.a]], aggr=[[COUNT(#a.b)]]\ \n Aggregate: groupBy=[[#a.a, #a.b]], aggr=[[]]\ \n TableScan: a"; assert_optimized_plan_eq(&plan, expected); Ok(()) } - #[ignore] #[test] fn test_single_distinct_group_by_with_alias() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan("a")) .aggregate(vec![col("a")], vec![count_distinct(col("b")).alias("cd_b")])? .build()?; - let expected = "Projection: #a.a, #COUNT(a.a) AS cd_b\ - \n Aggregate: groupBy=[[#a.a]], aggr=[[COUNT(#a.a)]]\ + let expected = "Projection: #a.a, #COUNT(a.b) AS cd_b\ + \n Aggregate: groupBy=[[#a.a]], aggr=[[COUNT(#a.b)]]\ \n Aggregate: groupBy=[[#a.a, #a.b]], aggr=[[]]\ \n TableScan: a"; assert_optimized_plan_eq(&plan, expected); From 5eb586062b0a47e6be3d5171e9854a972e08d410 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 12:42:35 -0600 Subject: [PATCH 6/7] fmt --- dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs index cac4004df..14af5d193 100644 --- a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs +++ b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs @@ -333,7 +333,10 @@ fn create_plan( // Re-create the original Aggregate node without the DISTINCT element let count = Expr::AggregateFunction { fun: AggregateFunction::Count, - args: vec![col(&first_aggregate.schema().field(group_expr.len()).qualified_name())], + args: vec![col(&first_aggregate + .schema() + .field(group_expr.len()) + .qualified_name())], distinct: false, filter: None, }; From 8027c7e4f0036a7b834174c8557bdf94342e19c9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 21 Sep 2022 13:16:29 -0600 Subject: [PATCH 7/7] unignore tests --- dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs index d3e36cb96..14af5d193 100644 --- a/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs +++ b/dask_planner/src/sql/optimizer/eliminate_agg_distinct.rs @@ -503,7 +503,6 @@ mod tests { .expect("building plan") } - #[ignore] #[test] fn test_single_distinct_group_by() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan("a")) @@ -518,7 +517,6 @@ mod tests { Ok(()) } - #[ignore] #[test] fn test_single_distinct_group_by_with_alias() -> Result<()> { let plan = LogicalPlanBuilder::from(test_table_scan("a"))