Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle case-sensitive identifier when decorrelating predicate subquery #12443

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions datafusion/optimizer/src/decorrelate_predicate_subquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,8 @@ mod tests {
use super::*;
use crate::test::*;

use arrow::datatypes::DataType;
use datafusion_expr::{and, binary_expr, col, lit, not, or, out_ref_col};
use arrow::datatypes::{DataType, Field, Schema};
use datafusion_expr::{and, binary_expr, col, lit, not, or, out_ref_col, table_scan};

fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> {
assert_optimized_plan_eq_display_indent(
Expand Down Expand Up @@ -1909,4 +1909,35 @@ mod tests {

assert_optimized_plan_equal(plan, expected)
}

/// Decorrelating a correlated `EXISTS` subquery must work when the table and
/// column identifiers are upper-case (i.e. case-sensitive, quoted names).
///
/// Both scans share a two-column schema (`A`, `B`); the subquery over
/// `TEST_B` is correlated with the outer `TEST_A` through column `A`.
#[test]
fn upper_case_ident() -> Result<()> {
    let schema = Schema::new(vec![
        Field::new("A", DataType::UInt32, false),
        Field::new("B", DataType::UInt32, false),
    ]);

    // Quoted names keep the identifiers upper-case instead of normalizing them.
    let outer_scan = table_scan(Some("\"TEST_A\""), &schema, None)?.build()?;
    let inner_scan = table_scan(Some("\"TEST_B\""), &schema, None)?.build()?;

    // Inner `A` compared against the outer reference `TEST_A.A`.
    let correlated_predicate =
        col("\"A\"").eq(out_ref_col(DataType::UInt32, "\"TEST_A\".\"A\""));
    let exists_subquery = LogicalPlanBuilder::from(inner_scan)
        .filter(correlated_predicate)?
        .project(vec![lit(1)])?
        .build()?;

    let outer_plan = LogicalPlanBuilder::from(outer_scan)
        .filter(exists(Arc::new(exists_subquery)))?
        .project(vec![col("\"TEST_A\".\"B\"")])?
        .build()?;

    let expected_plan = "Projection: TEST_A.B [B:UInt32]\
        \n LeftSemi Join: Filter: __correlated_sq_1.A = TEST_A.A [A:UInt32, B:UInt32]\
        \n TableScan: TEST_A [A:UInt32, B:UInt32]\
        \n SubqueryAlias: __correlated_sq_1 [Int32(1):Int32, A:UInt32]\
        \n Projection: Int32(1), TEST_B.A [Int32(1):Int32, A:UInt32]\
        \n TableScan: TEST_B [A:UInt32, B:UInt32]";

    assert_optimized_plan_equal(outer_plan, expected_plan)
}
}
4 changes: 1 addition & 3 deletions datafusion/optimizer/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,7 @@ pub(crate) fn replace_qualified_name(
) -> Result<Expr> {
let alias_cols: Vec<Column> = cols
.iter()
.map(|col| {
Column::from_qualified_name(format!("{}.{}", subquery_alias, col.name))
})
.map(|col| Column::new(Some(subquery_alias), &col.name))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

.collect();
let replace_map: HashMap<&Column, &Column> =
cols.iter().zip(alias_cols.iter()).collect();
Expand Down
25 changes: 25 additions & 0 deletions datafusion/optimizer/tests/optimizer_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,31 @@ fn select_wildcard_with_repeated_column_but_is_aliased() {
assert_eq!(expected, format!("{plan}"));
}

/// A correlated `EXISTS` predicate whose subquery exposes upper-case
/// (quoted) aliases must be rewritten into a `LeftSemi Join` that still
/// refers to the upper-case column through the generated subquery alias.
#[test]
fn select_correlated_predicate_subquery_with_uppercase_ident() {
    // The derived table "T1" renames lower-case columns to quoted upper-case ones.
    let query = r#"
SELECT *
FROM
test
WHERE
EXISTS (
SELECT 1
FROM (SELECT col_int32 as "COL_INT32", col_uint32 as "COL_UINT32" FROM test) "T1"
WHERE "T1"."COL_INT32" = test.col_int32
)
"#;

    let expected = "LeftSemi Join: test.col_int32 = __correlated_sq_1.COL_INT32\
        \n Filter: test.col_int32 IS NOT NULL\
        \n TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64, col_ts_nano_none, col_ts_nano_utc]\
        \n SubqueryAlias: __correlated_sq_1\
        \n SubqueryAlias: T1\
        \n Projection: test.col_int32 AS COL_INT32\
        \n Filter: test.col_int32 IS NOT NULL\
        \n TableScan: test projection=[col_int32]";

    let optimized = test_sql(query).unwrap();
    let rendered = format!("{optimized}");
    assert_eq!(expected, rendered);
}

fn test_sql(sql: &str) -> Result<LogicalPlan> {
// parse the SQL
let dialect = GenericDialect {}; // or AnsiDialect, or your own dialect ...
Expand Down
31 changes: 31 additions & 0 deletions datafusion/sqllogictest/test_files/join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1178,3 +1178,34 @@ drop table t1;

statement ok
drop table t0;

# Test decorrelate query with the uppercase table name and column name
# Case 1: the tables and their columns are created with quoted, uppercase identifiers.
statement ok
create table "T1"("C1" int, "C2" int);

statement ok
create table "T2"("C1" int, "C3" int);

# Correlated NOT EXISTS between "T1" and "T2" on the uppercase column "C1".
statement ok
select "C1" from "T1" where not exists (select 1 from "T2" where "T1"."C1" = "T2"."C1")

# Case 2: lowercase base tables; the uppercase names exist only as quoted
# aliases introduced by the derived tables in the query below.
statement ok
create table t1(c1 int, c2 int);

statement ok
create table t2(c1 int, c3 int);

# Same correlated NOT EXISTS shape as case 1, over the aliased derived tables.
statement ok
select "C1" from (select c1 as "C1", c2 as "C2" from t1) as "T1" where not exists (select 1 from (select c1 as "C1", c3 as "C3" from t2) as "T2" where "T1"."C1" = "T2"."C1")

# Clean up every table created by this test section.
statement ok
drop table "T1";

statement ok
drop table "T2";

statement ok
drop table t1;

statement ok
drop table t2;
Loading