diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java index f1dd9dd6e0c3..1875538e089f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -229,12 +229,16 @@ public RelNode visit(HiveJoin join) { // No self-join detected, return the join as is aliases.addAll(lf.aliases); aliases.addAll(rf.aliases); + } else { + // Self-join detected, introduce a derived table for the left side + aliases.addAll(rf.aliases); + newL = introduceDerivedTable(newL); + } + if (newL == join.getLeft() && newR == join.getRight()) { + return join; + } else { return join.copy(join.getTraitSet(), Arrays.asList(newL, newR)); } - // Self-join detected, introduce a derived table for the left side - aliases.addAll(rf.aliases); - introduceDerivedTable(newL, join); - return join; } @Override diff --git a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q index 3293a409ed2d..4195ec443b5c 100644 --- a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q +++ b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q @@ -3,4 +3,9 @@ create table t1 (key int, value int); explain cbo with cte as (select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1) -select * from cte a join cte b join cte c +select * from cte a join cte b join cte c; + +explain cbo +with cte as +(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1) +select * from cte a join t1 b join cte c; diff --git a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out index 327da7947268..ed26fae7c2a3 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out @@ -25,10 +25,32 @@ POSTHOOK: Input: default@t1 CBO PLAN: HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$12], value0=[$13], BLOCK__OFFSET__INSIDE__FILE0=[$14], INPUT__FILE__NAME0=[$15], ROW__ID0=[$16], ROW__IS__DELETED0=[$17], key1=[$6], value1=[$7], BLOCK__OFFSET__INSIDE__FILE1=[$8], INPUT__FILE__NAME1=[$9], ROW__ID1=[$10], ROW__IS__DELETED1=[$11]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7], BLOCK__OFFSET__INSIDE__FILE0=[$8], INPUT__FILE__NAME0=[$9], ROW__ID0=[$10], ROW__IS__DELETED0=[$11]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5]) - HiveTableScan(table=[[default, t1]], table:alias=[t1]) - HiveTableScan(table=[[default, t1]], table:alias=[t1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) HiveTableScan(table=[[default, t1]], table:alias=[t1]) +Warning: Shuffle Join MERGEJOIN[13][tables = [t1, $hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, t1]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain cbo +with cte as +(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1) +select * from cte a join t1 b join cte c +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +with cte as +(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1) +select * from cte a join t1 b join cte c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +CBO PLAN: +HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + HiveProject(key=[$0], value=[$1]) + HiveTableScan(table=[[default, t1]], table:alias=[b]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + diff --git a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_mv.q.out b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_mv.q.out index b77ea3139889..8ff1dbfdfd22 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_mv.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_mv.q.out @@ -43,10 +43,8 @@ POSTHOOK: Input: default@t1 CBO PLAN: HiveProject(key=[$0], value=[$1], key0=[$4], value0=[$5], key1=[$2], value1=[$3]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(key=[$0], value=[$1]) - HiveTableScan(table=[[default, mv]], table:alias=[default.mv]) - HiveTableScan(table=[[default, mv]], table:alias=[default.mv]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, mv]], table:alias=[default.mv]) + HiveTableScan(table=[[default, mv]], table:alias=[default.mv]) HiveTableScan(table=[[default, mv]], table:alias=[default.mv]) diff --git a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_subquery.q.out b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_subquery.q.out index 56ad6d900ea5..8e3804a9d9d3 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_subquery.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_subquery.q.out @@ -27,10 +27,8 @@ POSTHOOK: Input: default@t1 CBO PLAN: HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$12], value0=[$13], BLOCK__OFFSET__INSIDE__FILE0=[$14], INPUT__FILE__NAME0=[$15], ROW__ID0=[$16], ROW__IS__DELETED0=[$17], key1=[$6], value1=[$7], BLOCK__OFFSET__INSIDE__FILE1=[$8], INPUT__FILE__NAME1=[$9], ROW__ID1=[$10], ROW__IS__DELETED1=[$11]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7], BLOCK__OFFSET__INSIDE__FILE0=[$8], INPUT__FILE__NAME0=[$9], ROW__ID0=[$10], ROW__IS__DELETED0=[$11]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5]) - HiveTableScan(table=[[default, t1]], table:alias=[t1]) - HiveTableScan(table=[[default, t1]], table:alias=[t1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) + HiveTableScan(table=[[default, t1]], table:alias=[t1]) HiveTableScan(table=[[default, t1]], table:alias=[t1])