From e93b5333019635e9043ea3431453c19aa93af45c Mon Sep 17 00:00:00 2001 From: packy92 <110370499+packy92@users.noreply.github.com> Date: Wed, 16 Nov 2022 11:07:34 +0800 Subject: [PATCH 1/3] [Enhancement] colocate join need consider column equivalent conduction(backport #13344) --- .../ChildOutputPropertyGuarantor.java | 39 ++++++++-- .../starrocks/sql/plan/ColocateJoinTest.java | 72 +++++++++++++++++++ 2 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 fe/fe-core/src/test/java/com/starrocks/sql/plan/ColocateJoinTest.java diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/ChildOutputPropertyGuarantor.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/ChildOutputPropertyGuarantor.java index ed89ccc9749e4..0c5120f11d678 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/ChildOutputPropertyGuarantor.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/ChildOutputPropertyGuarantor.java @@ -4,9 +4,12 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import com.starrocks.catalog.ColocateTableIndex; +import com.starrocks.common.Pair; import com.starrocks.qe.ConnectContext; import com.starrocks.server.GlobalStateMgr; +import com.starrocks.sql.optimizer.base.ColumnRefSet; import com.starrocks.sql.optimizer.base.DistributionProperty; import com.starrocks.sql.optimizer.base.DistributionSpec; import com.starrocks.sql.optimizer.base.HashDistributionDesc; @@ -21,6 +24,7 @@ import com.starrocks.sql.optimizer.task.TaskContext; import java.util.List; +import java.util.Set; public class ChildOutputPropertyGuarantor extends PropertyDeriverBase { private PhysicalPropertySet requirements; @@ -96,16 +100,27 @@ public boolean canColocateJoin(HashDistributionSpec leftLocalDistributionSpec, Preconditions.checkState(leftLocalDistributionDesc.getColumns().size() == rightLocalDistributionDesc.getColumns().size()); } - // check orders of predicate columns is right - // check predicate columns is satisfy bucket hash columns + + // The order of equivalence predicates(shuffle columns are derived from them) is + // meaningless, hence it is correct to use a set to save these shuffle pairs. According + // to the distribution column information of the left and right children, we can build + // distribution pairs. We can use colocate join is judged by whether all the distribution + // pairs are exist in the equivalent predicates set. + Set> shufflePairs = Sets.newHashSet(); + for (int i = 0; i < leftShuffleColumns.size(); i++) { + shufflePairs.add(Pair.create(leftShuffleColumns.get(i), rightShuffleColumns.get(i))); + } + for (int i = 0; i < leftLocalDistributionDesc.getColumns().size(); ++i) { int leftScanColumnId = leftLocalDistributionDesc.getColumns().get(i); - int leftIndex = leftShuffleColumns.indexOf(leftScanColumnId); + ColumnRefSet leftEquivalentCols = leftLocalDistributionSpec.getPropertyInfo() + .getEquivalentColumns(leftScanColumnId); int rightScanColumnId = rightLocalDistributionDesc.getColumns().get(i); - int rightIndex = rightShuffleColumns.indexOf(rightScanColumnId); + ColumnRefSet rightEquivalentCols = rightLocalDistributionSpec.getPropertyInfo() + .getEquivalentColumns(rightScanColumnId); - if (leftIndex != rightIndex) { + if (!isDistributionPairExist(shufflePairs, leftEquivalentCols, rightEquivalentCols)) { return false; } } @@ -325,4 +340,18 @@ public Void visitPhysicalJoin(PhysicalJoinOperator node, ExpressionContext conte return visitOperator(node, context); } + + private boolean isDistributionPairExist(Set> shufflePairs, + ColumnRefSet leftEquivalentCols, + ColumnRefSet rightEquivalentCols) { + for (int leftCol : leftEquivalentCols.getColumnIds()) { + for (int rightCol : rightEquivalentCols.getColumnIds()) { + Pair distributionPair = Pair.create(leftCol, rightCol); + if (shufflePairs.contains(distributionPair)) { + return true; + } + } + } + return false; + } } diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ColocateJoinTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ColocateJoinTest.java new file mode 100644 index 0000000000000..04f151f43f87b --- /dev/null +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ColocateJoinTest.java @@ -0,0 +1,72 @@ +// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc. + +package com.starrocks.sql.plan; + +import com.google.common.collect.Lists; +import com.starrocks.common.FeConstants; +import org.apache.commons.lang.StringUtils; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.List; + +public class ColocateJoinTest extends PlanTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + PlanTestBase.beforeClass(); + FeConstants.runningUnitTest = true; + starRocksAssert.withTable("CREATE TABLE `colocate_t2_1` (\n" + + " `v7` bigint NULL COMMENT \"\",\n" + + " `v8` bigint NULL COMMENT \"\",\n" + + " `v9` bigint NULL\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(`v7`, `v8`, v9)\n" + + "DISTRIBUTED BY HASH(`v7`) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\",\n" + + "\"in_memory\" = \"false\",\n" + + "\"storage_format\" = \"DEFAULT\",\n" + + "\"colocate_with\" = \"colocate_group_1\"" + + ");"); + } + + @Test + public void testColocateJoinOnce() throws Exception { + List sqls = Lists.newArrayList(); + sqls.add("select * from colocate_t0 join colocate_t1 on v1 = v5 and v1 = v4"); + sqls.add("select * from colocate_t0 join colocate_t1 on v2 = v4 and v1 = v4"); + sqls.add("select * from colocate_t0 join colocate_t1 on v1 + v2 = v4 + v5 and v1 = v4 + 1 and v1 = v4"); + sqls.add("select * from colocate_t0, colocate_t1 where v1 = v5 and v1 = v4"); + sqls.add("select * from colocate_t0, colocate_t1 where v2 = v4 and v1 = v4"); + sqls.add("select * from colocate_t0, colocate_t1 where v1 + v2 = v4 + v5 and v1 = v4 + 1 and v1 = v4"); + + sqls.add("select * from colocate_t0, colocate_t1, colocate_t2_1 where v1 = v5 and v5 = v7"); + for (String sql : sqls) { + String plan = getFragmentPlan(sql); + int count = StringUtils.countMatches(plan, "INNER JOIN (COLOCATE)"); + Assert.assertEquals(plan, 1, count); + } + + } + + @Test + public void testColocateJoinTwice() throws Exception { + List sqls = Lists.newArrayList(); + sqls.add("select * from colocate_t0 join colocate_t1 on v1 = v4 join colocate_t2_1 on v4 = v7"); + sqls.add("select * from colocate_t0 join colocate_t1 on v1 = v5 and v1 = v4 join colocate_t2_1 on v5 = v7 and v7 = v2"); + sqls.add("select * from colocate_t0 join colocate_t1 on v1 = v5 join colocate_t2_1 on v1 = v4 and v1 = v7"); + + + sqls.add("select * from colocate_t0, colocate_t1, colocate_t2_1 where v1 = v4 and v4 = v7"); + sqls.add("select * from colocate_t0, colocate_t1, colocate_t2_1 where v1 = v5 and v1 = v4 and v5 = v7 and v7 = v2"); + sqls.add("select * from colocate_t0, colocate_t1, colocate_t2_1 where v1 = v5 and v1 = v4 and v1 = v7"); + for (String sql : sqls) { + String plan = getFragmentPlan(sql); + int count = StringUtils.countMatches(plan, "INNER JOIN (COLOCATE)"); + Assert.assertEquals(plan, 2, count); + } + + } +} From afd42df985c5ca402e730002149b276878c4057c Mon Sep 17 00:00:00 2001 From: packy Date: Thu, 17 Nov 2022 11:35:01 +0800 Subject: [PATCH 2/3] fix ut --- fe/fe-core/src/test/resources/sql/tpch/q2.sql | 35 ++- .../src/test/resources/sql/tpchcost/q2.sql | 240 +++++++----------- 2 files changed, 110 insertions(+), 165 deletions(-) diff --git a/fe/fe-core/src/test/resources/sql/tpch/q2.sql b/fe/fe-core/src/test/resources/sql/tpch/q2.sql index abf017ac95dbb..0bdd81a10b9fd 100644 --- a/fe/fe-core/src/test/resources/sql/tpch/q2.sql +++ b/fe/fe-core/src/test/resources/sql/tpch/q2.sql @@ -53,24 +53,23 @@ TOP-N (order by [[16: S_ACCTBAL DESC NULLS LAST, 26: N_NAME ASC NULLS FIRST, 12: EXCHANGE BROADCAST INNER JOIN (join-predicate [11: S_SUPPKEY = 20: PS_SUPPKEY] post-join-predicate [null]) SCAN (columns[17: S_COMMENT, 11: S_SUPPKEY, 12: S_NAME, 13: S_ADDRESS, 14: S_NATIONKEY, 15: S_PHONE, 16: S_ACCTBAL] predicate[null]) - EXCHANGE BROADCAST - INNER JOIN (join-predicate [22: PS_SUPPLYCOST = 57: min AND 19: PS_PARTKEY = 1: P_PARTKEY] post-join-predicate [null]) - SCAN (columns[19: PS_PARTKEY, 20: PS_SUPPKEY, 22: PS_SUPPLYCOST] predicate[null]) - EXCHANGE SHUFFLE[1] - INNER JOIN (join-predicate [1: P_PARTKEY = 34: PS_PARTKEY] post-join-predicate [null]) + EXCHANGE SHUFFLE[20] + INNER JOIN (join-predicate [57: min = 22: PS_SUPPLYCOST AND 34: PS_PARTKEY = 1: P_PARTKEY] post-join-predicate [null]) + AGGREGATE ([GLOBAL] aggregate [{57: min=min(57: min)}] group by [[34: PS_PARTKEY]] having [57: min IS NOT NULL] + AGGREGATE ([LOCAL] aggregate [{57: min=min(37: PS_SUPPLYCOST)}] group by [[34: PS_PARTKEY]] having [null] + INNER JOIN (join-predicate [35: PS_SUPPKEY = 40: S_SUPPKEY] post-join-predicate [null]) + SCAN (columns[34: PS_PARTKEY, 35: PS_SUPPKEY, 37: PS_SUPPLYCOST] predicate[null]) + EXCHANGE BROADCAST + INNER JOIN (join-predicate [43: S_NATIONKEY = 48: N_NATIONKEY] post-join-predicate [null]) + SCAN (columns[40: S_SUPPKEY, 43: S_NATIONKEY] predicate[null]) + EXCHANGE BROADCAST + INNER JOIN (join-predicate [50: N_REGIONKEY = 53: R_REGIONKEY] post-join-predicate [null]) + SCAN (columns[50: N_REGIONKEY, 48: N_NATIONKEY] predicate[null]) + EXCHANGE BROADCAST + SCAN (columns[53: R_REGIONKEY, 54: R_NAME] predicate[54: R_NAME = AMERICA]) + INNER JOIN (join-predicate [19: PS_PARTKEY = 1: P_PARTKEY] post-join-predicate [null]) + SCAN (columns[19: PS_PARTKEY, 20: PS_SUPPKEY, 22: PS_SUPPLYCOST] predicate[null]) + EXCHANGE SHUFFLE[1] SCAN (columns[1: P_PARTKEY, 3: P_MFGR, 5: P_TYPE, 6: P_SIZE] predicate[6: P_SIZE = 12 AND 5: P_TYPE LIKE %COPPER]) - EXCHANGE SHUFFLE[34] - AGGREGATE ([GLOBAL] aggregate [{57: min=min(57: min)}] group by [[34: PS_PARTKEY]] having [null] - AGGREGATE ([LOCAL] aggregate [{57: min=min(37: PS_SUPPLYCOST)}] group by [[34: PS_PARTKEY]] having [null] - INNER JOIN (join-predicate [35: PS_SUPPKEY = 40: S_SUPPKEY] post-join-predicate [null]) - SCAN (columns[34: PS_PARTKEY, 35: PS_SUPPKEY, 37: PS_SUPPLYCOST] predicate[null]) - EXCHANGE BROADCAST - INNER JOIN (join-predicate [43: S_NATIONKEY = 48: N_NATIONKEY] post-join-predicate [null]) - SCAN (columns[40: S_SUPPKEY, 43: S_NATIONKEY] predicate[null]) - EXCHANGE BROADCAST - INNER JOIN (join-predicate [50: N_REGIONKEY = 53: R_REGIONKEY] post-join-predicate [null]) - SCAN (columns[50: N_REGIONKEY, 48: N_NATIONKEY] predicate[null]) - EXCHANGE BROADCAST - SCAN (columns[53: R_REGIONKEY, 54: R_NAME] predicate[54: R_NAME = AMERICA]) [end] diff --git a/fe/fe-core/src/test/resources/sql/tpchcost/q2.sql b/fe/fe-core/src/test/resources/sql/tpchcost/q2.sql index d10098cb8cc6c..79a6ab3179929 100644 --- a/fe/fe-core/src/test/resources/sql/tpchcost/q2.sql +++ b/fe/fe-core/src/test/resources/sql/tpchcost/q2.sql @@ -1,54 +1,9 @@ -[sql] -select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 12 - and p_type like '%COPPER' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AMERICA' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AMERICA' -) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey limit 100; -[fragment statistics] -PLAN FRAGMENT 0(F17) +PLAN FRAGMENT 0(F16) Output Exprs:16: S_ACCTBAL | 12: S_NAME | 26: N_NAME | 1: P_PARTKEY | 3: P_MFGR | 13: S_ADDRESS | 15: S_PHONE | 17: S_COMMENT Input Partition: UNPARTITIONED RESULT SINK -38:MERGING-EXCHANGE +37:MERGING-EXCHANGE limit: 100 cardinality: 100 column statistics: @@ -67,9 +22,9 @@ PLAN FRAGMENT 1(F00) Input Partition: RANDOM OutPut Partition: UNPARTITIONED -OutPut Exchange Id: 38 +OutPut Exchange Id: 37 -37:TOP-N +36:TOP-N | order by: [16, DOUBLE, false] DESC, [26, VARCHAR, false] ASC, [12, VARCHAR, false] ASC, [1, INT, false] ASC | offset: 0 | limit: 100 @@ -86,7 +41,7 @@ OutPut Exchange Id: 38 | * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 180000.00000000003] ESTIMATE | * N_NAME-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE | -36:Project +35:Project | output columns: | 1 <-> [1: P_PARTKEY, INT, false] | 3 <-> [3: P_MFGR, VARCHAR, false] @@ -107,7 +62,7 @@ OutPut Exchange Id: 38 | * S_COMMENT-->[-Infinity, Infinity, 0.0, 101.0, 10000.0] ESTIMATE | * N_NAME-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE | -35:HASH JOIN +34:HASH JOIN | join op: INNER JOIN (BUCKET_SHUFFLE) | equal join conjunct: [11: S_SUPPKEY, INT, false] = [20: PS_SUPPKEY, INT, false] | build runtime filters: @@ -126,7 +81,7 @@ OutPut Exchange Id: 38 | * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 180000.00000000003] ESTIMATE | * N_NAME-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE | -|----34:EXCHANGE +|----33:EXCHANGE | cardinality: 360000 | 9:Project @@ -191,9 +146,9 @@ PLAN FRAGMENT 2(F05) Input Partition: RANDOM OutPut Partition: BUCKET_SHUFFLE_HASH_PARTITIONED: 20: PS_SUPPKEY -OutPut Exchange Id: 34 +OutPut Exchange Id: 33 -33:Project +32:Project | output columns: | 1 <-> [1: P_PARTKEY, INT, false] | 3 <-> [3: P_MFGR, VARCHAR, false] @@ -204,8 +159,9 @@ OutPut Exchange Id: 34 | * P_MFGR-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE | * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 400000.0] ESTIMATE | -32:HASH JOIN -| join op: INNER JOIN (BUCKET_SHUFFLE) +31:HASH JOIN +| join op: INNER JOIN (COLOCATE) +| colocate: true | equal join conjunct: [22: PS_SUPPLYCOST, DOUBLE, false] = [57: min, DOUBLE, true] | equal join conjunct: [19: PS_PARTKEY, INT, false] = [1: P_PARTKEY, INT, false] | build runtime filters: @@ -221,8 +177,80 @@ OutPut Exchange Id: 34 | * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE | * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE | -|----31:EXCHANGE -| cardinality: 100000 +|----30:Project +| | output columns: +| | 1 <-> [1: P_PARTKEY, INT, false] +| | 3 <-> [3: P_MFGR, VARCHAR, false] +| | 57 <-> [57: min, DOUBLE, true] +| | cardinality: 100000 +| | column statistics: +| | * P_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 100000.0] ESTIMATE +| | * P_MFGR-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE +| | * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE +| | +| 29:HASH JOIN +| | join op: INNER JOIN (BUCKET_SHUFFLE) +| | equal join conjunct: [34: PS_PARTKEY, INT, false] = [1: P_PARTKEY, INT, false] +| | build runtime filters: +| | - filter_id = 5, build_expr = (1: P_PARTKEY), remote = false +| | output columns: 1, 3, 57 +| | cardinality: 100000 +| | column statistics: +| | * P_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 100000.0] ESTIMATE +| | * P_MFGR-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE +| | * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 100000.0] ESTIMATE +| | * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE +| | +| |----28:EXCHANGE +| | cardinality: 100000 +| | +| 25:AGGREGATE (update finalize) +| | aggregate: min[([37: PS_SUPPLYCOST, DOUBLE, false]); args: DOUBLE; result: DOUBLE; args nullable: false; result nullable: true] +| | group by: [34: PS_PARTKEY, INT, false] +| | having: 57: min IS NOT NULL +| | cardinality: 16000000 +| | column statistics: +| | * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 1.6E7] ESTIMATE +| | * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE +| | +| 24:Project +| | output columns: +| | 34 <-> [34: PS_PARTKEY, INT, false] +| | 37 <-> [37: PS_SUPPLYCOST, DOUBLE, false] +| | cardinality: 16000000 +| | column statistics: +| | * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 1.6E7] ESTIMATE +| | * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE +| | +| 23:HASH JOIN +| | join op: INNER JOIN (BROADCAST) +| | equal join conjunct: [35: PS_SUPPKEY, INT, false] = [40: S_SUPPKEY, INT, false] +| | build runtime filters: +| | - filter_id = 4, build_expr = (40: S_SUPPKEY), remote = false +| | output columns: 34, 37 +| | cardinality: 16000000 +| | column statistics: +| | * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 1.6E7] ESTIMATE +| | * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 200000.0] ESTIMATE +| | * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE +| | * S_SUPPKEY-->[1.0, 1000000.0, 0.0, 4.0, 200000.0] ESTIMATE +| | +| |----22:EXCHANGE +| | cardinality: 200000 +| | +| 11:OlapScanNode +| table: partsupp, rollup: partsupp +| preAggregation: on +| partitionsRatio=1/1, tabletsRatio=10/10 +| actualRows=0, avgRowSize=24.0 +| cardinality: 80000000 +| probe runtime filters: +| - filter_id = 4, probe_expr = (35: PS_SUPPKEY) +| - filter_id = 5, probe_expr = (34: PS_PARTKEY) +| column statistics: +| * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 2.0E7] ESTIMATE +| * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 1000000.0] ESTIMATE +| * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE | 10:OlapScanNode table: partsupp, rollup: partsupp @@ -238,87 +266,7 @@ column statistics: * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 1000000.0] ESTIMATE * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE -PLAN FRAGMENT 3(F06) - -Input Partition: RANDOM -OutPut Partition: BUCKET_SHUFFLE_HASH_PARTITIONED: 1: P_PARTKEY -OutPut Exchange Id: 31 - -30:Project -| output columns: -| 1 <-> [1: P_PARTKEY, INT, false] -| 3 <-> [3: P_MFGR, VARCHAR, false] -| 57 <-> [57: min, DOUBLE, true] -| cardinality: 100000 -| column statistics: -| * P_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 100000.0] ESTIMATE -| * P_MFGR-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE -| * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE -| -29:HASH JOIN -| join op: INNER JOIN (BUCKET_SHUFFLE) -| equal join conjunct: [34: PS_PARTKEY, INT, false] = [1: P_PARTKEY, INT, false] -| build runtime filters: -| - filter_id = 5, build_expr = (1: P_PARTKEY), remote = false -| output columns: 1, 3, 57 -| cardinality: 100000 -| column statistics: -| * P_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 100000.0] ESTIMATE -| * P_MFGR-->[-Infinity, Infinity, 0.0, 25.0, 5.0] ESTIMATE -| * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 100000.0] ESTIMATE -| * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE -| -|----28:EXCHANGE -| cardinality: 100000 -| -25:AGGREGATE (update finalize) -| aggregate: min[([37: PS_SUPPLYCOST, DOUBLE, false]); args: DOUBLE; result: DOUBLE; args nullable: false; result nullable: true] -| group by: [34: PS_PARTKEY, INT, false] -| cardinality: 16000000 -| column statistics: -| * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 1.6E7] ESTIMATE -| * min-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE -| -24:Project -| output columns: -| 34 <-> [34: PS_PARTKEY, INT, false] -| 37 <-> [37: PS_SUPPLYCOST, DOUBLE, false] -| cardinality: 16000000 -| column statistics: -| * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 1.6E7] ESTIMATE -| * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE -| -23:HASH JOIN -| join op: INNER JOIN (BROADCAST) -| equal join conjunct: [35: PS_SUPPKEY, INT, false] = [40: S_SUPPKEY, INT, false] -| build runtime filters: -| - filter_id = 4, build_expr = (40: S_SUPPKEY), remote = false -| output columns: 34, 37 -| cardinality: 16000000 -| column statistics: -| * PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 1.6E7] ESTIMATE -| * PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 200000.0] ESTIMATE -| * PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE -| * S_SUPPKEY-->[1.0, 1000000.0, 0.0, 4.0, 200000.0] ESTIMATE -| -|----22:EXCHANGE -| cardinality: 200000 -| -11:OlapScanNode -table: partsupp, rollup: partsupp -preAggregation: on -partitionsRatio=1/1, tabletsRatio=10/10 -actualRows=0, avgRowSize=24.0 -cardinality: 80000000 -probe runtime filters: -- filter_id = 4, probe_expr = (35: PS_SUPPKEY) -- filter_id = 5, probe_expr = (34: PS_PARTKEY) -column statistics: -* PS_PARTKEY-->[1.0, 2.0E7, 0.0, 8.0, 2.0E7] ESTIMATE -* PS_SUPPKEY-->[1.0, 1000000.0, 0.0, 8.0, 1000000.0] ESTIMATE -* PS_SUPPLYCOST-->[1.0, 1000.0, 0.0, 8.0, 99864.0] ESTIMATE - -PLAN FRAGMENT 4(F13) +PLAN FRAGMENT 3(F13) Input Partition: RANDOM OutPut Partition: BUCKET_SHUFFLE_HASH_PARTITIONED: 1: P_PARTKEY @@ -346,7 +294,7 @@ column statistics: * P_TYPE-->[-Infinity, Infinity, 0.0, 25.0, 150.0] ESTIMATE * P_SIZE-->[12.0, 12.0, 0.0, 4.0, 50.0] ESTIMATE -PLAN FRAGMENT 5(F07) +PLAN FRAGMENT 4(F07) Input Partition: RANDOM OutPut Partition: UNPARTITIONED @@ -386,7 +334,7 @@ column statistics: * S_SUPPKEY-->[1.0, 1000000.0, 0.0, 4.0, 1000000.0] ESTIMATE * S_NATIONKEY-->[0.0, 24.0, 0.0, 4.0, 25.0] ESTIMATE -PLAN FRAGMENT 6(F08) +PLAN FRAGMENT 5(F08) Input Partition: RANDOM OutPut Partition: UNPARTITIONED @@ -426,7 +374,7 @@ column statistics: * N_NATIONKEY-->[0.0, 24.0, 0.0, 4.0, 25.0] ESTIMATE * N_REGIONKEY-->[0.0, 4.0, 0.0, 4.0, 5.0] ESTIMATE -PLAN FRAGMENT 7(F09) +PLAN FRAGMENT 6(F09) Input Partition: RANDOM OutPut Partition: UNPARTITIONED @@ -450,7 +398,7 @@ column statistics: * R_REGIONKEY-->[0.0, 4.0, 0.0, 4.0, 1.0] ESTIMATE * R_NAME-->[-Infinity, Infinity, 0.0, 25.0, 1.0] ESTIMATE -PLAN FRAGMENT 8(F01) +PLAN FRAGMENT 7(F01) Input Partition: RANDOM OutPut Partition: UNPARTITIONED @@ -494,7 +442,7 @@ column statistics: * N_NAME-->[-Infinity, Infinity, 0.0, 25.0, 25.0] ESTIMATE * N_REGIONKEY-->[0.0, 4.0, 0.0, 4.0, 5.0] ESTIMATE -PLAN FRAGMENT 9(F02) +PLAN FRAGMENT 8(F02) Input Partition: RANDOM OutPut Partition: UNPARTITIONED @@ -516,6 +464,4 @@ actualRows=0, avgRowSize=29.0 cardinality: 1 column statistics: * R_REGIONKEY-->[0.0, 4.0, 0.0, 4.0, 1.0] ESTIMATE -* R_NAME-->[-Infinity, Infinity, 0.0, 25.0, 1.0] ESTIMATE -[end] - +* R_NAME-->[-Infinity, Infinity, 0.0, 25.0, 1.0] ESTIMATE \ No newline at end of file From 6df4282c143223ebc7d36a9b49818b1c6eb48592 Mon Sep 17 00:00:00 2001 From: packy Date: Thu, 17 Nov 2022 12:45:47 +0800 Subject: [PATCH 3/3] fix ut --- fe/fe-core/src/test/resources/sql/tpch/q2.sql | 51 +------------------ 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/fe/fe-core/src/test/resources/sql/tpch/q2.sql b/fe/fe-core/src/test/resources/sql/tpch/q2.sql index 0bdd81a10b9fd..01789f6a6f568 100644 --- a/fe/fe-core/src/test/resources/sql/tpch/q2.sql +++ b/fe/fe-core/src/test/resources/sql/tpch/q2.sql @@ -1,48 +1,3 @@ -[sql] -select - s_acctbal, - s_name, - n_name, - p_partkey, - p_mfgr, - s_address, - s_phone, - s_comment -from - part, - supplier, - partsupp, - nation, - region -where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and p_size = 12 - and p_type like '%COPPER' - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AMERICA' - and ps_supplycost = ( - select - min(ps_supplycost) - from - partsupp, - supplier, - nation, - region - where - p_partkey = ps_partkey - and s_suppkey = ps_suppkey - and s_nationkey = n_nationkey - and n_regionkey = r_regionkey - and r_name = 'AMERICA' -) -order by - s_acctbal desc, - n_name, - s_name, - p_partkey limit 100; -[result] TOP-N (order by [[16: S_ACCTBAL DESC NULLS LAST, 26: N_NAME ASC NULLS FIRST, 12: S_NAME ASC NULLS FIRST, 1: P_PARTKEY ASC NULLS FIRST]]) TOP-N (order by [[16: S_ACCTBAL DESC NULLS LAST, 26: N_NAME ASC NULLS FIRST, 12: S_NAME ASC NULLS FIRST, 1: P_PARTKEY ASC NULLS FIRST]]) INNER JOIN (join-predicate [30: R_REGIONKEY = 27: N_REGIONKEY] post-join-predicate [null]) @@ -55,7 +10,7 @@ TOP-N (order by [[16: S_ACCTBAL DESC NULLS LAST, 26: N_NAME ASC NULLS FIRST, 12: SCAN (columns[17: S_COMMENT, 11: S_SUPPKEY, 12: S_NAME, 13: S_ADDRESS, 14: S_NATIONKEY, 15: S_PHONE, 16: S_ACCTBAL] predicate[null]) EXCHANGE SHUFFLE[20] INNER JOIN (join-predicate [57: min = 22: PS_SUPPLYCOST AND 34: PS_PARTKEY = 1: P_PARTKEY] post-join-predicate [null]) - AGGREGATE ([GLOBAL] aggregate [{57: min=min(57: min)}] group by [[34: PS_PARTKEY]] having [57: min IS NOT NULL] + AGGREGATE ([GLOBAL] aggregate [{57: min=min(57: min)}] group by [[34: PS_PARTKEY]] having [null] AGGREGATE ([LOCAL] aggregate [{57: min=min(37: PS_SUPPLYCOST)}] group by [[34: PS_PARTKEY]] having [null] INNER JOIN (join-predicate [35: PS_SUPPKEY = 40: S_SUPPKEY] post-join-predicate [null]) SCAN (columns[34: PS_PARTKEY, 35: PS_SUPPKEY, 37: PS_SUPPLYCOST] predicate[null]) @@ -70,6 +25,4 @@ TOP-N (order by [[16: S_ACCTBAL DESC NULLS LAST, 26: N_NAME ASC NULLS FIRST, 12: INNER JOIN (join-predicate [19: PS_PARTKEY = 1: P_PARTKEY] post-join-predicate [null]) SCAN (columns[19: PS_PARTKEY, 20: PS_SUPPKEY, 22: PS_SUPPLYCOST] predicate[null]) EXCHANGE SHUFFLE[1] - SCAN (columns[1: P_PARTKEY, 3: P_MFGR, 5: P_TYPE, 6: P_SIZE] predicate[6: P_SIZE = 12 AND 5: P_TYPE LIKE %COPPER]) -[end] - + SCAN (columns[1: P_PARTKEY, 3: P_MFGR, 5: P_TYPE, 6: P_SIZE] predicate[6: P_SIZE = 12 AND 5: P_TYPE LIKE %COPPER]) \ No newline at end of file