From 10d316167e391cf756edbcccf2ebf22987e2552a Mon Sep 17 00:00:00 2001 From: miomiocat <284487410@qq.com> Date: Fri, 15 Apr 2022 16:26:22 +0800 Subject: [PATCH 1/3] Fix missing materialized column for hudi table --- .../optimizer/operator/logical/LogicalHudiScanOperator.java | 2 ++ .../rule/transformation/PruneHDFSScanColumnRule.java | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java index 4a5667e9b12339..624a2a09cce941 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java @@ -36,6 +36,8 @@ public LogicalHudiScanOperator(Table table, Preconditions.checkState(table instanceof HudiTable); this.tableType = tableType; + HudiTable hudiTable = (HudiTable) table; + partitionColumns.addAll(hudiTable.getPartitionColumnNames()); } private LogicalHudiScanOperator(LogicalHudiScanOperator.Builder builder) { diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java index a363a698aac990..dc6e1eac911e3b 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java @@ -138,6 +138,10 @@ private boolean containsMaterializedColumn(LogicalScanOperator scanOperator, Set return scanColumns.size() != 0 && !((LogicalHiveScanOperator) scanOperator).getPartitionColumns().containsAll( scanColumns.stream().map(ColumnRefOperator::getName).collect(Collectors.toList())); } + if (scanOperator instanceof LogicalHudiScanOperator) { + return scanColumns.size() != 0 && !((LogicalHudiScanOperator) scanOperator).getPartitionColumns().containsAll( + scanColumns.stream().map(ColumnRefOperator::getName).collect(Collectors.toList())); + } return scanColumns.size() != 0; } From 52a63f8da0d10517a61c28d3d02f0fa59a363e96 Mon Sep 17 00:00:00 2001 From: miomiocat <284487410@qq.com> Date: Fri, 15 Apr 2022 17:08:01 +0800 Subject: [PATCH 2/3] refine --- .../logical/LogicalHiveScanOperator.java | 5 ----- .../logical/LogicalHudiScanOperator.java | 5 ----- .../operator/logical/LogicalScanOperator.java | 7 +++++++ .../PruneHDFSScanColumnRule.java | 18 +++++------------- 4 files changed, 12 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHiveScanOperator.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHiveScanOperator.java index 62486559fbd7ae..282584a2538887 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHiveScanOperator.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHiveScanOperator.java @@ -19,7 +19,6 @@ public class LogicalHiveScanOperator extends LogicalScanOperator { private final Table.TableType tableType; private ScanOperatorPredicates predicates = new ScanOperatorPredicates(); - private Set partitionColumns = Sets.newHashSet(); public LogicalHiveScanOperator(Table table, Table.TableType tableType, @@ -58,10 +57,6 @@ public Table.TableType getTableType() { return tableType; } - public Set getPartitionColumns() { - return partitionColumns; - } - @Override public ScanOperatorPredicates getScanOperatorPredicates() { return this.predicates; diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java index 624a2a09cce941..09848c586d2370 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalHudiScanOperator.java @@ -19,7 +19,6 @@ public class LogicalHudiScanOperator extends LogicalScanOperator { private final Table.TableType tableType; private ScanOperatorPredicates predicates = new ScanOperatorPredicates(); - private Set partitionColumns = Sets.newHashSet(); public LogicalHudiScanOperator(Table table, Table.TableType tableType, @@ -58,10 +57,6 @@ public Table.TableType getTableType() { return tableType; } - public Set getPartitionColumns() { - return partitionColumns; - } - @Override public ScanOperatorPredicates getScanOperatorPredicates() { return this.predicates; diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalScanOperator.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalScanOperator.java index 4e093f63ececa0..04a4b2ee13d96a 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalScanOperator.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/logical/LogicalScanOperator.java @@ -2,6 +2,7 @@ package com.starrocks.sql.optimizer.operator.logical; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; import com.starrocks.catalog.Column; import com.starrocks.catalog.Table; import com.starrocks.common.AnalysisException; @@ -23,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; public abstract class LogicalScanOperator extends LogicalOperator { protected final Table table; @@ -34,6 +36,7 @@ public abstract class LogicalScanOperator extends LogicalOperator { protected final ImmutableMap colRefToColumnMetaMap; protected final ImmutableMap columnMetaToColRefMap; protected final ImmutableMap columnFilters; + protected Set partitionColumns = Sets.newHashSet(); public LogicalScanOperator( OperatorType type, @@ -79,6 +82,10 @@ public List getOutputColumns() { return new ArrayList<>(colRefToColumnMetaMap.keySet()); } + public Set getPartitionColumns() { + return partitionColumns; + } + @Override public ColumnRefSet getOutputColumns(ExpressionContext expressionContext) { if (projection != null) { diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java index dc6e1eac911e3b..615879b8989b49 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java @@ -134,25 +134,17 @@ public List transform(OptExpression input, OptimizerContext conte } private boolean containsMaterializedColumn(LogicalScanOperator scanOperator, Set scanColumns) { - if (scanOperator instanceof LogicalHiveScanOperator) { - return scanColumns.size() != 0 && !((LogicalHiveScanOperator) scanOperator).getPartitionColumns().containsAll( - scanColumns.stream().map(ColumnRefOperator::getName).collect(Collectors.toList())); - } - if (scanOperator instanceof LogicalHudiScanOperator) { - return scanColumns.size() != 0 && !((LogicalHudiScanOperator) scanOperator).getPartitionColumns().containsAll( + if (scanOperator instanceof LogicalHiveScanOperator || scanOperator instanceof LogicalHudiScanOperator) { + return scanColumns.size() != 0 && !scanOperator.getPartitionColumns().containsAll( scanColumns.stream().map(ColumnRefOperator::getName).collect(Collectors.toList())); } return scanColumns.size() != 0; } private boolean isPartitionColumn(LogicalScanOperator scanOperator, String columnName) { - if (scanOperator instanceof LogicalHiveScanOperator) { - // Hive partition columns is not materialized column, so except partition columns - return ((LogicalHiveScanOperator) scanOperator).getPartitionColumns().contains(columnName); - } - if (scanOperator instanceof LogicalHudiScanOperator) { - // Hudi partition columns is not materialized column, so except partition columns - return ((LogicalHudiScanOperator) scanOperator).getPartitionColumns().contains(columnName); + if (scanOperator instanceof LogicalHiveScanOperator || scanOperator instanceof LogicalHudiScanOperator) { + // Hive/Hudi partition columns is not materialized column, so except partition columns + return scanOperator.getPartitionColumns().contains(columnName); } return false; } From 3a079bfdfe0e80f18b8252b0d42a607a42261bb6 Mon Sep 17 00:00:00 2001 From: miomiocat <284487410@qq.com> Date: Mon, 18 Apr 2022 11:26:07 +0800 Subject: [PATCH 3/3] ut --- .../PruneHDFSScanColumnRule.java | 14 ++-- .../PruneHDFSScanColumnRuleTest.java | 68 +++++++++++++++++-- 2 files changed, 68 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java index 615879b8989b49..75b525b665747e 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRule.java @@ -134,18 +134,12 @@ public List transform(OptExpression input, OptimizerContext conte } private boolean containsMaterializedColumn(LogicalScanOperator scanOperator, Set scanColumns) { - if (scanOperator instanceof LogicalHiveScanOperator || scanOperator instanceof LogicalHudiScanOperator) { - return scanColumns.size() != 0 && !scanOperator.getPartitionColumns().containsAll( - scanColumns.stream().map(ColumnRefOperator::getName).collect(Collectors.toList())); - } - return scanColumns.size() != 0; + return scanColumns.size() != 0 && !scanOperator.getPartitionColumns().containsAll( + scanColumns.stream().map(ColumnRefOperator::getName).collect(Collectors.toList())); } private boolean isPartitionColumn(LogicalScanOperator scanOperator, String columnName) { - if (scanOperator instanceof LogicalHiveScanOperator || scanOperator instanceof LogicalHudiScanOperator) { - // Hive/Hudi partition columns is not materialized column, so except partition columns - return scanOperator.getPartitionColumns().contains(columnName); - } - return false; + // Hive/Hudi partition columns is not materialized column, so except partition columns + return scanOperator.getPartitionColumns().contains(columnName); } } \ No newline at end of file diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRuleTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRuleTest.java index 29f72ae929fa5f..d78b048eaee0a7 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRuleTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/optimizer/rule/transformation/PruneHDFSScanColumnRuleTest.java @@ -4,12 +4,14 @@ import com.google.common.collect.Maps; import com.starrocks.catalog.Column; +import com.starrocks.catalog.HudiTable; import com.starrocks.catalog.IcebergTable; import com.starrocks.catalog.Table; import com.starrocks.catalog.Type; import com.starrocks.sql.optimizer.OptExpression; import com.starrocks.sql.optimizer.OptimizerContext; import com.starrocks.sql.optimizer.base.ColumnRefSet; +import com.starrocks.sql.optimizer.operator.logical.LogicalHudiScanOperator; import com.starrocks.sql.optimizer.operator.logical.LogicalIcebergScanOperator; import com.starrocks.sql.optimizer.operator.scalar.BinaryPredicateOperator; import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator; @@ -28,6 +30,7 @@ public class PruneHDFSScanColumnRuleTest { private PruneHDFSScanColumnRule icebergRule = PruneHDFSScanColumnRule.ICEBERG_SCAN; + private PruneHDFSScanColumnRule hudiRule = PruneHDFSScanColumnRule.HUDI_SCAN; ColumnRefOperator intColumnOperator = new ColumnRefOperator(1, Type.INT, "id", true); ColumnRefOperator strColumnOperator = new ColumnRefOperator(2, Type.STRING, "name", true); @@ -39,8 +42,8 @@ public class PruneHDFSScanColumnRuleTest { @Test public void transformIcebergWithPredicate(@Mocked IcebergTable table, - @Mocked OptimizerContext context, - @Mocked TaskContext taskContext) { + @Mocked OptimizerContext context, + @Mocked TaskContext taskContext) { OptExpression scan = new OptExpression( new LogicalIcebergScanOperator(table, Table.TableType.ICEBERG, scanColumnMap, Maps.newHashMap(), -1, @@ -59,8 +62,8 @@ public void transformIcebergWithPredicate(@Mocked IcebergTable table, @Test public void transformIcebergWithNoScanColumn(@Mocked IcebergTable table, - @Mocked OptimizerContext context, - @Mocked TaskContext taskContext) { + @Mocked OptimizerContext context, + @Mocked TaskContext taskContext) { OptExpression scan = new OptExpression( new LogicalIcebergScanOperator(table, Table.TableType.ICEBERG, scanColumnMap, Maps.newHashMap(), -1, null)); @@ -93,4 +96,61 @@ private void doIcebergTransform(OptExpression scan, Assert.assertEquals(transferMap.size(), 1); Assert.assertEquals(transferMap.get(intColumnOperator).getName(), "id"); } + + @Test + public void transformHudiWithPredicate(@Mocked HudiTable table, + @Mocked OptimizerContext context, + @Mocked TaskContext taskContext) { + OptExpression scan = new OptExpression( + new LogicalHudiScanOperator(table, Table.TableType.HUDI, + scanColumnMap, Maps.newHashMap(), -1, + new BinaryPredicateOperator(BinaryPredicateOperator.BinaryType.EQ, + new ColumnRefOperator(1, Type.INT, "id", true), + ConstantOperator.createInt(1)))); + + List taskContextList = new ArrayList<>(); + taskContextList.add(taskContext); + + ColumnRefSet requiredOutputColumns = new ColumnRefSet(new ArrayList<>( + Collections.singleton(new ColumnRefOperator(1, Type.INT, "id", true)))); + + doHudiTransform(scan, context, requiredOutputColumns, taskContextList, taskContext); + } + + @Test + public void transformHudiWithNoScanColumn(@Mocked HudiTable table, + @Mocked OptimizerContext context, + @Mocked TaskContext taskContext) { + OptExpression scan = new OptExpression( + new LogicalHudiScanOperator(table, Table.TableType.HUDI, + scanColumnMap, Maps.newHashMap(), -1, null)); + + List taskContextList = new ArrayList<>(); + taskContextList.add(taskContext); + + ColumnRefSet requiredOutputColumns = new ColumnRefSet(new ArrayList<>()); + + doHudiTransform(scan, context, requiredOutputColumns, taskContextList, taskContext); + } + + private void doHudiTransform(OptExpression scan, + OptimizerContext context, + ColumnRefSet requiredOutputColumns, + List taskContextList, + TaskContext taskContext) { + new Expectations() { { + context.getTaskContext(); + minTimes = 0; + result = taskContextList; + + taskContext.getRequiredColumns(); + minTimes = 0; + result = requiredOutputColumns; + }}; + List list = hudiRule.transform(scan, context); + Map transferMap = ((LogicalHudiScanOperator)list.get(0) + .getOp()).getColRefToColumnMetaMap(); + Assert.assertEquals(transferMap.size(), 1); + Assert.assertEquals(transferMap.get(intColumnOperator).getName(), "id"); + } }