From ebc5c559417cc1015da42e02883cb2bf276f396f Mon Sep 17 00:00:00 2001
From: stephen <91597003+stephen-shelby@users.noreply.github.com>
Date: Mon, 24 Oct 2022 21:45:04 +0800
Subject: [PATCH] aaaaa (#12425)

Fixes #

Because we completely refactored the hive meta cache (refer to #11349), this PR
refactors the hive TPCH plan tests.

Origin PR: #10013
---
 .../external/MockedMetadataMgrForHive.java    | 496 ++++++++++++++++++
 .../sql/plan/HivePartitionPruneTest.java      |  53 ++
 .../starrocks/sql/plan/HivePlanTestBase.java  |  34 ++
 .../starrocks/sql/plan/HiveTPCHPlanTest.java  | 143 +++++
 4 files changed, 726 insertions(+)
 create mode 100644 fe/fe-core/src/test/java/com/starrocks/external/MockedMetadataMgrForHive.java
 create mode 100644 fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePartitionPruneTest.java
 create mode 100644 fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePlanTestBase.java
 create mode 100644 fe/fe-core/src/test/java/com/starrocks/sql/plan/HiveTPCHPlanTest.java

diff --git a/fe/fe-core/src/test/java/com/starrocks/external/MockedMetadataMgrForHive.java b/fe/fe-core/src/test/java/com/starrocks/external/MockedMetadataMgrForHive.java
new file mode 100644
index 0000000000000..362dfd4e31d60
--- /dev/null
+++ b/fe/fe-core/src/test/java/com/starrocks/external/MockedMetadataMgrForHive.java
@@ -0,0 +1,496 @@
+// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
+
+package com.starrocks.external;
+
+import com.clearspring.analytics.util.Lists;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Maps;
+import com.starrocks.analysis.DateLiteral;
+import com.starrocks.analysis.IntLiteral;
+import com.starrocks.catalog.Column;
+import com.starrocks.catalog.Database;
+import com.starrocks.catalog.HiveMetaStoreTable;
+import com.starrocks.catalog.PartitionKey;
+import com.starrocks.catalog.PrimitiveType;
+import com.starrocks.catalog.Type;
+import com.starrocks.common.util.DateUtils;
+import com.starrocks.connector.ConnectorMgr;
+import com.starrocks.connector.exception.StarRocksConnectorException;
+import com.starrocks.external.hive.CachingHiveMetastore;
+import com.starrocks.external.hive.HiveMetaClient;
+import com.starrocks.external.hive.HiveMetastore;
+import com.starrocks.external.hive.HiveMetastoreApiConverter;
+import com.starrocks.external.hive.HiveMetastoreOperations;
+import com.starrocks.external.hive.HivePartitionStats;
+import com.starrocks.external.hive.HiveRemoteFileIO;
+import com.starrocks.external.hive.HiveStatisticsProvider;
+import com.starrocks.server.LocalMetastore;
+import com.starrocks.server.MetadataMgr;
+import com.starrocks.sql.optimizer.OptimizerContext;
+import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
+import com.starrocks.sql.optimizer.statistics.ColumnStatistic;
+import com.starrocks.sql.optimizer.statistics.Statistics;
+import org.apache.commons.collections4.map.CaseInsensitiveMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+
+import java.lang.reflect.Method;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static com.starrocks.external.hive.CachingHiveMetastore.createCatalogLevelInstance;
+import static com.starrocks.sql.optimizer.Utils.getLongFromDateTime;
+import static java.lang.Double.NEGATIVE_INFINITY;
+import static java.lang.Double.POSITIVE_INFINITY;
+
+public class MockedMetadataMgrForHive extends MetadataMgr {
+
+    // catalog -> db -> tableName -> table
+    private static final Map<String, Map<String, Map<String, HiveTableInfo>>> MOCK_TABLE_MAP = Maps.newHashMap();
+    private final AtomicLong idGen = new AtomicLong(0L);
+    private static final List<RemoteFileInfo> MOCKED_FILES = ImmutableList.of(
+            new RemoteFileInfo(null, ImmutableList.of(), null));
+
+    static {
+        mockTPCHTable();
+        mockPartitionTable();
+    }
+
+    public MockedMetadataMgrForHive(LocalMetastore localMetastore, ConnectorMgr connectorMgr) {
+        super(localMetastore, connectorMgr);
+    }
+
+    @Override
+    public com.starrocks.catalog.Table getTable(String catalogName, String dbName, String tblName) {
+        return MOCK_TABLE_MAP.get(catalogName).get(dbName).get(tblName).table;
+    }
+
+    @Override
+    public Database getDb(String catalogName, String dbName) {
+        return new Database(idGen.getAndIncrement(), dbName);
+    }
+
+    @Override
+    public List<String> listPartitionNames(String catalogName, String dbName, String tableName) {
+        return MOCK_TABLE_MAP.get(catalogName).get(dbName).get(tableName).partitionNames;
+    }
+
+    @Override
+    public Statistics getTableStatistics(OptimizerContext session,
+                                         String catalogName,
+                                         com.starrocks.catalog.Table table,
+                                         List<ColumnRefOperator> columns,
+                                         List<PartitionKey> partitionKeys) {
+        HiveMetaStoreTable hmsTable = (HiveMetaStoreTable) table;
+        String hiveDb = hmsTable.getDbName();
+        String tblName = hmsTable.getTableName();
+        HiveTableInfo info = MOCK_TABLE_MAP.get(catalogName).get(hiveDb).get(tblName);
+        Statistics.Builder builder = Statistics.builder();
+        builder.setOutputRowCount(info.rowCount);
+        for (ColumnRefOperator columnRefOperator : columns) {
+            ColumnStatistic columnStatistic = info.columnStatsMap.get(columnRefOperator.getName());
+            builder.addColumnStatistic(columnRefOperator, columnStatistic);
+        }
+        return builder.build();
+    }
+
+    @Override
+    public List<RemoteFileInfo> getRemoteFileInfos(String catalogName, com.starrocks.catalog.Table table,
+                                                   List<PartitionKey> partitionKeys) {
+        HiveMetaStoreTable hmsTbl = (HiveMetaStoreTable) table;
+        int size = partitionKeys.size();
+        return MOCK_TABLE_MAP.get(catalogName).get(hmsTbl.getDbName()).get(hmsTbl.getTableName()).partitions.subList(0, size);
+    }
+
+    public static void mockTPCHTable() {
+        String catalogName = "hive0";
+        String dbName = "tpch";
+
+        Map<String, HiveTableInfo> mockTables = Maps.newHashMap();
+        Map<String, Map<String, HiveTableInfo>> mockDbTables = Maps.newHashMap();
+        mockDbTables.put(dbName, mockTables);
+
+        // Mock table region
+        List<FieldSchema> cols = Lists.newArrayList();
+        cols.add(new FieldSchema("r_regionkey", "int", null));
+        cols.add(new FieldSchema("r_name", "string", null));
+        cols.add(new FieldSchema("r_comment", "string", null));
+        StorageDescriptor sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        CaseInsensitiveMap<String, ColumnStatistic> regionStats = new CaseInsensitiveMap<>();
+        regionStats.put("r_regionkey", new ColumnStatistic(0, 4, 0, 4, 5));
+        regionStats.put("r_name", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 6.8, 5));
+        regionStats.put("r_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 66, 5));
+
+        Table region = new Table("region", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(region.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(region, catalogName),
+                ImmutableList.of(), 5, regionStats, MOCKED_FILES));
+
+        // Mock table nation
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("n_nationkey", "int", null));
+        cols.add(new FieldSchema("n_name", "string", null));
+        cols.add(new FieldSchema("n_regionkey", "int", null));
+        cols.add(new FieldSchema("n_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        Map<String, ColumnStatistic> nationStats = new CaseInsensitiveMap<>();
+        nationStats.put("n_nationkey", new ColumnStatistic(0, 24, 0, 4, 25));
+        nationStats.put("n_name", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 25, 25));
+        nationStats.put("n_regionkey", new ColumnStatistic(0, 4, 0, 4, 5));
+        nationStats.put("n_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 0, 25));
+        Table nation = new Table("nation", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(nation.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(nation, catalogName),
+                ImmutableList.of(), 25, nationStats, MOCKED_FILES));
+
+        // Mock table supplier
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("s_suppkey", "int", null));
+        cols.add(new FieldSchema("s_name", "string", null));
+        cols.add(new FieldSchema("s_address", "string", null));
+        cols.add(new FieldSchema("s_nationkey", "int", null));
+        cols.add(new FieldSchema("s_phone", "string", null));
+        cols.add(new FieldSchema("s_acctbal", "decimal(15,2)", null));
+        cols.add(new FieldSchema("s_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        CaseInsensitiveMap<String, ColumnStatistic> supplierStats = new CaseInsensitiveMap<>();
+        supplierStats.put("s_suppkey", new ColumnStatistic(1, 1000000.0, 0, 4, 1000000));
+        supplierStats.put("s_name", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 25, 1000000));
+        supplierStats.put("s_address", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 40, 1000000));
+        supplierStats.put("s_nationkey", new ColumnStatistic(0, 24, 0, 4, 25));
+        supplierStats.put("s_phone", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 15, 1000000));
+        supplierStats.put("s_acctbal", new ColumnStatistic(-998.22, 9999.72, 0, 8, 656145));
+        supplierStats.put("s_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 101, 984748));
+        Table supplier = new Table("supplier", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(supplier.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(supplier, catalogName),
+                ImmutableList.of(), 1000000, supplierStats, MOCKED_FILES));
+
+        // Mock table part
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("p_partkey", "int", null));
+        cols.add(new FieldSchema("p_name", "string", null));
+        cols.add(new FieldSchema("p_mfgr", "string", null));
+        cols.add(new FieldSchema("p_brand", "string", null));
+        cols.add(new FieldSchema("p_type", "string", null));
+        cols.add(new FieldSchema("p_size", "int", null));
+        cols.add(new FieldSchema("p_container", "string", null));
+        cols.add(new FieldSchema("p_retailprice", "decimal(15,2)", null));
+        cols.add(new FieldSchema("p_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        CaseInsensitiveMap<String, ColumnStatistic> partStats = new CaseInsensitiveMap<>();
+        partStats.put("p_partkey", new ColumnStatistic(1, 20000000, 0, 8, 20000000));
+        partStats.put("p_name", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 55, 20000000));
+        partStats.put("p_mfgr", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 25, 5));
+        partStats.put("p_brand", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 10, 25));
+        partStats.put("p_type", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 25, 150));
+        partStats.put("p_size", new ColumnStatistic(1, 50, 0, 4, 50));
+        partStats.put("p_container", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 10, 40));
+        partStats.put("p_retailprice", new ColumnStatistic(901, 2098.99, 0, 8, 120039));
+        partStats.put("p_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 0, 3927659));
+        Table part = new Table("part", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        HiveTableInfo hiveTableInfo = new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(part, catalogName),
+                ImmutableList.of(), 20000000, partStats, MOCKED_FILES);
+        mockTables.put(part.getTableName(), hiveTableInfo);
+
+        // Mock table partsupp
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("ps_partkey", "int", null));
+        cols.add(new FieldSchema("ps_suppkey", "int", null));
+        cols.add(new FieldSchema("ps_availqty", "int", null));
+        cols.add(new FieldSchema("ps_supplycost", "decimal(15,2)", null));
+        cols.add(new FieldSchema("ps_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        CaseInsensitiveMap<String, ColumnStatistic> partSuppStats = new CaseInsensitiveMap<>();
+        partSuppStats.put("ps_partkey", new ColumnStatistic(1, 20000000, 0, 8, 20000000));
+        partSuppStats.put("ps_suppkey", new ColumnStatistic(1, 1000000, 0, 8, 1000000));
+        partSuppStats.put("ps_availqty", new ColumnStatistic(1, 9999, 0, 4, 9999));
+        partSuppStats.put("ps_supplycost", new ColumnStatistic(1, 1000, 0, 8, 99864));
+        partSuppStats.put("ps_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 199, 71873944));
+        Table partSupp = new Table("partsupp", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(partSupp.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(partSupp, catalogName),
+                ImmutableList.of(), 80000000, partSuppStats, MOCKED_FILES));
+
+        // Mock customer table
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("c_custkey", "int", null));
+        cols.add(new FieldSchema("c_name", "string", null));
+        cols.add(new FieldSchema("c_address", "string", null));
+        cols.add(new FieldSchema("c_nationkey", "int", null));
+        cols.add(new FieldSchema("c_phone", "string", null));
+        cols.add(new FieldSchema("c_acctbal", "decimal(15,2)", null));
+        cols.add(new FieldSchema("c_mktsegment", "string", null));
+        cols.add(new FieldSchema("c_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        Map<String, ColumnStatistic> customerStats = new CaseInsensitiveMap<>();
+        customerStats.put("c_custkey", new ColumnStatistic(1, 15000000, 0, 8, 15000000));
+        customerStats.put("c_name", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 25, 15000000));
+        customerStats.put("c_address", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 40, 15000000));
+        customerStats.put("c_nationkey", new ColumnStatistic(0, 24, 0, 4, 25));
+        customerStats.put("c_phone", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 15, 15000000));
+        customerStats.put("c_acctbal", new ColumnStatistic(-999.99, 9999.99, 0, 8, 1086564));
+        customerStats.put("c_mktsegment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 10, 5));
+        customerStats.put("c_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 117, 14788744));
+        Table customer = new Table("customer", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(customer.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(customer, catalogName),
+                ImmutableList.of(), 15000000, customerStats, MOCKED_FILES));
+
+        // Mock table orders
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("o_orderkey", "int", null));
+        cols.add(new FieldSchema("o_custkey", "int", null));
+        cols.add(new FieldSchema("o_orderstatus", "string", null));
+        cols.add(new FieldSchema("o_totalprice", "decimal(15,2)", null));
+        cols.add(new FieldSchema("o_orderdate", "date", null));
+        cols.add(new FieldSchema("o_orderpriority", "string", null));
+        cols.add(new FieldSchema("o_clerk", "string", null));
+        cols.add(new FieldSchema("o_shippriority", "int", null));
+        cols.add(new FieldSchema("o_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        CaseInsensitiveMap<String, ColumnStatistic> ordersStats = new CaseInsensitiveMap<>();
+        ordersStats.put("o_orderkey", new ColumnStatistic(1, 600000000, 0, 8, 150000000));
+        ordersStats.put("o_custkey", new ColumnStatistic(1, 150000000, 0, 8, 10031873));
+        ordersStats.put("o_orderstatus", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 1, 3));
+        ordersStats.put("o_totalprice", new ColumnStatistic(811.73, 591036.15, 0, 8, 34696580));
+        ordersStats.put("o_orderdate", new ColumnStatistic(getLongFromDateTime(
+                DateUtils.parseStringWithDefaultHSM("1992-01-01", DateUtils.DATE_FORMATTER)),
+                getLongFromDateTime(DateUtils.parseStringWithDefaultHSM("1998-08-02",
+                        DateUtils.DATE_FORMATTER)), 0, 4, 2412));
+        ordersStats.put("o_orderpriority", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 15, 5));
+        ordersStats.put("o_clerk", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 15, 100836));
+        ordersStats.put("o_shippriority", new ColumnStatistic(0, 0, 0, 4, 1));
+        ordersStats.put("o_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 79, 110204136));
+        Table orders = new Table("orders", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(orders.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(orders, catalogName),
+                ImmutableList.of(), 150000000, ordersStats, MOCKED_FILES));
+
+        // Mock table lineitem
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("l_orderkey", "int", null));
+        cols.add(new FieldSchema("l_partkey", "int", null));
+        cols.add(new FieldSchema("l_suppkey", "int", null));
+        cols.add(new FieldSchema("l_linenumber", "int", null));
+        cols.add(new FieldSchema("l_quantity", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_extendedprice", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_discount", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_tax", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_returnflag", "string", null));
+        cols.add(new FieldSchema("l_linestatus", "string", null));
+        cols.add(new FieldSchema("l_shipdate", "date", null));
+        cols.add(new FieldSchema("l_commitdate", "date", null));
+        cols.add(new FieldSchema("l_receiptdate", "date", null));
+        cols.add(new FieldSchema("l_shipinstruct", "string", null));
+        cols.add(new FieldSchema("l_shipmode", "string", null));
+        cols.add(new FieldSchema("l_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+
+        Map<String, ColumnStatistic> lineitemStats = new CaseInsensitiveMap<>();
+        lineitemStats.put("l_orderkey", new ColumnStatistic(1, 600000000, 0, 8, 150000000));
+        lineitemStats.put("l_partkey", new ColumnStatistic(1, 20000000, 0, 8, 20000000));
+        lineitemStats.put("l_suppkey", new ColumnStatistic(1, 1000000, 0, 4, 1000000));
+        lineitemStats.put("l_linenumber", new ColumnStatistic(1, 7, 0, 4, 7));
+        lineitemStats.put("l_quantity", new ColumnStatistic(1, 50, 0, 8, 50));
+        lineitemStats.put("l_extendedprice", new ColumnStatistic(901, 104949.5, 0, 8, 3736520));
+        lineitemStats.put("l_discount", new ColumnStatistic(0, 0.1, 0, 8, 11));
+        lineitemStats.put("l_tax", new ColumnStatistic(0, 0.08, 0, 8, 9));
+        lineitemStats.put("l_returnflag", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 1, 3));
+        lineitemStats.put("l_linestatus", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 1, 2));
+        lineitemStats.put("l_shipdate", new ColumnStatistic(getLongFromDateTime(
+                DateUtils.parseStringWithDefaultHSM("1992-01-02", DateUtils.DATE_FORMATTER)),
+                getLongFromDateTime(DateUtils.parseStringWithDefaultHSM("1998-12-01", DateUtils.DATE_FORMATTER)),
+                0, 4, 2526));
+        lineitemStats.put("l_commitdate", new ColumnStatistic(getLongFromDateTime(
+                DateUtils.parseStringWithDefaultHSM("1992-01-31", DateUtils.DATE_FORMATTER)),
+                getLongFromDateTime(DateUtils.parseStringWithDefaultHSM("1998-10-31", DateUtils.DATE_FORMATTER)),
+                0, 4, 2466));
+        lineitemStats.put("l_receiptdate", new ColumnStatistic(getLongFromDateTime(DateUtils.parseStringWithDefaultHSM(
+                "1992-01-03", DateUtils.DATE_FORMATTER)),
+                getLongFromDateTime(DateUtils.parseStringWithDefaultHSM("1998-12-31", DateUtils.DATE_FORMATTER)),
+                0, 4, 2554));
+        lineitemStats.put("l_shipinstruct", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 25, 4));
+        lineitemStats.put("l_shipmode", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 10, 7));
+        lineitemStats.put("l_comment", new ColumnStatistic(NEGATIVE_INFINITY, POSITIVE_INFINITY, 0, 44, 142089728));
+        Table lineitem = new Table("lineitem", "tpch", null, 0, 0, 0, sd,
+                Lists.newArrayList(), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+        mockTables.put(lineitem.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(lineitem, catalogName),
+                ImmutableList.of(), 600037902, lineitemStats, MOCKED_FILES));
+
+        MOCK_TABLE_MAP.put(catalogName, mockDbTables);
+    }
+
+    public static void mockPartitionTable() {
+        String catalogName = "hive0";
+        String dbName = "partitioned_db";
+
+        MOCK_TABLE_MAP.putIfAbsent(catalogName, Maps.newHashMap());
+        Map<String, Map<String, HiveTableInfo>> mockDbTables = MOCK_TABLE_MAP.get(catalogName);
+        mockDbTables.putIfAbsent(dbName, Maps.newHashMap());
+        Map<String, HiveTableInfo> mockTables = mockDbTables.get(dbName);
+
+        List<FieldSchema> cols = Lists.newArrayList();
+        cols.add(new FieldSchema("c1", "int", null));
+        cols.add(new FieldSchema("c2", "string", null));
+        cols.add(new FieldSchema("c3", "string", null));
+        StorageDescriptor sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+        Table t1 = new Table("t1", "partitioned_db", null, 0, 0, 0, sd,
+                ImmutableList.of(new FieldSchema("par_col", "string", null)), Maps.newHashMap(),
+                null, null, "EXTERNAL_TABLE");
+
+        List<String> partitionNames = ImmutableList.of("par_col=0", "par_col=1", "par_col=2");
+
+        List<PartitionKey> partitionKeyList = Lists.newArrayList();
+        partitionKeyList.add(new PartitionKey(ImmutableList.of(new IntLiteral(0)), ImmutableList.of(PrimitiveType.INT)));
+        partitionKeyList.add(new PartitionKey(ImmutableList.of(new IntLiteral(1)), ImmutableList.of(PrimitiveType.INT)));
+        partitionKeyList.add(new PartitionKey(ImmutableList.of(new IntLiteral(2)), ImmutableList.of(PrimitiveType.INT)));
+        Column partitionColumn = new Column("par_col", Type.INT);
+        double avgNumPerPartition = (double) (100 / 3);
+        double rowCount = 100;
+        List<String> partitionColumnNames = ImmutableList.of("par_col");
+        Map<String, HivePartitionStats> hivePartitionStatsMap = Maps.newHashMap();
+        Method method;
+        ColumnStatistic partitionColumnStats;
+        HiveMetaClient metaClient = new HiveMetaClient(new HiveConf());
+        HiveMetastore metastore = new HiveMetastore(metaClient, "hive0");
+        CachingHiveMetastore cachingHiveMetastore = createCatalogLevelInstance(
+                metastore, Executors.newSingleThreadExecutor(), 0, 0, 0, false);
+        HiveMetastoreOperations hmsOps = new HiveMetastoreOperations(cachingHiveMetastore, false);
+        RemoteFileIO remoteFileIO = new HiveRemoteFileIO(new Configuration());
+        CachingRemoteFileIO cacheIO = new CachingRemoteFileIO(remoteFileIO, Executors.newSingleThreadExecutor(), 0, 0, 0);
+        RemoteFileOperations fileOps = new RemoteFileOperations(cacheIO, Executors.newSingleThreadExecutor(), false, false);
+
+        HiveStatisticsProvider hiveStatisticsProvider = new HiveStatisticsProvider(hmsOps, fileOps);
+        try {
+            method = HiveStatisticsProvider.class.getDeclaredMethod("createPartitionColumnStatistics",
+                    Column.class, List.class, Map.class, List.class, double.class, double.class);
+            method.setAccessible(true);
+            partitionColumnStats = (ColumnStatistic) method.invoke(hiveStatisticsProvider, partitionColumn,
+                    partitionKeyList, hivePartitionStatsMap, partitionColumnNames, avgNumPerPartition, rowCount);
+        } catch (Exception e) {
+            throw new StarRocksConnectorException("get partition statistics failed", e);
+        }
+
+        Map<String, ColumnStatistic> columnStatisticMap;
+        List<String> colNames = cols.stream().map(FieldSchema::getName).collect(Collectors.toList());
+        columnStatisticMap = colNames.stream().collect(Collectors.toMap(Function.identity(),
+                col -> ColumnStatistic.unknown()));
+        columnStatisticMap.put("par_col", partitionColumnStats);
+        List<RemoteFileInfo> partitions = Lists.newArrayList();
+        partitions.add(new RemoteFileInfo(null, ImmutableList.of(), null));
+        partitions.add(new RemoteFileInfo(null, ImmutableList.of(), null));
+        partitions.add(new RemoteFileInfo(null, ImmutableList.of(), null));
+
+        mockTables.put(t1.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(t1, catalogName),
+                partitionNames, (long) rowCount, columnStatisticMap, partitions));
+
+        cols = Lists.newArrayList();
+        cols.add(new FieldSchema("l_orderkey", "int", null));
+        cols.add(new FieldSchema("l_partkey", "int", null));
+        cols.add(new FieldSchema("l_suppkey", "int", null));
+        cols.add(new FieldSchema("l_linenumber", "int", null));
+        cols.add(new FieldSchema("l_quantity", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_extendedprice", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_discount", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_tax", "decimal(15,2)", null));
+        cols.add(new FieldSchema("l_returnflag", "string", null));
+        cols.add(new FieldSchema("l_linestatus", "string", null));
+        cols.add(new FieldSchema("l_commitdate", "date", null));
+        cols.add(new FieldSchema("l_receiptdate", "date", null));
+        cols.add(new FieldSchema("l_shipinstruct", "string", null));
+        cols.add(new FieldSchema("l_shipmode", "string", null));
+        cols.add(new FieldSchema("l_comment", "string", null));
+        sd = new StorageDescriptor(cols, "", "", "", false, -1, null, Lists.newArrayList(),
+                Lists.newArrayList(), Maps.newHashMap());
+        Table lineItemPar = new Table("lineitem_par", "partitioned_db", null, 0, 0, 0, sd,
+                ImmutableList.of(new FieldSchema("l_shipdate", "Date", null)), Maps.newHashMap(), null, null, "EXTERNAL_TABLE");
+
+        partitionColumn = new Column("l_shipdate", Type.DATE);
+
+        List<PartitionKey> lineitemPartitionKeyList = Lists.newArrayList();
+        lineitemPartitionKeyList.add(new PartitionKey(ImmutableList.of(new DateLiteral(1998, 1, 1)),
+                ImmutableList.of(PrimitiveType.DATE)));
+        lineitemPartitionKeyList.add(new PartitionKey(ImmutableList.of(new DateLiteral(1998, 1, 2)),
+                ImmutableList.of(PrimitiveType.DATE)));
+        lineitemPartitionKeyList.add(new PartitionKey(ImmutableList.of(new DateLiteral(1998, 1, 3)),
+                ImmutableList.of(PrimitiveType.DATE)));
+        lineitemPartitionKeyList.add(new PartitionKey(ImmutableList.of(new DateLiteral(1998, 1, 4)),
+                ImmutableList.of(PrimitiveType.DATE)));
+        lineitemPartitionKeyList.add(new PartitionKey(ImmutableList.of(new DateLiteral(1998, 1, 5)),
+                ImmutableList.of(PrimitiveType.DATE)));
+
+        partitionNames = ImmutableList.of("l_shipdate=1998-01-01", "l_shipdate=1998-01-02", "l_shipdate=1998-01-03",
+                "l_shipdate=1998-01-04", "l_shipdate=1998-01-05");
+
+        partitionColumnNames = ImmutableList.of("l_shipdate");
+
+        rowCount = 600037902;
+        avgNumPerPartition = rowCount / partitionNames.size();
+
+        try {
+            partitionColumnStats = (ColumnStatistic) method.invoke(hiveStatisticsProvider, partitionColumn,
+                    lineitemPartitionKeyList, hivePartitionStatsMap, partitionColumnNames, avgNumPerPartition, rowCount);
+        } catch (Exception e) {
+            throw new StarRocksConnectorException("get statistics failed", e);
+        }
+
+        List<RemoteFileInfo> partitions1 = Lists.newArrayList();
+        partitions1.add(new RemoteFileInfo(null, ImmutableList.of(), null));
+        partitions1.add(new RemoteFileInfo(null, ImmutableList.of(), null));
+        partitions1.add(new RemoteFileInfo(null, ImmutableList.of(), null));
+
+        colNames = cols.stream().map(FieldSchema::getName).collect(Collectors.toList());
+        Map<String, ColumnStatistic> columnStatisticMap1;
+        columnStatisticMap1 = colNames.stream().collect(Collectors.toMap(Function.identity(),
+                col -> ColumnStatistic.unknown()));
+        columnStatisticMap1.put("l_shipdate", partitionColumnStats);
+
+        mockTables.put(lineItemPar.getTableName(), new HiveTableInfo(HiveMetastoreApiConverter.toHiveTable(
+                lineItemPar, catalogName), partitionNames, (long) rowCount, columnStatisticMap1, partitions1));
+    }
+
+    private static class HiveTableInfo {
+        public final com.starrocks.catalog.Table table;
+        public final List<String> partitionNames;
+        public final long rowCount;
+        public final Map<String, ColumnStatistic> columnStatsMap;
+        private final List<RemoteFileInfo> partitions;
+
+        public HiveTableInfo(com.starrocks.catalog.Table table,
+                             List<String> partitionNames,
+                             long rowCount,
+                             Map<String, ColumnStatistic> columnStatsMap,
+                             List<RemoteFileInfo> partitions) {
+            this.table = table;
+            this.partitionNames = partitionNames;
+            this.rowCount = rowCount;
+            this.columnStatsMap = columnStatsMap;
+            this.partitions = partitions;
+        }
+    }
+}
diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePartitionPruneTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePartitionPruneTest.java
new file mode 100644
index 0000000000000..517c87f97b3dd
--- /dev/null
+++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePartitionPruneTest.java
@@ -0,0 +1,53 @@
+// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
+
+package com.starrocks.sql.plan;
+
+import com.starrocks.common.DdlException;
+import com.starrocks.server.GlobalStateMgr;
+import org.junit.Before;
+import org.junit.Test;
+
+public class HivePartitionPruneTest extends HivePlanTestBase {
+    @Before
+    public void setUp() throws DdlException {
+        GlobalStateMgr.getCurrentState().changeCatalogDb(connectContext, "hive0.partitioned_db");
+    }
+
+    @Test
+    public void testHivePartitionPrune() throws Exception {
+        String sql = "select * from t1 where par_col = 0;";
+        String plan = getFragmentPlan(sql);
+        assertContains(plan, "0:HdfsScanNode\n" +
+                "     TABLE: t1\n" +
+                "     PARTITION PREDICATES: 4: par_col = '0'\n" +
+                "     partitions=1/3");
+
+        sql = "select * from t1 where par_col = 1 and c1 = 2";
+        plan = getFragmentPlan(sql);
+        assertContains(plan, "0:HdfsScanNode\n" +
+                "     TABLE: t1\n" +
+                "     PARTITION PREDICATES: 4: par_col = '1'\n" +
+                "     NON-PARTITION PREDICATES: 1: c1 = 2\n" +
+                "     MIN/MAX PREDICATES: 5: c1 <= 2, 6: c1 >= 2\n" +
+                "     partitions=1/3");
+
+        sql = "select * from t1 where par_col = abs(-1) and c1 = 2";
+        plan = getFragmentPlan(sql);
+        assertContains(plan, "0:HdfsScanNode\n" +
+                "     TABLE: t1\n" +
+                "     PARTITION PREDICATES: 4: par_col = CAST(abs(-1) AS VARCHAR(1048576))\n" +
+                "     NON-PARTITION PREDICATES: 1: c1 = 2\n" +
+                "     NO EVAL-PARTITION PREDICATES: 4: par_col = CAST(abs(-1) AS VARCHAR(1048576))\n" +
+                "     MIN/MAX PREDICATES: 5: c1 <= 2, 6: c1 >= 2\n" +
+                "     partitions=3/3");
+
+        sql = "select * from t1 where par_col = 1+1 and c1 = 2";
+        plan = getFragmentPlan(sql);
+        assertContains(plan, "0:HdfsScanNode\n" +
+                "     TABLE: t1\n" +
+                "     PARTITION PREDICATES: 4: par_col = '2'\n" +
+                "     NON-PARTITION PREDICATES: 1: c1 = 2\n" +
+                "     MIN/MAX PREDICATES: 5: c1 <= 2, 6: c1 >= 2\n" +
+                "     partitions=1/3");
+    }
+}
diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePlanTestBase.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePlanTestBase.java
new file mode 100644
index 0000000000000..2c9717402e56f
--- /dev/null
+++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/HivePlanTestBase.java
@@ -0,0 +1,34 @@
+// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
+
+package com.starrocks.sql.plan;
+
+import com.google.common.collect.Maps;
+import com.starrocks.common.DdlException;
+import com.starrocks.common.FeConstants;
+import com.starrocks.external.MockedMetadataMgrForHive;
+import com.starrocks.server.GlobalStateMgr;
+import org.junit.BeforeClass;
+
+import java.util.Map;
+
+public class HivePlanTestBase extends PlanTestBase {
+
+    @BeforeClass
+    public static void beforeClass() throws Exception {
+        PlanTestBase.beforeClass();
+        FeConstants.runningUnitTest = true;
+        mockHiveCatalog();
+    }
+
+    private static void mockHiveCatalog() throws DdlException {
+        Map<String, String> properties = Maps.newHashMap();
+
+        properties.put("type", "hive");
+        properties.put("hive.metastore.uris", "thrift://127.0.0.1:9083");
+        GlobalStateMgr.getCurrentState().getCatalogMgr().createCatalog("hive", "hive0", "", properties);
+
+        GlobalStateMgr gsmMgr = connectContext.getGlobalStateMgr();
+        MockedMetadataMgrForHive metadataMgr = new MockedMetadataMgrForHive(gsmMgr.getLocalMetastore(), gsmMgr.getConnectorMgr());
+        gsmMgr.setMetadataMgr(metadataMgr);
+    }
+}
diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/HiveTPCHPlanTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/HiveTPCHPlanTest.java
new file mode 100644
index 0000000000000..87edaa03f483d
--- /dev/null
+++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/HiveTPCHPlanTest.java
@@ -0,0 +1,143 @@
+// This file is licensed under the Elastic License 2.0. Copyright 2021-present, StarRocks Inc.
+
+package com.starrocks.sql.plan;
+
+import com.starrocks.common.DdlException;
+import com.starrocks.server.GlobalStateMgr;
+import com.starrocks.utframe.UtFrameUtils;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class HiveTPCHPlanTest extends HivePlanTestBase {
+    @BeforeClass
+    public static void beforeClass() throws Exception {
+        HivePlanTestBase.beforeClass();
+        UtFrameUtils.addMockBackend(10002);
+        UtFrameUtils.addMockBackend(10003);
+        GlobalStateMgr.getCurrentState().changeCatalogDb(connectContext, "hive0.tpch");
+    }
+
+    @AfterClass
+    public static void afterClass() {
+        try {
+            UtFrameUtils.dropMockBackend(10002);
+            UtFrameUtils.dropMockBackend(10003);
+        } catch (DdlException e) {
+            e.printStackTrace();
+        }
+    }
+
+    @Test
+    public void testTPCH1() {
+        runFileUnitTest("external/hive/tpch/q1");
+    }
+
+    @Test
+    public void testTPCH2() {
+        runFileUnitTest("external/hive/tpch/q2");
+    }
+
+    @Test
+    public void testTPCH3() {
+        runFileUnitTest("external/hive/tpch/q3");
+    }
+
+    @Test
+    public void testTPCH4() {
+        runFileUnitTest("external/hive/tpch/q4");
+    }
+
+    @Test
+    public void testTPCH5() {
+        runFileUnitTest("external/hive/tpch/q5");
+    }
+
+    @Test
+    public void testTPCH6() {
+        runFileUnitTest("external/hive/tpch/q6");
+    }
+
+    @Test
+    public void testTPCH7() {
+        runFileUnitTest("external/hive/tpch/q7");
+    }
+
+    @Test
+    public void testTPCH8() {
+        int oldValue = connectContext.getSessionVariable().getMaxTransformReorderJoins();
+        connectContext.getSessionVariable().setMaxTransformReorderJoins(4);
+        runFileUnitTest("external/hive/tpch/q8");
+        connectContext.getSessionVariable().setMaxTransformReorderJoins(oldValue);
+    }
+
+    @Test
+    public void testTPCH9() {
+        runFileUnitTest("external/hive/tpch/q9");
+    }
+
+    @Test
+    public void testTPCH10() {
+        runFileUnitTest("external/hive/tpch/q10");
+    }
+
+    @Test
+    public void testTPCH11() {
+        runFileUnitTest("external/hive/tpch/q11");
+    }
+
+    @Test
+    public void testTPCH12() {
+        runFileUnitTest("external/hive/tpch/q12");
+    }
+
+    @Test
+    public void testTPCH13() {
+        runFileUnitTest("external/hive/tpch/q13");
+    }
+
+    @Test
+    public void testTPCH14() {
+        runFileUnitTest("external/hive/tpch/q14");
+    }
+
+    @Test
+    public void testTPCH15() {
+        runFileUnitTest("external/hive/tpch/q15");
+    }
+
+    @Test
+    public void testTPCH16() {
+        runFileUnitTest("external/hive/tpch/q16");
+    }
+
+    @Test
+    public void testTPCH17() {
+        runFileUnitTest("external/hive/tpch/q17");
+    }
+
+    @Test
+    public void testTPCH18() {
+        runFileUnitTest("external/hive/tpch/q18");
+    }
+
+    @Test
+    public void testTPCH19() {
+        runFileUnitTest("external/hive/tpch/q19");
+    }
+
+    @Test
+    public void testTPCH20() {
+        runFileUnitTest("external/hive/tpch/q20");
+    }
+
+    @Test
+    public void testTPCH21() {
+        runFileUnitTest("external/hive/tpch/q21");
+    }
+
+    @Test
+    public void testTPCH22() {
+        runFileUnitTest("external/hive/tpch/q22");
+    }
+}