diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java index 1fdb20091a50..f3fc938ca249 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java @@ -13,7 +13,9 @@ */ package io.trino.plugin.hive; +import com.google.common.collect.ImmutableList; import com.google.common.net.HostAndPort; +import io.trino.spi.connector.SchemaTableName; import org.apache.hadoop.net.NetUtils; import org.testng.SkipException; import org.testng.annotations.BeforeClass; @@ -75,4 +77,77 @@ public void testHiveViewTranslationError() // TODO: combine this with tests for successful translation (currently in TestHiveViews product test) } } + + @Override + public void testUpdateBasicPartitionStatistics() + throws Exception + { + SchemaTableName tableName = temporaryTable("update_basic_partition_statistics"); + try { + createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + testUpdatePartitionStatistics( + tableName, + PartitionStatistics.empty(), + ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), + ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); + } + finally { + dropTable(tableName); + } + } + + @Override + public void testUpdatePartitionColumnStatistics() + throws Exception + { + SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); + try { + createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + testUpdatePartitionStatistics( + tableName, + PartitionStatistics.empty(), + ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), + ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); + } + finally { + dropTable(tableName); + } + } + + @Override + public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() + throws Exception + { + SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); + try { + createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + testUpdatePartitionStatistics( + tableName, + PartitionStatistics.empty(), + ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), + ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); + } + finally { + dropTable(tableName); + } + } + + @Override + public void testStorePartitionWithStatistics() + throws Exception + { + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, PartitionStatistics.empty()); + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java index b74220122636..7a0b2e5a2961 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java @@ -63,6 +63,7 @@ import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static io.trino.plugin.hive.util.HiveUtil.makePartName; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; +import static java.util.AbstractMap.SimpleEntry; import static java.util.Objects.requireNonNull; /** @@ -161,6 +162,18 @@ public Map getPartitionStatistics(Table table, List Map partitionRowCounts = partitionBasicStatistics.entrySet().stream() .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount())); + long tableRowCount = partitionRowCounts.values().stream() + .mapToLong(count -> count.orElse(0)) + .sum(); + if (!partitionRowCounts.isEmpty() && tableRowCount == 0) { + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + partitionBasicStatistics = partitionBasicStatistics.keySet().stream() + .map(key -> new SimpleEntry<>(key, HiveBasicStatistics.createEmptyStatistics())) + .collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue)); + } + Map> colStatsMap = client.getPartitionColumnStatistics(table.getDatabaseName(), table.getTableName(), ImmutableList.copyOf(partitionBasicStatistics.keySet()), dataColumns); Map> partitionColumnStatistics = colStatsMap.entrySet().stream() diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index f2a505c94885..869af7e98f0c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -116,6 +116,7 @@ import javax.inject.Inject; import java.time.Duration; +import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -318,10 +319,24 @@ public PartitionStatistics getTableStatistics(Table table) @Override public Map getPartitionStatistics(Table table, List partitions) { - return columnStatisticsProvider.getPartitionColumnStatistics(partitions).entrySet().stream() + Map partitionBasicStatistics = columnStatisticsProvider.getPartitionColumnStatistics(partitions).entrySet().stream() .collect(toImmutableMap( entry -> makePartitionName(table, entry.getKey()), entry -> new PartitionStatistics(getHiveBasicStatistics(entry.getKey().getParameters()), entry.getValue()))); + + long tableRowCount = partitionBasicStatistics.values().stream() + .mapToLong(partitionStatistics -> partitionStatistics.getBasicStatistics().getRowCount().orElse(0)) + .sum(); + if (!partitionBasicStatistics.isEmpty() && tableRowCount == 0) { + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + partitionBasicStatistics = partitionBasicStatistics.keySet().stream() + .map(key -> new SimpleEntry<>(key, PartitionStatistics.empty())) + .collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue)); + } + + return partitionBasicStatistics; } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 0bd5993fd0c9..43947e012687 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -87,6 +87,7 @@ import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; +import java.util.AbstractMap.SimpleEntry; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -387,6 +388,19 @@ public Map getPartitionStatistics(Table table, List })); Map partitionRowCounts = partitionBasicStatistics.entrySet().stream() .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount())); + + long tableRowCount = partitionRowCounts.values().stream() + .mapToLong(count -> count.orElse(0)) + .sum(); + if (!partitionRowCounts.isEmpty() && tableRowCount == 0) { + // When the table has partitions, but row count statistics are set to zero, we treat this case as empty + // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are + // used to ingest data into partitioned hive tables. + partitionBasicStatistics = partitionBasicStatistics.keySet().stream() + .map(partitionName -> new SimpleEntry<>(partitionName, HiveBasicStatistics.createEmptyStatistics())) + .collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue)); + } + Map> partitionColumnStatistics = getPartitionColumnStatistics( table.getDbName(), table.getTableName(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java index 9a7166bec7e2..06cb66a91c2f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java @@ -543,7 +543,7 @@ private static RowType toRowType(List columns) .add(new ColumnMetadata("ds", VARCHAR)) .build(); - protected static final PartitionStatistics EMPTY_TABLE_STATISTICS = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of()); + protected static final PartitionStatistics ZERO_TABLE_STATISTICS = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of()); protected static final PartitionStatistics BASIC_STATISTICS_1 = new PartitionStatistics(new HiveBasicStatistics(0, 20, 3, 0), ImmutableMap.of()); protected static final PartitionStatistics BASIC_STATISTICS_2 = new PartitionStatistics(new HiveBasicStatistics(0, 30, 2, 0), ImmutableMap.of()); @@ -584,7 +584,7 @@ private static RowType toRowType(List columns) .filter(entry -> entry.getKey().hashCode() % 2 == 1) .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); - private static final PartitionStatistics STATISTICS_2 = + protected static final PartitionStatistics STATISTICS_2 = new PartitionStatistics( BASIC_STATISTICS_2.getBasicStatistics(), ImmutableMap.builder() @@ -605,7 +605,7 @@ private static RowType toRowType(List columns) .put("t_long_decimal", createDecimalColumnStatistics(Optional.of(new BigDecimal("71234567890123456.123")), Optional.of(new BigDecimal("78123456789012345.123")), OptionalLong.of(2), OptionalLong.of(1))) .buildOrThrow()); - private static final PartitionStatistics STATISTICS_EMPTY_OPTIONAL_FIELDS = + protected static final PartitionStatistics STATISTICS_EMPTY_OPTIONAL_FIELDS = new PartitionStatistics( new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(20), OptionalLong.empty(), OptionalLong.of(0)), ImmutableMap.builder() @@ -2738,7 +2738,7 @@ public void testTableCreationIgnoreExisting() targetPath = locationService.forNewTable(transaction.getMetastore(), session, schemaName, tableName); Table table = createSimpleTable(schemaTableName, columns, session, targetPath, "q1"); transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), false, EMPTY_TABLE_STATISTICS, false); + .createTable(session, table, privileges, Optional.empty(), Optional.empty(), false, ZERO_TABLE_STATISTICS, false); Optional tableHandle = transaction.getMetastore().getTable(schemaName, tableName); assertTrue(tableHandle.isPresent()); transaction.commit(); @@ -2748,7 +2748,7 @@ public void testTableCreationIgnoreExisting() try (Transaction transaction = newTransaction()) { Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_2"), "q2"); transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), false, EMPTY_TABLE_STATISTICS, false); + .createTable(session, table, privileges, Optional.empty(), Optional.empty(), false, ZERO_TABLE_STATISTICS, false); transaction.commit(); fail("Expected exception"); } @@ -2760,7 +2760,7 @@ public void testTableCreationIgnoreExisting() try (Transaction transaction = newTransaction()) { Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_3"), "q3"); transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, EMPTY_TABLE_STATISTICS, false); + .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); transaction.commit(); } @@ -2769,7 +2769,7 @@ public void testTableCreationIgnoreExisting() try (Transaction transaction = newTransaction()) { Table table = createSimpleTable(schemaTableName, columns, session, targetPath.appendSuffix("_4"), "q4"); transaction.getMetastore() - .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, EMPTY_TABLE_STATISTICS, false); + .createTable(session, table, privileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); transaction.commit(); fail("Expected exception"); } @@ -3116,7 +3116,7 @@ public void testCreateEmptyTableShouldNotCreateStagingDirectory() Optional.empty(), Optional.empty(), true, - EMPTY_TABLE_STATISTICS, + ZERO_TABLE_STATISTICS, false); transaction.commit(); @@ -3302,7 +3302,7 @@ public void testUpdateBasicTableStatistics() SchemaTableName tableName = temporaryTable("update_basic_table_statistics"); try { doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, EMPTY_TABLE_STATISTICS, BASIC_STATISTICS_1, BASIC_STATISTICS_2); + testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, BASIC_STATISTICS_1, BASIC_STATISTICS_2); } finally { dropTable(tableName); @@ -3316,7 +3316,7 @@ public void testUpdateTableColumnStatistics() SchemaTableName tableName = temporaryTable("update_table_column_statistics"); try { doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, EMPTY_TABLE_STATISTICS, STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2); + testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2); } finally { dropTable(tableName); @@ -3330,7 +3330,7 @@ public void testUpdateTableColumnStatisticsEmptyOptionalFields() SchemaTableName tableName = temporaryTable("update_table_column_statistics_empty_optional_fields"); try { doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); - testUpdateTableStatistics(tableName, EMPTY_TABLE_STATISTICS, STATISTICS_EMPTY_OPTIONAL_FIELDS); + testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, STATISTICS_EMPTY_OPTIONAL_FIELDS); } finally { dropTable(tableName); @@ -3375,7 +3375,7 @@ public void testUpdateBasicPartitionStatistics() createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); testUpdatePartitionStatistics( tableName, - EMPTY_TABLE_STATISTICS, + ZERO_TABLE_STATISTICS, ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); } @@ -3393,7 +3393,7 @@ public void testUpdatePartitionColumnStatistics() createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); testUpdatePartitionStatistics( tableName, - EMPTY_TABLE_STATISTICS, + ZERO_TABLE_STATISTICS, ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); } @@ -3411,7 +3411,7 @@ public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); testUpdatePartitionStatistics( tableName, - EMPTY_TABLE_STATISTICS, + ZERO_TABLE_STATISTICS, ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); } @@ -3499,7 +3499,7 @@ public void testIllegalStorageFormatDuringTableScan() .setStorageFormat(StorageFormat.createNullable(null, null, null)) .setSerdeParameters(ImmutableMap.of())); PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, EMPTY_TABLE_STATISTICS, false); + transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); transaction.commit(); } @@ -3563,8 +3563,8 @@ protected void createDummyPartitionedTable(SchemaTableName tableName, List new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) .collect(toImmutableList()); metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS); + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> ZERO_TABLE_STATISTICS); + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> ZERO_TABLE_STATISTICS); } protected void testUpdatePartitionStatistics( @@ -3620,7 +3620,7 @@ protected void testUpdatePartitionStatistics( public void testStorePartitionWithStatistics() throws Exception { - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, EMPTY_TABLE_STATISTICS); + testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, ZERO_TABLE_STATISTICS); } protected void testStorePartitionWithStatistics( @@ -4267,7 +4267,7 @@ private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName // statistics HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); - assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1)); + assertEquals(tableStatistics.getRowCount().orElse(0), CREATE_TABLE_DATA.getRowCount() * (i + 1)); assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L); assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L); @@ -5600,7 +5600,7 @@ protected void createEmptyTable( .setSerdeParameters(ImmutableMap.of()); PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, EMPTY_TABLE_STATISTICS, false); + transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); transaction.commit(); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java index 5e067194d17b..f97a28461b84 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java @@ -254,7 +254,7 @@ private void createExternalTable(SchemaTableName schemaTableName, HiveStorageFor .setSerdeParameters(ImmutableMap.of()); PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner, session.getUser()); - transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.of(externalLocation), Optional.empty(), true, EMPTY_TABLE_STATISTICS, false); + transaction.getMetastore().createTable(session, tableBuilder.build(), principalPrivileges, Optional.of(externalLocation), Optional.empty(), true, ZERO_TABLE_STATISTICS, false); transaction.commit(); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java index dc47f9bfa0f1..03bdffa53904 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java @@ -301,7 +301,7 @@ public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() public void testStorePartitionWithStatistics() throws Exception { - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, EMPTY_TABLE_STATISTICS); + testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, ZERO_TABLE_STATISTICS); } @Override @@ -353,8 +353,8 @@ public void testGetPartitionsWithFilterUsingReservedKeywordsAsColumnName() .map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())) .collect(toImmutableList()); metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName1, currentStatistics -> EMPTY_TABLE_STATISTICS); - metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName2, currentStatistics -> EMPTY_TABLE_STATISTICS); + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName1, currentStatistics -> ZERO_TABLE_STATISTICS); + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName2, currentStatistics -> ZERO_TABLE_STATISTICS); Optional> partitionNames = metastoreClient.getPartitionNamesByFilter( tableName.getSchemaName(), @@ -1103,7 +1103,7 @@ public void testStatisticsLargeNumberOfColumns() .setColumnStatistics(columnStatistics.buildOrThrow()).build(); doCreateEmptyTable(tableName, ORC, columns.build()); - testUpdateTableStatistics(tableName, EMPTY_TABLE_STATISTICS, partitionStatistics); + testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, partitionStatistics); } finally { dropTable(tableName); @@ -1136,8 +1136,8 @@ public void testStatisticsLongColumnNames() doCreateEmptyTable(tableName, ORC, columns); assertThat(metastore.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) - .isEqualTo(EMPTY_TABLE_STATISTICS); - testUpdateTableStatistics(tableName, EMPTY_TABLE_STATISTICS, partitionStatistics); + .isEqualTo(ZERO_TABLE_STATISTICS); + testUpdateTableStatistics(tableName, ZERO_TABLE_STATISTICS, partitionStatistics); } finally { dropTable(tableName); @@ -1170,7 +1170,7 @@ public void testStatisticsColumnModification() tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(EMPTY_TABLE_STATISTICS); + assertThat(actualStatistics).isEqualTo(ZERO_TABLE_STATISTICS); return partitionStatistics; }); @@ -1276,7 +1276,7 @@ public void testInvalidColumnStatisticsMetadata() tableName.getTableName(), NO_ACID_TRANSACTION, actualStatistics -> { - assertThat(actualStatistics).isEqualTo(EMPTY_TABLE_STATISTICS); + assertThat(actualStatistics).isEqualTo(ZERO_TABLE_STATISTICS); return partitionStatistics; }); @@ -1453,7 +1453,7 @@ private void createDummyPartitionedTable(SchemaTableName tableName, List metastoreClient.updatePartitionStatistics( - tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> EMPTY_TABLE_STATISTICS)); + tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> ZERO_TABLE_STATISTICS)); } private class CloseableSchamaTableName diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTableStatistics.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTableStatistics.java index ca56764718de..567c5330bc17 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTableStatistics.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTableStatistics.java @@ -36,6 +36,7 @@ import static io.trino.tempto.fulfillment.table.MutableTablesState.mutableTablesState; import static io.trino.tempto.fulfillment.table.TableRequirements.mutableTable; import static io.trino.tempto.fulfillment.table.hive.tpch.TpchTableDefinitions.NATION; +import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.tests.product.hive.AllSimpleTypesTableDefinitions.ALL_HIVE_SIMPLE_TYPES_TEXTFILE; import static io.trino.tests.product.hive.HiveTableDefinitions.NATION_PARTITIONED_BY_BIGINT_REGIONKEY; import static io.trino.tests.product.hive.HiveTableDefinitions.NATION_PARTITIONED_BY_VARCHAR_REGIONKEY; @@ -1460,6 +1461,43 @@ public void testMixedHiveAndPrestoStatistics() } } + @Test + public void testEmptyPartitionedHiveStatistics() + { + String tableName = "test_empty_partitioned_hive_" + randomNameSuffix(); + try { + onHive().executeQuery(format("CREATE TABLE %s (a INT) PARTITIONED BY (p INT)", tableName)); + + // disable computation of statistics + onHive().executeQuery("set hive.stats.autogather=false"); + + onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION (p=1) VALUES (11),(12),(13),(14)", tableName)); + onHive().executeQuery(format("INSERT INTO TABLE %s PARTITION (p=2) VALUES (21),(22),(23)", tableName)); + + String showStatsPartitionOne = format("SHOW STATS FOR (SELECT * FROM %s WHERE p = 1)", tableName); + String showStatsPartitionTwo = format("SHOW STATS FOR (SELECT * FROM %s WHERE p = 2)", tableName); + String showStatsWholeTable = format("SHOW STATS FOR %s", tableName); + + assertThat(onTrino().executeQuery(showStatsPartitionOne)).containsOnly( + row("p", null, 1.0, 0.0, null, "1", "1"), + row("a", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + assertThat(onTrino().executeQuery(showStatsPartitionTwo)).containsOnly( + row("p", null, 1.0, 0.0, null, "2", "2"), + row("a", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + assertThat(onTrino().executeQuery(showStatsWholeTable)).containsOnly( + row("p", null, 2.0, 0.0, null, "1", "2"), + row("a", null, null, null, null, null, null), + row(null, null, null, null, null, null, null)); + } + finally { + // enable computation of statistics + onHive().executeQuery("set hive.stats.autogather=true"); + onHive().executeQuery("DROP TABLE " + tableName); + } + } + private static void assertComputeTableStatisticsOnCreateTable(String sourceTableName, List expectedStatistics) { String copiedTableName = "assert_compute_table_statistics_on_create_table_" + sourceTableName;