diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java index 06c7b65ef34c..f1d0a2a366a9 100644 --- a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHive.java @@ -103,7 +103,7 @@ public void testUpdateBasicPartitionStatistics() // used to ingest data into partitioned hive tables. testUpdatePartitionStatistics( tableName, - PartitionStatistics.empty(), + EMPTY_ROWCOUNT_STATISTICS, ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); } @@ -124,7 +124,7 @@ public void testUpdatePartitionColumnStatistics() // used to ingest data into partitioned hive tables. testUpdatePartitionStatistics( tableName, - PartitionStatistics.empty(), + EMPTY_ROWCOUNT_STATISTICS, ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); } @@ -145,7 +145,7 @@ public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() // used to ingest data into partitioned hive tables. testUpdatePartitionStatistics( tableName, - PartitionStatistics.empty(), + EMPTY_ROWCOUNT_STATISTICS, ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS), ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS)); } @@ -161,7 +161,7 @@ public void testStorePartitionWithStatistics() // When the table has partitions, but row count statistics are set to zero, we treat this case as empty // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are // used to ingest data into partitioned hive tables. - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, PartitionStatistics.empty()); + testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, EMPTY_ROWCOUNT_STATISTICS); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveBasicStatistics.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveBasicStatistics.java index 0ae16566732c..d83bf1bacc58 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveBasicStatistics.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveBasicStatistics.java @@ -83,6 +83,11 @@ public OptionalLong getOnDiskDataSizeInBytes() return onDiskDataSizeInBytes; } + public HiveBasicStatistics withEmptyRowCount() + { + return new HiveBasicStatistics(fileCount, OptionalLong.empty(), inMemoryDataSizeInBytes, onDiskDataSizeInBytes); + } + @Override public boolean equals(Object o) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java index 3537517abe6a..f77000f0ea10 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/PartitionStatistics.java @@ -94,6 +94,11 @@ public static Builder builder() return new Builder(); } + public PartitionStatistics withBasicStatistics(HiveBasicStatistics basicStatistics) + { + return new PartitionStatistics(basicStatistics, columnStatistics); + } + public static class Builder { private HiveBasicStatistics basicStatistics = HiveBasicStatistics.createEmptyStatistics(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index d437438d43ee..3ba761147232 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -333,8 +333,10 @@ public Map getPartitionStatistics(Table table, List // When the table has partitions, but row count statistics are set to zero, we treat this case as empty // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are // used to ingest data into partitioned hive tables. - partitionBasicStatistics = partitionBasicStatistics.keySet().stream() - .map(key -> new SimpleEntry<>(key, PartitionStatistics.empty())) + partitionBasicStatistics = partitionBasicStatistics.entrySet().stream() + .map(entry -> new SimpleEntry<>( + entry.getKey(), + entry.getValue().withBasicStatistics(entry.getValue().getBasicStatistics().withEmptyRowCount()))) .collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue)); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 5178f82af21e..8ad9aaea4b3c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -399,8 +399,11 @@ public Map getPartitionStatistics(Table table, List // When the table has partitions, but row count statistics are set to zero, we treat this case as empty // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are // used to ingest data into partitioned hive tables. - partitionBasicStatistics = partitionBasicStatistics.keySet().stream() - .map(partitionName -> new SimpleEntry<>(partitionName, HiveBasicStatistics.createEmptyStatistics())) + // https://github.com/trinodb/trino/issues/18798 Hive Metastore assumes any new partition statistics to at least have all parameters that the partition used to have + partitionBasicStatistics = partitionBasicStatistics.entrySet().stream() + .map(entry -> new SimpleEntry<>( + entry.getKey(), + entry.getValue().withEmptyRowCount())) .collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue)); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java index a8aecc5bbbdd..c3efe779a3c5 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java @@ -543,6 +543,7 @@ private static RowType toRowType(List columns) .build(); protected static final PartitionStatistics ZERO_TABLE_STATISTICS = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of()); + protected static final PartitionStatistics EMPTY_ROWCOUNT_STATISTICS = ZERO_TABLE_STATISTICS.withBasicStatistics(ZERO_TABLE_STATISTICS.getBasicStatistics().withEmptyRowCount()); protected static final PartitionStatistics BASIC_STATISTICS_1 = new PartitionStatistics(new HiveBasicStatistics(0, 20, 3, 0), ImmutableMap.of()); protected static final PartitionStatistics BASIC_STATISTICS_2 = new PartitionStatistics(new HiveBasicStatistics(0, 30, 2, 0), ImmutableMap.of()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java index 903ec16c5cb9..5f5a9ae0d6c3 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java @@ -308,7 +308,7 @@ public void testUpdateBasicPartitionStatistics() createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS); testUpdatePartitionStatistics( tableName, - PartitionStatistics.empty(), + EMPTY_ROWCOUNT_STATISTICS, ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2), ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1)); } @@ -329,7 +329,7 @@ public void testUpdatePartitionColumnStatistics() // used to ingest data into partitioned hive tables. testUpdatePartitionStatistics( tableName, - PartitionStatistics.empty(), + EMPTY_ROWCOUNT_STATISTICS, ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2), ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2)); } @@ -345,7 +345,7 @@ public void testStorePartitionWithStatistics() // When the table has partitions, but row count statistics are set to zero, we treat this case as empty // statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are // used to ingest data into partitioned hive tables. - testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, PartitionStatistics.empty()); + testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, EMPTY_ROWCOUNT_STATISTICS); } @Override