Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public void testUpdateBasicPartitionStatistics()
// used to ingest data into partitioned hive tables.
Comment thread
Dith3r marked this conversation as resolved.
Outdated
testUpdatePartitionStatistics(
tableName,
PartitionStatistics.empty(),
EMPTY_ROWCOUNT_STATISTICS,
ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2),
ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1));
}
Expand All @@ -124,7 +124,7 @@ public void testUpdatePartitionColumnStatistics()
// used to ingest data into partitioned hive tables.
testUpdatePartitionStatistics(
tableName,
PartitionStatistics.empty(),
EMPTY_ROWCOUNT_STATISTICS,
ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2),
ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2));
}
Expand All @@ -145,7 +145,7 @@ public void testUpdatePartitionColumnStatisticsEmptyOptionalFields()
// used to ingest data into partitioned hive tables.
testUpdatePartitionStatistics(
tableName,
PartitionStatistics.empty(),
EMPTY_ROWCOUNT_STATISTICS,
ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS),
ImmutableList.of(STATISTICS_EMPTY_OPTIONAL_FIELDS));
}
Expand All @@ -161,7 +161,7 @@ public void testStorePartitionWithStatistics()
// When the table has partitions, but row count statistics are set to zero, we treat this case as empty
// statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are
// used to ingest data into partitioned hive tables.
testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, PartitionStatistics.empty());
testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, STATISTICS_1, STATISTICS_2, STATISTICS_1_1, EMPTY_ROWCOUNT_STATISTICS);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public OptionalLong getOnDiskDataSizeInBytes()
return onDiskDataSizeInBytes;
}

/**
 * Returns a copy of these statistics with the row count cleared.
 *
 * <p>{@code fileCount}, {@code inMemoryDataSizeInBytes} and {@code onDiskDataSizeInBytes}
 * are carried over unchanged; only the second constructor argument is replaced
 * with an empty {@link OptionalLong}.
 *
 * @return a new {@code HiveBasicStatistics} instance; this instance is not modified
 */
public HiveBasicStatistics withEmptyRowCount()
{
    // An empty OptionalLong stands in for the row count in the copy.
    OptionalLong unknownRowCount = OptionalLong.empty();
    return new HiveBasicStatistics(
            fileCount,
            unknownRowCount,
            inMemoryDataSizeInBytes,
            onDiskDataSizeInBytes);
}

@Override
public boolean equals(Object o)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ public static Builder builder()
return new Builder();
}

/**
 * Returns a copy of these partition statistics with the basic statistics replaced.
 *
 * <p>The {@code columnStatistics} of this instance are passed to the new instance as-is.
 *
 * @param basicStatistics the basic statistics the returned instance should carry
 * @return a new {@code PartitionStatistics} built from {@code basicStatistics} and this instance's column statistics
 */
public PartitionStatistics withBasicStatistics(HiveBasicStatistics basicStatistics)
{
    // Only the basic statistics are swapped; column statistics are reused unchanged.
    PartitionStatistics replaced = new PartitionStatistics(basicStatistics, columnStatistics);
    return replaced;
}

public static class Builder
{
private HiveBasicStatistics basicStatistics = HiveBasicStatistics.createEmptyStatistics();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,10 @@ public Map<String, PartitionStatistics> getPartitionStatistics(Table table, List
// When the table has partitions, but row count statistics are set to zero, we treat this case as empty
// statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are
// used to ingest data into partitioned hive tables.
partitionBasicStatistics = partitionBasicStatistics.keySet().stream()
.map(key -> new SimpleEntry<>(key, PartitionStatistics.empty()))
partitionBasicStatistics = partitionBasicStatistics.entrySet().stream()
.map(entry -> new SimpleEntry<>(
entry.getKey(),
entry.getValue().withBasicStatistics(entry.getValue().getBasicStatistics().withEmptyRowCount())))
.collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,11 @@ public Map<String, PartitionStatistics> getPartitionStatistics(Table table, List
// When the table has partitions, but row count statistics are set to zero, we treat this case as empty
// statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are
// used to ingest data into partitioned hive tables.
partitionBasicStatistics = partitionBasicStatistics.keySet().stream()
.map(partitionName -> new SimpleEntry<>(partitionName, HiveBasicStatistics.createEmptyStatistics()))
// See https://github.com/trinodb/trino/issues/18798: Hive Metastore expects any new partition statistics to contain at least all the parameters the partition previously had.
partitionBasicStatistics = partitionBasicStatistics.entrySet().stream()
.map(entry -> new SimpleEntry<>(
entry.getKey(),
entry.getValue().withEmptyRowCount()))
.collect(toImmutableMap(SimpleEntry::getKey, SimpleEntry::getValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,7 @@ private static RowType toRowType(List<ColumnMetadata> columns)
.build();

protected static final PartitionStatistics ZERO_TABLE_STATISTICS = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of());
// Same as ZERO_TABLE_STATISTICS, except that its basic statistics are passed through withEmptyRowCount().
protected static final PartitionStatistics EMPTY_ROWCOUNT_STATISTICS = ZERO_TABLE_STATISTICS.withBasicStatistics(ZERO_TABLE_STATISTICS.getBasicStatistics().withEmptyRowCount());
protected static final PartitionStatistics BASIC_STATISTICS_1 = new PartitionStatistics(new HiveBasicStatistics(0, 20, 3, 0), ImmutableMap.of());
protected static final PartitionStatistics BASIC_STATISTICS_2 = new PartitionStatistics(new HiveBasicStatistics(0, 30, 2, 0), ImmutableMap.of());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ public void testUpdateBasicPartitionStatistics()
createDummyPartitionedTable(tableName, STATISTICS_PARTITIONED_TABLE_COLUMNS);
testUpdatePartitionStatistics(
tableName,
PartitionStatistics.empty(),
EMPTY_ROWCOUNT_STATISTICS,
ImmutableList.of(BASIC_STATISTICS_1, BASIC_STATISTICS_2),
ImmutableList.of(BASIC_STATISTICS_2, BASIC_STATISTICS_1));
}
Expand All @@ -329,7 +329,7 @@ public void testUpdatePartitionColumnStatistics()
// used to ingest data into partitioned hive tables.
testUpdatePartitionStatistics(
tableName,
PartitionStatistics.empty(),
EMPTY_ROWCOUNT_STATISTICS,
ImmutableList.of(STATISTICS_1_1, STATISTICS_1_2, STATISTICS_2),
ImmutableList.of(STATISTICS_1_2, STATISTICS_1_1, STATISTICS_2));
}
Expand All @@ -345,7 +345,7 @@ public void testStorePartitionWithStatistics()
// When the table has partitions, but row count statistics are set to zero, we treat this case as empty
// statistics to avoid underestimation in the CBO. This scenario may be caused when other engines are
// used to ingest data into partitioned hive tables.
testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, PartitionStatistics.empty());
testStorePartitionWithStatistics(STATISTICS_PARTITIONED_TABLE_COLUMNS, BASIC_STATISTICS_1, BASIC_STATISTICS_2, BASIC_STATISTICS_1, EMPTY_ROWCOUNT_STATISTICS);
}

@Override
Expand Down