diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java
index 04d5020fb1521..5b60fe072e920 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java
@@ -101,6 +101,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab
             Estimate nullsFraction;
             if (hiveColumnHandle.isPartitionKey()) {
                 rangeStatistics.setDistinctValuesCount(countDistinctPartitionKeys(hiveColumnHandle, hivePartitions));
+                rangeStatistics.setDataSize(calculateDataSize(partitionStatistics, columnName));
                 nullsFraction = calculateNullsFractionForPartitioningKey(hiveColumnHandle, hivePartitions, partitionStatistics);
                 if (isLowHighSupportedForType(prestoType)) {
                     lowValueCandidates = hivePartitions.stream()
@@ -114,6 +115,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab
             }
             else {
                 rangeStatistics.setDistinctValuesCount(calculateDistinctValuesCount(partitionStatistics, columnName));
+                rangeStatistics.setDataSize(calculateDataSize(partitionStatistics, columnName));
                 nullsFraction = calculateNullsFraction(partitionStatistics, columnName, rowCount);
 
                 if (isLowHighSupportedForType(prestoType)) {
@@ -212,6 +214,51 @@ private Estimate calculateDistinctValuesCount(Map<String, PartitionStatistics> s
                 DoubleStream::max);
     }
 
+    /**
+     * Estimates the total size of the non-null values of {@code column} across all partitions.
+     * <p>
+     * A partition contributes {@code averageColumnLength * (rowCount - nullsCount)} when both its
+     * average column length and its row count are known; a missing nulls count is treated as zero.
+     * The mean size of the partitions with a known size is then scaled to the number of all partitions.
+     *
+     * @param statisticsByPartitionName per-partition statistics to aggregate
+     * @param column name of the column whose data size is estimated
+     * @return the extrapolated data size, or an unknown estimate when no partition has a known size
+     */
+    private Estimate calculateDataSize(Map<String, PartitionStatistics> statisticsByPartitionName, String column)
+    {
+        double[] knownPartitionDataSizes = statisticsByPartitionName.values().stream()
+                .map(stats -> {
+                    // A partition may hold no statistics entry for this column (this method is also called
+                    // from the partition key branch of getTableStatistics); skip it instead of failing.
+                    if (stats.getColumnStatistics().get(column) == null) {
+                        return OptionalDouble.empty();
+                    }
+
+                    OptionalDouble averageColumnLength = stats.getColumnStatistics().get(column).getAverageColumnLength();
+                    OptionalLong rowCount = stats.getBasicStatistics().getRowCount();
+                    OptionalLong nullsCount = stats.getColumnStatistics().get(column).getNullsCount();
+                    if (!averageColumnLength.isPresent() || !rowCount.isPresent()) {
+                        return OptionalDouble.empty();
+                    }
+
+                    // Clamp at zero so a nulls count above the row count cannot produce a negative size.
+                    long nonNullsCount = Math.max(rowCount.getAsLong() - nullsCount.orElse(0), 0);
+                    return OptionalDouble.of(averageColumnLength.getAsDouble() * nonNullsCount);
+                })
+                .filter(OptionalDouble::isPresent)
+                .mapToDouble(OptionalDouble::getAsDouble)
+                .toArray();
+
+        if (knownPartitionDataSizes.length == 0) {
+            return Estimate.unknownValue();
+        }
+
+        double knownPartitionDataSizesSum = DoubleStream.of(knownPartitionDataSizes).sum();
+        double averagePartitionDataSize = knownPartitionDataSizesSum / knownPartitionDataSizes.length;
+        return new Estimate(averagePartitionDataSize * statisticsByPartitionName.size());
+    }
+
     private Estimate calculateNullsFraction(Map<String, PartitionStatistics> statisticsByPartitionName, String column, Estimate totalRowsCount)
     {
         Estimate totalNullsCount = summarizePartitionStatistics(