diff --git a/presto-docs/src/main/sphinx/connector/iceberg.rst b/presto-docs/src/main/sphinx/connector/iceberg.rst
index a51b9f0169d2e..8c38bf77ecef5 100644
--- a/presto-docs/src/main/sphinx/connector/iceberg.rst
+++ b/presto-docs/src/main/sphinx/connector/iceberg.rst
@@ -323,6 +323,8 @@ Property Name Description
``iceberg.metrics-max-inferred-column`` The maximum number of columns for which metrics ``100``
are collected.
+``iceberg.max-statistics-file-cache-size`` Maximum size in bytes that should be consumed by the ``256MB``
+ statistics file cache.
======================================================= ============================================================= ============
Table Properties
diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml
index f8d65d59164df..1550d47846ac1 100644
--- a/presto-iceberg/pom.xml
+++ b/presto-iceberg/pom.xml
@@ -187,6 +187,11 @@
provided
+
+ com.facebook.airlift
+ stats
+
+
com.facebook.drift
drift-api
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java
index be35159d509f7..eb7b5df390b78 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java
@@ -25,6 +25,7 @@
import com.facebook.presto.hive.HiveWrittenPartitions;
import com.facebook.presto.hive.NodeVersion;
import com.facebook.presto.iceberg.changelog.ChangelogUtil;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorInsertTableHandle;
@@ -182,6 +183,7 @@ public abstract class IcebergAbstractMetadata
protected final RowExpressionService rowExpressionService;
protected final FilterStatsCalculatorService filterStatsCalculatorService;
protected Transaction transaction;
+ protected final StatisticsFileCache statisticsFileCache;
private final StandardFunctionResolution functionResolution;
private final ConcurrentMap icebergTables = new ConcurrentHashMap<>();
@@ -192,7 +194,8 @@ public IcebergAbstractMetadata(
RowExpressionService rowExpressionService,
JsonCodec commitTaskCodec,
NodeVersion nodeVersion,
- FilterStatsCalculatorService filterStatsCalculatorService)
+ FilterStatsCalculatorService filterStatsCalculatorService,
+ StatisticsFileCache statisticsFileCache)
{
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
@@ -200,6 +203,7 @@ public IcebergAbstractMetadata(
this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null");
this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
this.filterStatsCalculatorService = requireNonNull(filterStatsCalculatorService, "filterStatsCalculatorService is null");
+ this.statisticsFileCache = requireNonNull(statisticsFileCache, "statisticsFileCache is null");
}
protected final Table getIcebergTable(ConnectorSession session, SchemaTableName schemaTableName)
@@ -733,7 +737,7 @@ public Map getColumnHandles(ConnectorSession session, Conn
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Optional tableLayoutHandle, List columnHandles, Constraint constraint)
{
- TableStatistics baseStatistics = calculateBaseTableStatistics(this, typeManager, session, (IcebergTableHandle) tableHandle, tableLayoutHandle, columnHandles, constraint);
+ TableStatistics baseStatistics = calculateBaseTableStatistics(this, typeManager, session, statisticsFileCache, (IcebergTableHandle) tableHandle, tableLayoutHandle, columnHandles, constraint);
return calculateStatisticsConsideringLayout(filterStatsCalculatorService, rowExpressionService, baseStatistics, session, tableLayoutHandle);
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java
index 66d06f3a31061..1ea2d23ef9a93 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java
@@ -44,6 +44,8 @@
import com.facebook.presto.iceberg.procedure.RemoveOrphanFiles;
import com.facebook.presto.iceberg.procedure.RollbackToSnapshotProcedure;
import com.facebook.presto.iceberg.procedure.UnregisterTableProcedure;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCacheKey;
import com.facebook.presto.orc.CachingStripeMetadataSource;
import com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory;
import com.facebook.presto.orc.EncryptionLibrary;
@@ -71,6 +73,7 @@
import com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider;
import com.facebook.presto.spi.connector.ConnectorSplitManager;
import com.facebook.presto.spi.procedure.Procedure;
+import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.inject.Binder;
@@ -170,6 +173,20 @@ public void setup(Binder binder)
binder.bind(ConnectorPlanOptimizerProvider.class).to(IcebergPlanOptimizerProvider.class).in(Scopes.SINGLETON);
}
+ @Singleton
+ @Provides
+ public StatisticsFileCache createStatisticsFileCache(IcebergConfig config, MBeanExporter exporter)
+ {
+ Cache delegate = CacheBuilder.newBuilder()
+ .maximumWeight(config.getMaxStatisticsFileCacheSize().toBytes())
+ .weigher((key, entry) -> (int) entry.getEstimatedSize())
+ .recordStats()
+ .build();
+ CacheStatsMBean bean = new CacheStatsMBean(delegate);
+ exporter.export(generatedNameOf(StatisticsFileCache.class, connectorId), bean);
+ return new StatisticsFileCache(delegate);
+ }
+
@ForCachingHiveMetastore
@Singleton
@Provides
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergConfig.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergConfig.java
index 74464f938f516..fe11f4b2fefbe 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergConfig.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergConfig.java
@@ -19,6 +19,7 @@
import com.facebook.presto.spi.statistics.ColumnStatisticType;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
+import io.airlift.units.DataSize;
import org.apache.iceberg.hadoop.HadoopFileIO;
import javax.validation.constraints.DecimalMax;
@@ -33,6 +34,8 @@
import static com.facebook.presto.iceberg.CatalogType.HIVE;
import static com.facebook.presto.iceberg.IcebergFileFormat.PARQUET;
import static com.facebook.presto.iceberg.util.StatisticsUtil.decodeMergeFlags;
+import static io.airlift.units.DataSize.Unit.MEGABYTE;
+import static io.airlift.units.DataSize.succinctDataSize;
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_EXPIRATION_INTERVAL_MS_DEFAULT;
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_MAX_CONTENT_LENGTH_DEFAULT;
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_MAX_TOTAL_BYTES_DEFAULT;
@@ -67,6 +70,7 @@ public class IcebergConfig
private long manifestCacheExpireDuration = IO_MANIFEST_CACHE_EXPIRATION_INTERVAL_MS_DEFAULT;
private long manifestCacheMaxContentLength = IO_MANIFEST_CACHE_MAX_CONTENT_LENGTH_DEFAULT;
private int splitManagerThreads = Runtime.getRuntime().availableProcessors();
+ private DataSize maxStatisticsFileCacheSize = succinctDataSize(256, MEGABYTE);
@NotNull
public FileFormat getFileFormat()
@@ -395,4 +399,17 @@ public IcebergConfig setMetricsMaxInferredColumn(int metricsMaxInferredColumn)
this.metricsMaxInferredColumn = metricsMaxInferredColumn;
return this;
}
+
+ public DataSize getMaxStatisticsFileCacheSize()
+ {
+ return maxStatisticsFileCacheSize;
+ }
+
+ @Config("iceberg.max-statistics-file-cache-size")
+ @ConfigDescription("The maximum size in bytes the statistics file cache should consume")
+ public IcebergConfig setMaxStatisticsFileCacheSize(DataSize maxStatisticsFileCacheSize)
+ {
+ this.maxStatisticsFileCacheSize = maxStatisticsFileCacheSize;
+ return this;
+ }
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergErrorCode.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergErrorCode.java
index 1f1e5fdf5146a..3548ef24ba496 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergErrorCode.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergErrorCode.java
@@ -38,7 +38,8 @@ public enum IcebergErrorCode
ICEBERG_INVALID_TABLE_TIMESTAMP(12, USER_ERROR),
ICEBERG_ROLLBACK_ERROR(13, EXTERNAL),
ICEBERG_INVALID_FORMAT_VERSION(14, USER_ERROR),
- ICEBERG_UNKNOWN_MANIFEST_TYPE(15, EXTERNAL);
+ ICEBERG_UNKNOWN_MANIFEST_TYPE(15, EXTERNAL),
+ ICEBERG_CACHE_WRITE_ERROR(16, INTERNAL_ERROR);
private final ErrorCode errorCode;
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java
index f64337a1079db..ea610a5037083 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java
@@ -33,6 +33,7 @@
import com.facebook.presto.hive.metastore.PrestoTableType;
import com.facebook.presto.hive.metastore.PrincipalPrivileges;
import com.facebook.presto.hive.metastore.Table;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorNewTableLayout;
import com.facebook.presto.spi.ConnectorOutputTableHandle;
@@ -160,9 +161,10 @@ public IcebergHiveMetadata(
JsonCodec commitTaskCodec,
NodeVersion nodeVersion,
FilterStatsCalculatorService filterStatsCalculatorService,
- IcebergHiveTableOperationsConfig hiveTableOeprationsConfig)
+ IcebergHiveTableOperationsConfig hiveTableOeprationsConfig,
+ StatisticsFileCache statisticsFileCache)
{
- super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, nodeVersion, filterStatsCalculatorService);
+ super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, nodeVersion, filterStatsCalculatorService, statisticsFileCache);
this.metastore = requireNonNull(metastore, "metastore is null");
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.hiveTableOeprationsConfig = requireNonNull(hiveTableOeprationsConfig, "hiveTableOperationsConfig is null");
@@ -448,7 +450,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab
{
IcebergTableHandle handle = (IcebergTableHandle) tableHandle;
org.apache.iceberg.Table icebergTable = getIcebergTable(session, handle.getSchemaTableName());
- TableStatistics icebergStatistics = calculateBaseTableStatistics(this, typeManager, session, (IcebergTableHandle) tableHandle, tableLayoutHandle, columnHandles, constraint);
+ TableStatistics icebergStatistics = calculateBaseTableStatistics(this, typeManager, session, statisticsFileCache, (IcebergTableHandle) tableHandle, tableLayoutHandle, columnHandles, constraint);
EnumSet mergeFlags = getHiveStatisticsMergeStrategy(session);
TableStatistics mergedStatistics = Optional.of(mergeFlags)
.filter(set -> !set.isEmpty())
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java
index f8175f1b0c856..5bc8c9e4a8b0c 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java
@@ -18,6 +18,7 @@
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.hive.NodeVersion;
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.function.StandardFunctionResolution;
import com.facebook.presto.spi.plan.FilterStatsCalculatorService;
@@ -39,6 +40,7 @@ public class IcebergHiveMetadataFactory
final NodeVersion nodeVersion;
final FilterStatsCalculatorService filterStatsCalculatorService;
final IcebergHiveTableOperationsConfig operationsConfig;
+ final StatisticsFileCache statisticsFileCache;
@Inject
public IcebergHiveMetadataFactory(
@@ -50,7 +52,8 @@ public IcebergHiveMetadataFactory(
JsonCodec commitTaskCodec,
NodeVersion nodeVersion,
FilterStatsCalculatorService filterStatsCalculatorService,
- IcebergHiveTableOperationsConfig operationsConfig)
+ IcebergHiveTableOperationsConfig operationsConfig,
+ StatisticsFileCache statisticsFileCache)
{
this.metastore = requireNonNull(metastore, "metastore is null");
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
@@ -61,6 +64,7 @@ public IcebergHiveMetadataFactory(
this.nodeVersion = requireNonNull(nodeVersion, "nodeVersion is null");
this.filterStatsCalculatorService = requireNonNull(filterStatsCalculatorService, "filterStatsCalculatorService is null");
this.operationsConfig = requireNonNull(operationsConfig, "operationsConfig is null");
+ this.statisticsFileCache = requireNonNull(statisticsFileCache, "statisticsFileCache is null");
}
public ConnectorMetadata create()
@@ -74,6 +78,7 @@ public ConnectorMetadata create()
commitTaskCodec,
nodeVersion,
filterStatsCalculatorService,
- operationsConfig);
+ operationsConfig,
+ statisticsFileCache);
}
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java
index bb18a676b2450..44205752b159f 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java
@@ -18,6 +18,7 @@
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.NodeVersion;
import com.facebook.presto.hive.TableAlreadyExistsException;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
import com.facebook.presto.iceberg.util.IcebergPrestoModelConverters;
import com.facebook.presto.spi.ConnectorNewTableLayout;
import com.facebook.presto.spi.ConnectorOutputTableHandle;
@@ -82,9 +83,10 @@ public IcebergNativeMetadata(
JsonCodec commitTaskCodec,
CatalogType catalogType,
NodeVersion nodeVersion,
- FilterStatsCalculatorService filterStatsCalculatorService)
+ FilterStatsCalculatorService filterStatsCalculatorService,
+ StatisticsFileCache statisticsFileCache)
{
- super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, nodeVersion, filterStatsCalculatorService);
+ super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, nodeVersion, filterStatsCalculatorService, statisticsFileCache);
this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null");
this.catalogType = requireNonNull(catalogType, "catalogType is null");
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java
index 33a3caf34b9f8..98e0e1f546946 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java
@@ -16,6 +16,7 @@
import com.facebook.airlift.json.JsonCodec;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.NodeVersion;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.function.StandardFunctionResolution;
import com.facebook.presto.spi.plan.FilterStatsCalculatorService;
@@ -36,6 +37,7 @@ public class IcebergNativeMetadataFactory
final RowExpressionService rowExpressionService;
final NodeVersion nodeVersion;
final FilterStatsCalculatorService filterStatsCalculatorService;
+ final StatisticsFileCache statisticsFileCache;
@Inject
public IcebergNativeMetadataFactory(
@@ -46,7 +48,8 @@ public IcebergNativeMetadataFactory(
RowExpressionService rowExpressionService,
JsonCodec commitTaskCodec,
NodeVersion nodeVersion,
- FilterStatsCalculatorService filterStatsCalculatorService)
+ FilterStatsCalculatorService filterStatsCalculatorService,
+ StatisticsFileCache statisticsFileCache)
{
this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
@@ -57,10 +60,11 @@ public IcebergNativeMetadataFactory(
requireNonNull(config, "config is null");
this.catalogType = config.getCatalogType();
this.filterStatsCalculatorService = requireNonNull(filterStatsCalculatorService, "filterStatsCalculatorService is null");
+ this.statisticsFileCache = requireNonNull(statisticsFileCache, "statisticsFileCache is null");
}
public ConnectorMetadata create()
{
- return new IcebergNativeMetadata(catalogFactory, typeManager, functionResolution, rowExpressionService, commitTaskCodec, catalogType, nodeVersion, filterStatsCalculatorService);
+ return new IcebergNativeMetadata(catalogFactory, typeManager, functionResolution, rowExpressionService, commitTaskCodec, catalogType, nodeVersion, filterStatsCalculatorService, statisticsFileCache);
}
}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TableStatisticsMaker.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TableStatisticsMaker.java
index 9da3de696f620..b86a7e9c87cb0 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TableStatisticsMaker.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/TableStatisticsMaker.java
@@ -14,11 +14,14 @@
package com.facebook.presto.iceberg;
import com.facebook.airlift.log.Logger;
+import com.facebook.presto.common.RuntimeUnit;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.FixedWidthType;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.NodeVersion;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCacheKey;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.PrestoException;
@@ -31,6 +34,7 @@
import com.facebook.presto.spi.statistics.TableStatistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.CompactSketch;
import org.apache.iceberg.ContentFile;
@@ -93,6 +97,8 @@
import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES;
import static com.facebook.presto.spi.statistics.SourceInfo.ConfidenceLevel.HIGH;
import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Iterables.getOnlyElement;
import static java.lang.Long.parseLong;
import static java.lang.Math.abs;
@@ -112,6 +118,10 @@ public class TableStatisticsMaker
private final ConnectorSession session;
private final TypeManager typeManager;
+ private static final String STATISITCS_CACHE_METRIC_FILE_SIZE_FORMAT = "StatisticsFileCache/PuffinFileSize/%s/%s";
+ private static final String STATISITCS_CACHE_METRIC_FILE_COLUMN_COUNT_FORMAT = "StatisticsFileCache/ColumnCount/%s/%s";
+ private static final String STATISITCS_CACHE_METRIC_PARTIAL_MISS_FORMAT = "StatisticsFileCache/PartialMiss/%s/%s";
+
private TableStatisticsMaker(Table icebergTable, ConnectorSession session, TypeManager typeManager)
{
this.icebergTable = icebergTable;
@@ -129,12 +139,24 @@ private TableStatisticsMaker(Table icebergTable, ConnectorSession session, TypeM
.put(ICEBERG_DATA_SIZE_BLOB_TYPE_ID, TableStatisticsMaker::readDataSizeBlob)
.build();
- public static TableStatistics getTableStatistics(ConnectorSession session, TypeManager typeManager, Optional> currentPredicate, Constraint constraint, IcebergTableHandle tableHandle, Table icebergTable, List columns)
+ public static TableStatistics getTableStatistics(
+ ConnectorSession session,
+ TypeManager typeManager,
+ StatisticsFileCache statisticsFileCache,
+ Optional> currentPredicate,
+ Constraint constraint,
+ IcebergTableHandle tableHandle,
+ Table icebergTable,
+ List columns)
{
- return new TableStatisticsMaker(icebergTable, session, typeManager).makeTableStatistics(tableHandle, currentPredicate, constraint, columns);
+ return new TableStatisticsMaker(icebergTable, session, typeManager).makeTableStatistics(statisticsFileCache, tableHandle, currentPredicate, constraint, columns);
}
- private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Optional> currentPredicate, Constraint constraint, List selectedColumns)
+ private TableStatistics makeTableStatistics(StatisticsFileCache statisticsFileCache,
+ IcebergTableHandle tableHandle,
+ Optional> currentPredicate,
+ Constraint constraint,
+ List selectedColumns)
{
if (!tableHandle.getIcebergTableName().getSnapshotId().isPresent() || constraint.getSummary().isNone()) {
return TableStatistics.builder()
@@ -192,8 +214,14 @@ private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Opti
TableStatistics.Builder result = TableStatistics.builder();
result.setRowCount(Estimate.of(recordCount));
- Map tableStats = getClosestStatisticsFileForSnapshot(tableHandle)
- .map(this::loadStatisticsFile).orElseGet(Collections::emptyMap);
+ // transformValues returns a view. We need to make a copy of the map in order to update
+ Map tableStats = ImmutableMap.copyOf(
+ Maps.transformValues(getClosestStatisticsFileForSnapshot(tableHandle)
+ .map(file -> loadStatisticsFile(tableHandle, file, statisticsFileCache, selectedColumns.stream()
+ .map(IcebergColumnHandle::getId)
+ .collect(Collectors.toList())))
+ .orElseGet(Collections::emptyMap),
+ ColumnStatistics::buildFrom));
// scale all NDV values loaded from puffin files based on row count
totalRecordCount.ifPresent(fullTableRecordCount -> tableStats.forEach((id, stat) ->
stat.setDistinctValuesCount(stat.getDistinctValuesCount().map(value -> value * recordCount / fullTableRecordCount))));
@@ -497,8 +525,42 @@ private Optional getClosestStatisticsFileForSnapshot(IcebergTabl
/**
* Builds a map of field ID to ColumnStatistics for a particular {@link StatisticsFile}.
*/
- private Map loadStatisticsFile(StatisticsFile file)
+ private Map loadStatisticsFile(IcebergTableHandle tableHandle, StatisticsFile file, StatisticsFileCache statisticsFileCache, List columnIds)
{
+ // first, try to load all stats from the cache. If the map doesn't contain all keys, load the missing
+ // stats into the cache.
+ Map cachedStats = columnIds.stream()
+ .map(id -> Pair.of(id, statisticsFileCache.getIfPresent(new StatisticsFileCacheKey(file, id))))
+ .filter(pair -> pair.second() != null)
+ .collect(toImmutableMap(Pair::first, Pair::second));
+ Set missingStats = columnIds.stream().filter(id -> !cachedStats.containsKey(id)).collect(toImmutableSet());
+ if (missingStats.isEmpty()) {
+ return cachedStats;
+ }
+ if (!cachedStats.isEmpty()) {
+ session.getRuntimeStats().addMetricValue(
+ String.format(STATISITCS_CACHE_METRIC_PARTIAL_MISS_FORMAT,
+ tableHandle.getSchemaTableName(),
+ file.path()),
+ RuntimeUnit.NONE, 1);
+ }
+
+ String fileSizeMetricName = String.format(STATISITCS_CACHE_METRIC_FILE_SIZE_FORMAT,
+ tableHandle.getSchemaTableName(),
+ file.path());
+ if (session.getRuntimeStats().getMetric(fileSizeMetricName) == null) {
+ long fileSize = file.fileFooterSizeInBytes();
+ session.getRuntimeStats().addMetricValue(fileSizeMetricName, RuntimeUnit.NONE, fileSize);
+ statisticsFileCache.recordFileSize(fileSize);
+ }
+ String columnCountMetricName = String.format(STATISITCS_CACHE_METRIC_FILE_COLUMN_COUNT_FORMAT,
+ tableHandle.getSchemaTableName(),
+ file.path());
+ if (session.getRuntimeStats().getMetric(columnCountMetricName) == null) {
+ long columnCount = file.blobMetadata().size();
+ session.getRuntimeStats().addMetricValue(columnCountMetricName, RuntimeUnit.NONE, columnCount);
+ statisticsFileCache.recordColumnCount(columnCount);
+ }
Map result = new HashMap<>();
try (FileIO io = icebergTable.io()) {
InputFile inputFile = io.newInputFile(file.path());
@@ -507,6 +569,10 @@ private Map loadStatisticsFile(StatisticsFile
BlobMetadata metadata = data.first();
ByteBuffer blob = data.second();
Integer field = getOnlyElement(metadata.inputFields());
+ if (!missingStats.contains(field)) {
+ continue;
+ }
+
Optional.ofNullable(puffinStatReaders.get(metadata.type()))
.ifPresent(statReader -> {
result.compute(field, (key, value) -> {
@@ -523,8 +589,19 @@ private Map loadStatisticsFile(StatisticsFile
throw new PrestoException(ICEBERG_FILESYSTEM_ERROR, "failed to read statistics file at " + file.path(), e);
}
}
-
- return ImmutableMap.copyOf(result);
+ Map computedResults = new HashMap<>(Maps.transformValues(result, ColumnStatistics.Builder::build));
+ missingStats.stream()
+ .filter(id -> !computedResults.containsKey(id))
+ .forEach(id -> computedResults.put(id, ColumnStatistics.empty()));
+ ImmutableMap.Builder finalResult = ImmutableMap.builder();
+ computedResults.forEach((key, value) -> {
+ // stats for a particular column may not appear in a file. Add a cache entry to
+ // denote that this file has been read for a particular column to avoid reading the
+ // file again
+ statisticsFileCache.put(new StatisticsFileCacheKey(file, key), value);
+ finalResult.put(key, value);
+ });
+ return finalResult.build();
}
public static List getSupportedColumnStatistics(String columnName, com.facebook.presto.common.type.Type type)
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/statistics/StatisticsFileCache.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/statistics/StatisticsFileCache.java
new file mode 100644
index 0000000000000..faf61aa6427fa
--- /dev/null
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/statistics/StatisticsFileCache.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg.statistics;
+
+import com.facebook.airlift.stats.DistributionStat;
+import com.facebook.presto.spi.statistics.ColumnStatistics;
+import com.google.common.cache.Cache;
+import com.google.common.cache.ForwardingCache.SimpleForwardingCache;
+import org.weakref.jmx.Managed;
+import org.weakref.jmx.Nested;
+
+public class StatisticsFileCache
+ extends SimpleForwardingCache
+{
+ private final DistributionStat fileSizes = new DistributionStat();
+ private final DistributionStat columnCounts = new DistributionStat();
+
+ public StatisticsFileCache(Cache delegate)
+ {
+ super(delegate);
+ }
+
+ @Managed
+ @Nested
+ public DistributionStat getFileSizeDistribution()
+ {
+ return fileSizes;
+ }
+
+ public void recordFileSize(long size)
+ {
+ fileSizes.add(size);
+ }
+
+ @Managed
+ @Nested
+ public DistributionStat getColumnCountDistribution()
+ {
+ return columnCounts;
+ }
+
+ public void recordColumnCount(long count)
+ {
+ columnCounts.add(count);
+ }
+}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/statistics/StatisticsFileCacheKey.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/statistics/StatisticsFileCacheKey.java
new file mode 100644
index 0000000000000..65afd009dd0a4
--- /dev/null
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/statistics/StatisticsFileCacheKey.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.iceberg.statistics;
+
+import org.apache.iceberg.StatisticsFile;
+
+import java.util.Objects;
+
+import static java.util.Objects.requireNonNull;
+
+public class StatisticsFileCacheKey
+{
+ private final StatisticsFile file;
+ private final int columnId;
+
+ public StatisticsFileCacheKey(StatisticsFile file, int columnId)
+ {
+ this.file = requireNonNull(file, "file is null");
+ this.columnId = columnId;
+ }
+
+ @Override
+ public boolean equals(Object o)
+ {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof StatisticsFileCacheKey)) {
+ return false;
+ }
+ StatisticsFileCacheKey that = (StatisticsFileCacheKey) o;
+ return columnId == that.columnId && Objects.equals(file, that.file);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(file, columnId);
+ }
+}
diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/util/StatisticsUtil.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/util/StatisticsUtil.java
index 446296346c7f4..a0587ccd8b069 100644
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/util/StatisticsUtil.java
+++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/util/StatisticsUtil.java
@@ -22,6 +22,7 @@
import com.facebook.presto.iceberg.IcebergTableHandle;
import com.facebook.presto.iceberg.IcebergTableLayoutHandle;
import com.facebook.presto.iceberg.TableStatisticsMaker;
+import com.facebook.presto.iceberg.statistics.StatisticsFileCache;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
@@ -214,6 +215,7 @@ public static TableStatistics calculateBaseTableStatistics(
ConnectorMetadata metadata,
TypeManager typeManager,
ConnectorSession session,
+ StatisticsFileCache statisticsFileCache,
IcebergTableHandle tableHandle,
Optional tableLayoutHandle,
List columnHandles,
@@ -226,6 +228,7 @@ public static TableStatistics calculateBaseTableStatistics(
.collect(toImmutableList()),
tableLayoutHandle.map(IcebergTableLayoutHandle.class::cast));
return TableStatisticsMaker.getTableStatistics(session, typeManager,
+ statisticsFileCache,
tableLayoutHandle
.map(IcebergTableLayoutHandle.class::cast)
.map(IcebergTableLayoutHandle::getValidPredicate),
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java
index 7b2cb01e61749..fa140c98a8b94 100644
--- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java
@@ -30,12 +30,15 @@
import com.facebook.presto.hive.MetastoreClientConfig;
import com.facebook.presto.hive.authentication.NoHdfsAuthentication;
import com.facebook.presto.iceberg.delete.DeleteFile;
+import com.facebook.presto.metadata.CatalogMetadata;
import com.facebook.presto.metadata.Metadata;
+import com.facebook.presto.metadata.MetadataUtil;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.TableHandle;
import com.facebook.presto.spi.analyzer.MetadataResolver;
+import com.facebook.presto.spi.connector.classloader.ClassLoaderSafeConnectorMetadata;
import com.facebook.presto.spi.security.AllowAllAccessControl;
import com.facebook.presto.spi.statistics.ColumnStatistics;
import com.facebook.presto.spi.statistics.Estimate;
@@ -46,9 +49,11 @@
import com.facebook.presto.tests.AbstractTestQueryFramework;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
+import com.google.common.cache.CacheStats;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -81,6 +86,7 @@
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
+import java.lang.reflect.Field;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.LocalTime;
@@ -933,7 +939,7 @@ public void testStatsByDistance()
Session weightedSession = Session.builder(getSession())
.setCatalogSessionProperty("iceberg", STATISTIC_SNAPSHOT_RECORD_DIFFERENCE_WEIGHT, "10000000")
.build();
- Function ndvs = (x) -> columnStatsFor(getTableStats("test_stat_dist", Optional.of(snapshots.get(x)), weightedSession), "col0")
+ Function ndvs = (x) -> columnStatsFor(getTableStats("test_stat_dist", Optional.of(snapshots.get(x)), weightedSession, Optional.empty()), "col0")
.getDistinctValuesCount();
assertEquals(ndvs.apply(0).getValue(), 1);
assertEquals(ndvs.apply(1).getValue(), 1);
@@ -991,10 +997,10 @@ private TableStatistics getTableStats(String name)
private TableStatistics getTableStats(String name, Optional snapshot)
{
- return getTableStats(name, snapshot, getSession());
+ return getTableStats(name, snapshot, getSession(), Optional.empty());
}
- private TableStatistics getTableStats(String name, Optional snapshot, Session session)
+ private TableStatistics getTableStats(String name, Optional snapshot, Session session, Optional> columns)
{
TransactionId transactionId = getQueryRunner().getTransactionManager().beginTransaction(false);
Session metadataSession = session.beginTransactionId(
@@ -1008,7 +1014,9 @@ private TableStatistics getTableStats(String name, Optional snapshot, Sess
TableHandle handle = resolver.getTableHandle(QualifiedObjectName.valueOf(qualifiedName)).get();
return metadata.getTableStatistics(metadataSession,
handle,
- new ArrayList<>(resolver.getColumnHandles(handle).values()),
+ new ArrayList<>(columns
+ .map(columnSet -> Maps.filterKeys(resolver.getColumnHandles(handle), columnSet::contains))
+ .orElse(resolver.getColumnHandles(handle)).values()),
Constraint.alwaysTrue());
}
@@ -1829,6 +1837,36 @@ public void testFilterWithRemainingPredicate(boolean pushdownFilterEnabled)
assertQuerySucceeds("DROP TABLE test_filterstats_remaining_predicate");
}
+ public void testStatisticsFileCache()
+ throws Exception
+ {
+ assertQuerySucceeds("CREATE TABLE test_statistics_file_cache(i int)");
+ assertUpdate("INSERT INTO test_statistics_file_cache VALUES 1, 2, 3, 4, 5", 5);
+ assertQuerySucceeds("ANALYZE test_statistics_file_cache");
+ Session session = Session.builder(getSession())
+ .setTransactionId(getQueryRunner().getTransactionManager().beginTransaction(false))
+ .build();
+ Optional handle = MetadataUtil.getOptionalTableHandle(session,
+ getQueryRunner().getTransactionManager(),
+ QualifiedObjectName.valueOf(session.getCatalog().get(), session.getSchema().get(), "test_statistics_file_cache"),
+ Optional.empty());
+ CatalogMetadata catalogMetadata = getQueryRunner().getTransactionManager()
+ .getCatalogMetadata(session.getTransactionId().get(), handle.get().getConnectorId());
+ // There isn't an easy way to access the cache internally, so use some reflection to grab it
+ Field delegate = ClassLoaderSafeConnectorMetadata.class.getDeclaredField("delegate");
+ delegate.setAccessible(true);
+ IcebergAbstractMetadata metadata = (IcebergAbstractMetadata) delegate.get(catalogMetadata.getMetadataFor(handle.get().getConnectorId()));
+ CacheStats initial = metadata.statisticsFileCache.stats();
+ assertEquals(metadata.statisticsFileCache.stats().minus(initial).hitCount(), 0);
+ TableStatistics stats = getTableStats("test_statistics_file_cache", Optional.empty(), getSession(), Optional.of(ImmutableList.of("i")));
+ assertEquals(stats.getRowCount().getValue(), 5);
+ assertEquals(metadata.statisticsFileCache.stats().minus(initial).missCount(), 1);
+ getTableStats("test_statistics_file_cache", Optional.empty(), getSession(), Optional.of(ImmutableList.of("i")));
+ assertEquals(metadata.statisticsFileCache.stats().minus(initial).missCount(), 1);
+ assertEquals(metadata.statisticsFileCache.stats().minus(initial).hitCount(), 1);
+ getQueryRunner().execute("DROP TABLE test_statistics_file_cache");
+ }
+
private void testCheckDeleteFiles(Table icebergTable, int expectedSize, List expectedFileContent)
{
// check delete file list
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergConfig.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergConfig.java
index cbfeebf72797e..d28503a27d316 100644
--- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergConfig.java
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergConfig.java
@@ -30,6 +30,8 @@
import static com.facebook.presto.iceberg.IcebergFileFormat.PARQUET;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES;
import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES;
+import static io.airlift.units.DataSize.Unit.MEGABYTE;
+import static io.airlift.units.DataSize.succinctDataSize;
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_EXPIRATION_INTERVAL_MS_DEFAULT;
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_MAX_CONTENT_LENGTH_DEFAULT;
import static org.apache.iceberg.CatalogProperties.IO_MANIFEST_CACHE_MAX_TOTAL_BYTES_DEFAULT;
@@ -66,7 +68,8 @@ public void testDefaults()
.setSplitManagerThreads(Runtime.getRuntime().availableProcessors())
.setMetadataPreviousVersionsMax(METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)
.setMetadataDeleteAfterCommit(METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)
- .setMetricsMaxInferredColumn(METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT));
+ .setMetricsMaxInferredColumn(METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT)
+ .setMaxStatisticsFileCacheSize(succinctDataSize(256, MEGABYTE)));
}
@Test
@@ -97,6 +100,7 @@ public void testExplicitPropertyMappings()
.put("iceberg.metadata-previous-versions-max", "1")
.put("iceberg.metadata-delete-after-commit", "true")
.put("iceberg.metrics-max-inferred-column", "16")
+ .put("iceberg.max-statistics-file-cache-size", "512MB")
.build();
IcebergConfig expected = new IcebergConfig()
@@ -123,7 +127,8 @@ public void testExplicitPropertyMappings()
.setSplitManagerThreads(42)
.setMetadataPreviousVersionsMax(1)
.setMetadataDeleteAfterCommit(true)
- .setMetricsMaxInferredColumn(16);
+ .setMetricsMaxInferredColumn(16)
+ .setMaxStatisticsFileCacheSize(succinctDataSize(512, MEGABYTE));
assertFullMapping(properties, expected);
}
diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java
index 7bd7fb34bacee..3954524b959b2 100644
--- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java
+++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java
@@ -59,6 +59,14 @@ public void testPartShowStatsWithFilters()
// Hive doesn't support returning statistics on partitioned tables
}
+ @Override
+ public void testStatisticsFileCache()
+ throws Exception
+ {
+ // hive doesn't write Iceberg statistics files when metastore is in use,
+ // so this test won't complete successfully.
+ }
+
@Override
protected Table loadTable(String tableName)
{
diff --git a/presto-main/src/main/java/com/facebook/presto/cost/DisjointRangeDomainHistogram.java b/presto-main/src/main/java/com/facebook/presto/cost/DisjointRangeDomainHistogram.java
index 1d4b44140de41..553cf84d07bf9 100644
--- a/presto-main/src/main/java/com/facebook/presto/cost/DisjointRangeDomainHistogram.java
+++ b/presto-main/src/main/java/com/facebook/presto/cost/DisjointRangeDomainHistogram.java
@@ -24,6 +24,7 @@
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;
+import org.openjdk.jol.info.ClassLayout;
import java.util.Collection;
import java.util.NoSuchElementException;
@@ -68,6 +69,9 @@
public class DisjointRangeDomainHistogram
implements ConnectorHistogram
{
+ private static final long INSTANCE_SIZE = ClassLayout.parseClass(DisjointRangeDomainHistogram.class).instanceSize();
+ private static final long RANGE_SIZE = ClassLayout.parseClass(Range.class).instanceSize();
+
private final ConnectorHistogram source;
// use RangeSet as the internal representation of the ranges, but the constructor arguments
// use StatisticRange to support serialization and deserialization.
@@ -356,4 +360,13 @@ public int hashCode()
{
return hash(source, getRanges());
}
+
+ @Override
+ public long getEstimatedSize()
+ {
+ // don't count the source histogram as it's just a reference to
+ // another histogram. We don't want to count the retained memory.
+ return INSTANCE_SIZE +
+ (RANGE_SIZE * ranges.size());
+ }
}
diff --git a/presto-main/src/main/java/com/facebook/presto/cost/UniformDistributionHistogram.java b/presto-main/src/main/java/com/facebook/presto/cost/UniformDistributionHistogram.java
index d06232d1fb608..c9cb6bfd19523 100644
--- a/presto-main/src/main/java/com/facebook/presto/cost/UniformDistributionHistogram.java
+++ b/presto-main/src/main/java/com/facebook/presto/cost/UniformDistributionHistogram.java
@@ -18,6 +18,7 @@
import com.facebook.presto.spi.statistics.Estimate;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.openjdk.jol.info.ClassLayout;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
@@ -38,6 +39,7 @@
public class UniformDistributionHistogram
implements ConnectorHistogram
{
+ private static final long INSTANCE_SIZE = ClassLayout.parseClass(UniformDistributionHistogram.class).instanceSize();
private final double lowValue;
private final double highValue;
@@ -111,6 +113,12 @@ public Estimate inverseCumulativeProbability(double percentile)
return Estimate.of(lowValue + (percentile * (highValue - lowValue)));
}
+ @Override
+ public long getEstimatedSize()
+ {
+ return INSTANCE_SIZE;
+ }
+
@Override
public String toString()
{
diff --git a/presto-main/src/test/java/com/facebook/presto/cost/TestDisjointRangeDomainHistogram.java b/presto-main/src/test/java/com/facebook/presto/cost/TestDisjointRangeDomainHistogram.java
index 1cbddcc781c58..0c11e729b1fef 100644
--- a/presto-main/src/test/java/com/facebook/presto/cost/TestDisjointRangeDomainHistogram.java
+++ b/presto-main/src/test/java/com/facebook/presto/cost/TestDisjointRangeDomainHistogram.java
@@ -254,6 +254,12 @@ public Estimate inverseCumulativeProbability(double percentile)
}
return Estimate.of(distribution.inverseCumulativeProbability(percentile));
}
+
+ @Override
+ public long getEstimatedSize()
+ {
+ return 0;
+ }
}
@Override
diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatistics.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatistics.java
index bec6fb5c31299..97b1ac7fd4c1a 100644
--- a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatistics.java
+++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatistics.java
@@ -14,15 +14,21 @@
package com.facebook.presto.spi.statistics;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.openjdk.jol.info.ClassLayout;
import java.util.Objects;
import java.util.Optional;
+import static com.facebook.presto.spi.statistics.DoubleRange.RANGE_SIZE;
+import static com.facebook.presto.spi.statistics.Estimate.ESTIMATE_SIZE;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
public final class ColumnStatistics
{
+ private static final long COLUMN_STATISTICS_SIZE = ClassLayout.parseClass(ColumnStatistics.class).instanceSize();
+ private static final long OPTION_SIZE = ClassLayout.parseClass(Optional.class).instanceSize();
+
private static final ColumnStatistics EMPTY = new ColumnStatistics(Estimate.unknown(), Estimate.unknown(), Estimate.unknown(), Optional.empty(), Optional.empty());
private final Estimate nullsFraction;
@@ -142,6 +148,15 @@ public static Builder buildFrom(ColumnStatistics statistics)
.setHistogram(statistics.getHistogram());
}
+ public long getEstimatedSize()
+ {
+ return COLUMN_STATISTICS_SIZE +
+ 3 * ESTIMATE_SIZE +
+ 2 * OPTION_SIZE +
+ histogram.map(ConnectorHistogram::getEstimatedSize).orElse(0L) +
+ range.map(unused -> RANGE_SIZE).orElse(0L);
+ }
+
/**
* If one of the estimates below is unspecified, the default "unknown" estimate value
* (represented by floating point NaN) may cause the resulting symbol statistics
diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ConnectorHistogram.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ConnectorHistogram.java
index 0febeb7f0d3fa..0b5187eb7d038 100644
--- a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ConnectorHistogram.java
+++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ConnectorHistogram.java
@@ -61,4 +61,9 @@ public interface ConnectorHistogram
* @return the value in the distribution corresponding to the percentile
*/
Estimate inverseCumulativeProbability(double percentile);
+
+ /**
+ * @return an approximation of the memory utilization of this histogram.
+ */
+ long getEstimatedSize();
}
diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/DoubleRange.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/DoubleRange.java
index eab5015013648..b30c856e1c112 100644
--- a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/DoubleRange.java
+++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/DoubleRange.java
@@ -14,6 +14,7 @@
package com.facebook.presto.spi.statistics;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.openjdk.jol.info.ClassLayout;
import java.util.Objects;
@@ -25,6 +26,8 @@
public class DoubleRange
{
+ static final long RANGE_SIZE = ClassLayout.parseClass(DoubleRange.class).instanceSize();
+
private final double min;
private final double max;
diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/Estimate.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/Estimate.java
index 02f8713d4da80..946d5f69d5b52 100644
--- a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/Estimate.java
+++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/Estimate.java
@@ -19,6 +19,7 @@
import com.facebook.drift.annotations.ThriftStruct;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.openjdk.jol.info.ClassLayout;
import java.util.Objects;
import java.util.function.Function;
@@ -31,6 +32,8 @@
@ThriftStruct
public final class Estimate
{
+ static final long ESTIMATE_SIZE = ClassLayout.parseClass(Estimate.class).instanceSize();
+
// todo eventually add some notion of statistic reliability
// Skipping for now as there hard to compute it properly and so far we do not have
// usecase for that.
diff --git a/presto-spi/src/test/java/com/facebook/presto/spi/statistics/TestColumnStatistics.java b/presto-spi/src/test/java/com/facebook/presto/spi/statistics/TestColumnStatistics.java
new file mode 100644
index 0000000000000..e33d3143b35cb
--- /dev/null
+++ b/presto-spi/src/test/java/com/facebook/presto/spi/statistics/TestColumnStatistics.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.spi.statistics;
+
+import org.openjdk.jol.info.ClassLayout;
+import org.openjdk.jol.info.GraphLayout;
+import org.testng.annotations.Test;
+
+import java.util.Optional;
+
+import static io.airlift.slice.SizeOf.sizeOf;
+import static org.testng.Assert.assertEquals;
+
+public class TestColumnStatistics
+{
+ /**
+ * Test for all non-histogram stats that memory is accounted accurately
+ */
+ @Test
+ public void testColumnStatisticsEstimatedSizeAccuracySimple()
+ {
+ ColumnStatistics stats = ColumnStatistics.builder()
+ .setDataSize(Estimate.of(100))
+ .setDistinctValuesCount(Estimate.of(1))
+ .setRange(new DoubleRange(100, 100))
+ .setNullsFraction(Estimate.of(0.1))
+ .build();
+
+ // test without histogram
+ long actualSize = GraphLayout.parseInstance(stats).totalSize();
+ assertEquals(actualSize, stats.getEstimatedSize());
+ }
+
+ /**
+ * Test for when histogram is included but has little-to-no memory usage.
+ */
+ @Test
+ public void testColumnStatisticsEstimatedSizeAccuracyHistogramEmpty()
+ {
+ ColumnStatistics stats = ColumnStatistics.builder()
+ .setDataSize(Estimate.of(100))
+ .setDistinctValuesCount(Estimate.of(1))
+ .setRange(new DoubleRange(100, 100))
+ .setNullsFraction(Estimate.of(0.1))
+ .setHistogram(Optional.of(new ConnectorHistogram()
+ {
+ @Override
+ public Estimate cumulativeProbability(double value, boolean inclusive)
+ {
+ return null;
+ }
+
+ @Override
+ public Estimate inverseCumulativeProbability(double percentile)
+ {
+ return null;
+ }
+
+ @Override
+ public long getEstimatedSize()
+ {
+ return ClassLayout.parseClass(this.getClass()).instanceSize() * 2L;
+ }
+ }))
+ .build();
+ long actualSize = GraphLayout.parseInstance(stats).totalSize();
+ assertEquals(actualSize, stats.getEstimatedSize());
+ }
+
+ /**
+ * Test for when histogram is included and has a significant memory footprint
+ */
+ @Test
+ public void testColumnStatisticsEstimatedSizeAccuracyHistogram()
+ {
+ ColumnStatistics stats = ColumnStatistics.builder()
+ .setDataSize(Estimate.of(100))
+ .setDistinctValuesCount(Estimate.of(1))
+ .setRange(new DoubleRange(100, 100))
+ .setNullsFraction(Estimate.of(0.1))
+ .setHistogram(Optional.of(new ConnectorHistogram()
+ {
+ final byte[] memory = new byte[4096];
+
+ @Override
+ public Estimate cumulativeProbability(double value, boolean inclusive)
+ {
+ return null;
+ }
+
+ @Override
+ public Estimate inverseCumulativeProbability(double percentile)
+ {
+ return null;
+ }
+
+ @Override
+ public long getEstimatedSize()
+ {
+ return sizeOf(memory) + (ClassLayout.parseClass(this.getClass()).instanceSize() * 2L);
+ }
+ }))
+ .build();
+
+ // test histogram with fields histogram
+ long actualSize = GraphLayout.parseInstance(stats).totalSize();
+ double error = actualSize * 1E-2;
+ assertEquals(actualSize, stats.getEstimatedSize(), error);
+ }
+}